lumibot-4.0.22-py3-none-any.whl → lumibot-4.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot might be problematic. Click here for more details.

Files changed (164)
  1. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  2. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  3. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  4. lumibot/backtesting/__init__.py +6 -5
  5. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  6. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  7. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  8. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  9. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  10. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  11. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  12. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  13. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  14. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  15. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  16. lumibot/backtesting/backtesting_broker.py +209 -9
  17. lumibot/backtesting/databento_backtesting.py +141 -24
  18. lumibot/backtesting/thetadata_backtesting.py +63 -42
  19. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  20. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  21. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  22. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  23. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  24. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  25. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  26. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  27. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  28. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  29. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  30. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  31. lumibot/brokers/alpaca.py +11 -1
  32. lumibot/brokers/tradeovate.py +475 -0
  33. lumibot/components/grok_news_helper.py +284 -0
  34. lumibot/components/options_helper.py +90 -34
  35. lumibot/credentials.py +3 -0
  36. lumibot/data_sources/__init__.py +2 -1
  37. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  38. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  39. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  40. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  41. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  42. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  43. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  44. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  45. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  46. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  47. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  48. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  49. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  50. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  51. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  52. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  53. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  54. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  55. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  56. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  57. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  58. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  59. lumibot/data_sources/data_source_backtesting.py +3 -5
  60. lumibot/data_sources/databento_data.py +5 -5
  61. lumibot/data_sources/databento_data_polars_backtesting.py +636 -0
  62. lumibot/data_sources/databento_data_polars_live.py +793 -0
  63. lumibot/data_sources/pandas_data.py +6 -3
  64. lumibot/data_sources/polars_mixin.py +126 -21
  65. lumibot/data_sources/tradeovate_data.py +80 -0
  66. lumibot/data_sources/tradier_data.py +2 -1
  67. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  68. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  69. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  70. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  71. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  72. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  73. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  74. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  75. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  76. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  77. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  78. lumibot/entities/asset.py +8 -0
  79. lumibot/entities/order.py +1 -1
  80. lumibot/entities/quote.py +14 -0
  81. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  82. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  83. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  84. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  85. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  86. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  87. lumibot/strategies/_strategy.py +95 -27
  88. lumibot/strategies/strategy.py +5 -6
  89. lumibot/strategies/strategy_executor.py +2 -2
  90. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  91. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  92. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  93. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  94. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  95. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  96. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  97. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  98. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  99. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  100. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  101. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  102. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  103. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  104. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  105. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  106. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  107. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  108. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  109. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  110. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  111. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  112. lumibot/tools/databento_helper.py +384 -133
  113. lumibot/tools/databento_helper_polars.py +218 -156
  114. lumibot/tools/databento_roll.py +216 -0
  115. lumibot/tools/lumibot_logger.py +32 -17
  116. lumibot/tools/polygon_helper.py +65 -0
  117. lumibot/tools/thetadata_helper.py +588 -70
  118. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  119. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  120. lumibot/traders/trader.py +1 -1
  121. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  122. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  123. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  124. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
  125. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/RECORD +164 -46
  126. tests/backtest/check_timing_offset.py +198 -0
  127. tests/backtest/check_volume_spike.py +112 -0
  128. tests/backtest/comprehensive_comparison.py +166 -0
  129. tests/backtest/debug_comparison.py +91 -0
  130. tests/backtest/diagnose_price_difference.py +97 -0
  131. tests/backtest/direct_api_comparison.py +203 -0
  132. tests/backtest/profile_thetadata_vs_polygon.py +255 -0
  133. tests/backtest/root_cause_analysis.py +109 -0
  134. tests/backtest/test_accuracy_verification.py +244 -0
  135. tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
  136. tests/backtest/test_databento.py +57 -0
  137. tests/backtest/test_databento_comprehensive_trading.py +564 -0
  138. tests/backtest/test_debug_avg_fill_price.py +112 -0
  139. tests/backtest/test_dividends.py +8 -3
  140. tests/backtest/test_example_strategies.py +54 -47
  141. tests/backtest/test_futures_edge_cases.py +451 -0
  142. tests/backtest/test_futures_single_trade.py +270 -0
  143. tests/backtest/test_futures_ultra_simple.py +191 -0
  144. tests/backtest/test_index_data_verification.py +348 -0
  145. tests/backtest/test_polygon.py +45 -24
  146. tests/backtest/test_thetadata.py +246 -60
  147. tests/backtest/test_thetadata_comprehensive.py +729 -0
  148. tests/backtest/test_thetadata_vs_polygon.py +557 -0
  149. tests/backtest/test_yahoo.py +1 -2
  150. tests/conftest.py +20 -0
  151. tests/test_backtesting_data_source_env.py +249 -0
  152. tests/test_backtesting_quiet_logs_complete.py +10 -11
  153. tests/test_databento_helper.py +73 -86
  154. tests/test_databento_live.py +10 -10
  155. tests/test_databento_timezone_fixes.py +21 -4
  156. tests/test_get_historical_prices.py +6 -6
  157. tests/test_options_helper.py +162 -40
  158. tests/test_polygon_helper.py +21 -13
  159. tests/test_quiet_logs_requirements.py +5 -5
  160. tests/test_thetadata_helper.py +487 -171
  161. tests/test_yahoo_data.py +125 -0
  162. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
  163. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
  164. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,255 @@
1
+ """
2
+ Performance profiling comparison between ThetaData and Polygon.
3
+
4
+ This script uses YAPPI (thread-safe profiler) to identify bottlenecks in both data sources.
5
+
6
+ Usage:
7
+ python profile_thetadata_vs_polygon.py
8
+
9
+ Requirements:
10
+ pip install yappi snakeviz
11
+
12
+ To visualize results:
13
+ snakeviz thetadata_nocache.prof
14
+ snakeviz thetadata_cached.prof
15
+ snakeviz polygon_nocache.prof
16
+ snakeviz polygon_cached.prof
17
+ """
18
+
19
+ import datetime
20
+ import os
21
+ import shutil
22
+ from pathlib import Path
23
+ import yappi
24
+ from dotenv import load_dotenv
25
+ from lumibot.strategies import Strategy
26
+ from lumibot.backtesting import ThetaDataBacktesting, PolygonDataBacktesting
27
+ from lumibot.entities import Asset
28
+
29
+ # Load environment variables from .env file
30
+ load_dotenv()
31
+
32
+
33
class SimpleBacktestStrategy(Strategy):
    """Minimal buy-and-hold strategy used as the profiling workload.

    On the first trading iteration it submits a single buy order for
    ``parameters["quantity"]`` of ``parameters["symbol"]``; every later
    iteration does nothing.
    """

    parameters = {"symbol": "AMZN", "quantity": 10}

    def initialize(self):
        # "1D" presumably means one iteration per day -- confirm against the
        # Strategy.sleeptime documentation.
        self.sleeptime = "1D"

    def on_trading_iteration(self):
        # Guard clause: only the very first iteration places the order.
        if not self.first_iteration:
            return

        target = Asset(self.parameters["symbol"])
        buy_order = self.create_order(target, self.parameters["quantity"], "buy")
        self.submit_order(buy_order)
49
+
50
+
51
def get_cache_dir():
    """Return the directory this script treats as the lumibot cache (``~/.lumibot``)."""
    return Path.home().joinpath(".lumibot")
55
+
56
+
57
def clear_cache():
    """Delete the whole cache directory tree, reporting progress on stdout."""
    target = get_cache_dir()

    # Nothing on disk: report it and stop.
    if not target.exists():
        print("No cache to clear")
        return

    print(f"Clearing cache at {target}")
    shutil.rmtree(target)
    print("Cache cleared")
66
+
67
+
68
def profile_backtest(data_source_class, name, profile_file, clear_cache_first=True):
    """
    Profile one SimpleBacktestStrategy backtest with yappi and save the result.

    Args:
        data_source_class: ThetaDataBacktesting or PolygonDataBacktesting
        name: Name for logging
        profile_file: Output file for profiling results (pstat format)
        clear_cache_first: Whether to clear cache before running

    Returns:
        Seconds spent inside the ``run_backtest`` call, as a float.

    Raises:
        Exception: any error raised by the backtest is printed and re-raised.
            The profiler is stopped first; no profile file is written in
            that case.
    """
    # Function-scope import: `time` is not among this file's top-level imports.
    import time

    if clear_cache_first:
        clear_cache()

    print(f"\n{'='*80}")
    print(f"PROFILING: {name}")
    print(f"Cache: {'CLEARED' if clear_cache_first else 'WARMED'}")
    print(f"{'='*80}\n")

    # Backtest window (hard-coded; main() prints the same dates).
    start = datetime.datetime(2024, 8, 1)
    end = datetime.datetime(2024, 8, 2)

    # Credentials for both providers are forwarded regardless of which
    # data_source_class is being profiled.
    thetadata_username = os.environ.get("THETADATA_USERNAME")
    thetadata_password = os.environ.get("THETADATA_PASSWORD")
    polygon_api_key = os.environ.get("POLYGON_API_KEY")

    # Start profiling from a clean slate, measuring wall-clock time.
    yappi.clear_stats()
    yappi.set_clock_type("wall")  # Use wall clock time
    yappi.start()

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way datetime.now() differences can.
    started = time.perf_counter()

    try:
        # The first element of the returned pair is not used here.
        _results, strategy = SimpleBacktestStrategy.run_backtest(
            data_source_class,
            start,
            end,
            show_plot=False,
            show_tearsheet=False,
            save_tearsheet=False,
            parameters={"symbol": "AMZN", "quantity": 10},
            thetadata_username=thetadata_username,
            thetadata_password=thetadata_password,
            polygon_api_key=polygon_api_key,
        )

        elapsed = time.perf_counter() - started

        print(f"✓ Backtest completed in {elapsed:.2f} seconds")
        print(f" Orders: {len(strategy.orders)}")
        print(f" Final portfolio value: ${strategy.get_portfolio_value():,.2f}")

    except Exception as e:
        print(f"✗ Backtest failed: {e}")
        raise

    finally:
        # Stop profiling whether or not the backtest succeeded.
        yappi.stop()

    # Save profiling results
    func_stats = yappi.get_func_stats()

    # Save to pstat format for snakeviz
    func_stats.save(profile_file, type="pstat")
    print(f" Profile saved to: {profile_file}")

    # Print top 30 time-consuming functions
    print("\nTop 30 time-consuming functions:")
    print("="*120)
    func_stats.sort("totaltime", "desc")
    for i, stat in enumerate(func_stats[:30]):
        if i == 0:
            # Column header is only printed when there is at least one row.
            print(f"{'Function':<60} {'Calls':<10} {'TotTime':<12} {'PerCall':<12}")
            print("-"*120)
        print(f"{stat.name:<60} {stat.ncall:<10} {stat.ttot:<12.6f} {stat.tavg:<12.6f}")

    return elapsed
151
+
152
+
153
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


def main():
    """Run profiling comparison.

    Checks that ThetaData and Polygon credentials are present in the
    environment (printing an error and returning early if not), then profiles
    four backtests -- each provider with the cache cleared and then warmed --
    and prints a timing summary plus the names of the generated profile files.
    """
    # Check if credentials are available
    thetadata_username = os.environ.get("THETADATA_USERNAME")
    thetadata_password = os.environ.get("THETADATA_PASSWORD")
    polygon_api_key = os.environ.get("POLYGON_API_KEY")

    if not thetadata_username or not thetadata_password:
        print("ERROR: ThetaData credentials not found")
        print("Set THETADATA_USERNAME and THETADATA_PASSWORD environment variables")
        return

    if not polygon_api_key:
        print("ERROR: Polygon API key not found")
        print("Set POLYGON_API_KEY environment variable")
        return

    print("\n" + "="*80)
    print("PERFORMANCE PROFILING: ThetaData vs Polygon")
    print("="*80)
    print("Date range: 2024-08-01 to 2024-08-02 (1 trading day)")
    print("Strategy: Buy & hold 10 shares of AMZN")
    print("="*80)

    results = {}

    # Order matters: each "cached" run reuses whatever the preceding
    # "no cache" run of the same provider left on disk.

    # 1. ThetaData with cache cleared
    results["thetadata_nocache"] = profile_backtest(
        ThetaDataBacktesting,
        "ThetaData (NO CACHE)",
        "thetadata_nocache.prof",
        clear_cache_first=True
    )

    # 2. ThetaData with cache warmed
    results["thetadata_cached"] = profile_backtest(
        ThetaDataBacktesting,
        "ThetaData (CACHED)",
        "thetadata_cached.prof",
        clear_cache_first=False
    )

    # 3. Polygon with cache cleared
    results["polygon_nocache"] = profile_backtest(
        PolygonDataBacktesting,
        "Polygon (NO CACHE)",
        "polygon_nocache.prof",
        clear_cache_first=True
    )

    # 4. Polygon with cache warmed
    results["polygon_cached"] = profile_backtest(
        PolygonDataBacktesting,
        "Polygon (CACHED)",
        "polygon_cached.prof",
        clear_cache_first=False
    )

    # Summary
    print("\n" + "="*80)
    print("SUMMARY")
    print("="*80)
    print(f"{'Test':<30} {'Time (s)':<15} {'Speedup vs ThetaData'}")
    print("-"*80)

    baseline_key = "thetadata_nocache"
    baseline = results[baseline_key]
    for key, elapsed in results.items():
        # Identify the baseline row by key rather than by comparing a float
        # ratio against 1.0.
        if key == baseline_key:
            speedup_str = "-"
        else:
            speedup_str = f"{_safe_ratio(baseline, elapsed):.1f}x"
        print(f"{key:<30} {elapsed:>10.2f} {speedup_str:>10}")

    print("\n" + "="*80)
    print("ANALYSIS")
    print("="*80)

    theta_cached = results["thetadata_cached"]
    polygon_cached = results["polygon_cached"]

    theta_cache_benefit = _safe_ratio(results["thetadata_nocache"], theta_cached)
    polygon_cache_benefit = _safe_ratio(results["polygon_nocache"], polygon_cached)

    print(f"ThetaData cache benefit: {theta_cache_benefit:.1f}x faster with cache")
    print(f"Polygon cache benefit: {polygon_cache_benefit:.1f}x faster with cache")

    # Compare cached performance (most relevant for production)
    if theta_cached <= 0 or polygon_cached <= 0:
        # Same zero guard as the ratios above; avoids ZeroDivisionError.
        print("\nCached timings are not positive; skipping ThetaData vs Polygon comparison")
    elif theta_cached > polygon_cached:
        slowdown = theta_cached / polygon_cached
        print(f"\n⚠️ ThetaData (cached) is {slowdown:.1f}x SLOWER than Polygon (cached)")
    else:
        speedup = polygon_cached / theta_cached
        print(f"\n✓ ThetaData (cached) is {speedup:.1f}x FASTER than Polygon (cached)")

    print("\n" + "="*80)
    print("PROFILING FILES GENERATED")
    print("="*80)
    print("To visualize bottlenecks, run:")
    print(" snakeviz thetadata_nocache.prof")
    print(" snakeviz thetadata_cached.prof")
    print(" snakeviz polygon_nocache.prof")
    print(" snakeviz polygon_cached.prof")
    print("="*80 + "\n")


if __name__ == "__main__":
    main()
@@ -0,0 +1,109 @@
1
+ """
2
+ Root cause analysis: Is the +1 minute offset from ThetaData's API or our processing?
3
+ """
4
+
5
+ import requests
6
+ import pandas as pd
7
+ import datetime
8
+
9
# Diagnostic script body: fetch one day of 1-minute AMZN bars from the local
# ThetaData endpoint, print the first five bars raw and again after the
# timestamp processing replicated from thetadata_helper.py, then print the
# write-up of the findings.

# Horizontal rules reused by every section of the report.
HEAVY_RULE = "=" * 100
LIGHT_RULE = "-" * 100

print(HEAVY_RULE)
print("ROOT CAUSE ANALYSIS: ThetaData +1 Minute Offset")
print(HEAVY_RULE)

# Get raw API response. The timeout keeps the script from hanging forever if
# the local endpoint is not responding, and raise_for_status() surfaces HTTP
# errors before the body is parsed as JSON.
response = requests.get(
    'http://127.0.0.1:25510/hist/stock/ohlc',
    params={
        'root': 'AMZN',
        'start_date': '20240801',
        'end_date': '20240801',
        'ivl': 60000,  # bar interval in ms (1 minute)
        'rth': 'true'  # presumably "regular trading hours only" -- confirm in ThetaData docs
    },
    timeout=30,
)
response.raise_for_status()

data = response.json()

print("\n1. ThetaData RAW API Response (no processing):")
print(LIGHT_RULE)
print(f"{'Bar':<5} {'ms_of_day':<12} {'Time':<10} {'Volume':<12} {'Notes'}")
print(LIGHT_RULE)

for i, row in enumerate(data['response'][:5]):
    # Each row must have exactly eight fields; only the timestamp and the
    # volume are reported below.
    ms_of_day, _open, _high, _low, _close, v, _count, _date = row
    hours = ms_of_day // (1000 * 60 * 60)
    minutes = (ms_of_day % (1000 * 60 * 60)) // (1000 * 60)
    time_str = f"{hours:02d}:{minutes:02d}"

    note = ""
    if i == 0:
        note = "← Should be pre-market if labeled correctly"
    elif i == 1:
        note = "← MASSIVE SPIKE (market open)" if v > 1000000 else ""

    print(f"{i+1:<5} {ms_of_day:<12} {time_str:<10} {v:<12,} {note}")

print("\n2. After Our Code Processing (thetadata_helper.py):")
print(LIGHT_RULE)

# Replicate our processing from thetadata_helper.py
df = pd.DataFrame(data['response'][:5], columns=data['header']['format'])


def combine_datetime(row):
    """Combine a row's YYYYMMDD ``date`` and ``ms_of_day`` into one naive datetime."""
    date_str = str(int(row["date"]))
    base_date = datetime.datetime.strptime(date_str, "%Y%m%d")
    return base_date + datetime.timedelta(milliseconds=int(row["ms_of_day"]))


datetime_combined = df.apply(combine_datetime, axis=1)
df = df.assign(datetime=datetime_combined)
df["datetime"] = pd.to_datetime(df["datetime"])

print(f"{'Bar':<5} {'Datetime':<30} {'Volume':<12} {'Notes'}")
print(LIGHT_RULE)

for i, (_, row) in enumerate(df.iterrows()):
    note = ""
    if i == 0:
        note = "← Should be pre-market if labeled correctly"
    elif i == 1 and row['volume'] > 1000000:
        note = "← MASSIVE SPIKE (market open)"

    print(f"{i+1:<5} {str(row['datetime']):<30} {row['volume']:<12,} {note}")

# NOTE: sections 3 and 4 below are static text; the volumes and conclusions
# they quote are literals, not values derived from the response fetched above.
print("\n3. Expected Correct Labeling (based on volume spike = market open at 9:30):")
print(LIGHT_RULE)
print("Bar 1 (10,434 volume): Should be labeled 9:29 (pre-market)")
print("Bar 2 (1,517,215 volume): Should be labeled 9:30 (market open SPIKE)")
print()
print("Actual ThetaData Labeling:")
print("Bar 1 (10,434 volume): Labeled as 9:30")
print("Bar 2 (1,517,215 volume): Labeled as 9:31")
print()
print(HEAVY_RULE)
print("CONCLUSION:")
print(HEAVY_RULE)
print("The +1 minute offset exists in ThetaData's RAW API response.")
print("Our processing code does NOT introduce any shifts.")
print("The ms_of_day values from ThetaData are already off by +1 minute.")
print()
print("PROOF:")
print("- ThetaData labels the low-volume bar as 9:30")
print("- ThetaData labels the spike bar as 9:31")
print("- But market opens at 9:30, so the spike SHOULD be labeled 9:30")
print("- Therefore, ThetaData's timestamps are +1 minute ahead of reality")
print(HEAVY_RULE)

print("\n4. Checking ThetaData's Documentation Claim:")
print(LIGHT_RULE)
print("ThetaData docs say: 'bar timestamp <= trade time < bar timestamp + ivl'")
print("For bar labeled 9:30 with ivl=60000ms (1 minute):")
print(" Should include trades: 9:30:00.000 <= trade < 9:31:00.000")
print()
print("But we observe:")
print(" Bar labeled 9:30 has 10,434 volume (pre-market level)")
print(" Bar labeled 9:31 has 1,517,215 volume (market open spike)")
print()
print("This means:")
print(" Bar labeled 9:30 actually contains 9:29:00-9:29:59 data")
print(" Bar labeled 9:31 actually contains 9:30:00-9:30:59 data")
print()
print("Therefore: ThetaData's bars are MISLABELED by +1 minute in their API")
print(HEAVY_RULE)
@@ -0,0 +1,244 @@
1
+ """
2
+ Phase 1: Accuracy Verification Tests
3
+
4
+ This test suite verifies that ThetaData price variance compared to Polygon
5
+ remains acceptable over long time periods and across different price ranges.
6
+
7
+ Goals:
8
+ - Verify portfolio variance < 0.01% over 1 year
9
+ - Verify price differences remain sub-penny across all price ranges
10
+ - Verify no systematic bias (variance is random, not directional)
11
+ """
12
+
13
+ import datetime
14
+ import os
15
+ import pytest
16
+ from dotenv import load_dotenv
17
+ from lumibot.strategies import Strategy
18
+ from lumibot.backtesting import PolygonDataBacktesting, ThetaDataBacktesting
19
+ from lumibot.entities import Asset
20
+
21
+ # Load environment variables from .env file
22
+ load_dotenv()
23
+
24
+ # Get credentials from environment variables
25
+ POLYGON_API_KEY = os.environ.get("POLYGON_API_KEY")
26
+ THETADATA_USERNAME = os.environ.get("THETADATA_USERNAME")
27
+ THETADATA_PASSWORD = os.environ.get("THETADATA_PASSWORD")
28
+
29
+
30
class AccuracyTestStrategy(Strategy):
    """Buy-once-and-hold strategy used to compare data sources.

    The first iteration that finds ``self.bought`` unset looks up the last
    price of ``parameters["symbol"]``, logs it, submits one buy order for
    ``parameters["quantity"]`` and sets the flag so no further orders follow.
    """

    parameters = {"symbol": "AMZN", "quantity": 10}

    def initialize(self):
        self.sleeptime = "1D"
        # Flipped to True once the single buy order has been submitted.
        self.bought = False

    def on_trading_iteration(self):
        # Guard clause: the position has already been opened.
        if self.bought:
            return

        target = Asset(self.parameters["symbol"])
        last_price = self.get_last_price(target)
        self.log_message(
            f"Buying {self.parameters['quantity']} shares of {self.parameters['symbol']} at ${last_price}"
        )
        buy_order = self.create_order(target, quantity=self.parameters["quantity"], side="buy")
        self.submit_order(buy_order)
        self.bought = True
50
+
51
+
52
@pytest.mark.apitest
@pytest.mark.skipif(
    not POLYGON_API_KEY or not THETADATA_USERNAME or not THETADATA_PASSWORD,
    reason="Requires both Polygon and ThetaData credentials"
)
class TestAccuracyVerification:
    """Accuracy verification test suite.

    Each test runs AccuracyTestStrategy once on ThetaData and once on Polygon
    over the same window and asserts that the final portfolio values differ
    by less than MAX_VARIANCE_PCT percent.
    """

    # Maximum tolerated |ThetaData - Polygon| difference, expressed as a
    # percentage of the Polygon final portfolio value.
    MAX_VARIANCE_PCT = 0.01

    @staticmethod
    def _run_backtest(data_source_class, start, end, symbol, quantity, **credentials):
        """Backtest AccuracyTestStrategy on one data source and return the strategy object."""
        _results, strategy = AccuracyTestStrategy.run_backtest(
            data_source_class,
            start,
            end,
            benchmark_asset="SPY",
            show_plot=False,
            show_tearsheet=False,
            save_tearsheet=False,
            parameters={"symbol": symbol, "quantity": quantity},
            **credentials,
        )
        return strategy

    @staticmethod
    def _portfolio_variance(theta_strat, polygon_strat):
        """Return (theta_final, polygon_final, absolute difference, percent difference)."""
        theta_final = theta_strat.get_portfolio_value()
        polygon_final = polygon_strat.get_portfolio_value()
        difference = abs(theta_final - polygon_final)
        percent_diff = (difference / polygon_final) * 100
        return theta_final, polygon_final, difference, percent_diff

    def test_one_year_amzn_accuracy(self):
        """
        Test 1: Verify AMZN accuracy over 1 year (2023)

        Expected:
        - Portfolio variance < 0.01% ($10 on $100k portfolio)
        - Price differences remain sub-penny
        - No systematic directional bias
        """
        backtesting_start = datetime.datetime(2023, 1, 3)  # First trading day of 2023
        backtesting_end = datetime.datetime(2023, 12, 29)  # Last trading day of 2023

        print("\n" + "="*80)
        print("TEST 1: ONE YEAR ACCURACY VERIFICATION - AMZN")
        print("="*80)
        print(f"Period: {backtesting_start.date()} to {backtesting_end.date()}")
        print("Symbol: AMZN")
        print("Trading days: ~252")

        # Run ThetaData backtest
        print("\n[1/2] Running ThetaData backtest...")
        theta_strat = self._run_backtest(
            ThetaDataBacktesting,
            backtesting_start,
            backtesting_end,
            "AMZN",
            100,
            thetadata_username=THETADATA_USERNAME,
            thetadata_password=THETADATA_PASSWORD,
        )

        # Run Polygon backtest
        print("\n[2/2] Running Polygon backtest...")
        polygon_strat = self._run_backtest(
            PolygonDataBacktesting,
            backtesting_start,
            backtesting_end,
            "AMZN",
            100,
            polygon_api_key=POLYGON_API_KEY,
        )

        # Compare results - get final portfolio value from strategy
        theta_final, polygon_final, difference, percent_diff = self._portfolio_variance(
            theta_strat, polygon_strat
        )
        threshold = self.MAX_VARIANCE_PCT

        print("\n" + "-"*80)
        print("RESULTS:")
        print("-"*80)
        print(f"ThetaData Final Portfolio Value: ${theta_final:,.2f}")
        print(f"Polygon Final Portfolio Value: ${polygon_final:,.2f}")
        print(f"Absolute Difference: ${difference:,.2f}")
        print(f"Percentage Difference: {percent_diff:.4f}%")
        print(f"Acceptance Threshold: {threshold}% (${polygon_final * threshold / 100:,.2f})")

        # Verify acceptance criteria
        assert percent_diff < threshold, (
            f"Portfolio variance {percent_diff:.4f}% exceeds {threshold}% threshold"
        )

        print(f"\n✓ TEST PASSED: Variance {percent_diff:.4f}% is within acceptable range")
        print("="*80 + "\n")

    def test_multi_symbol_price_ranges(self):
        """
        Test 2: Verify accuracy across different price ranges

        Tests 5 symbols with different price points:
        - AMZN: ~$180
        - AAPL: ~$175
        - GOOGL: ~$140
        - SPY: ~$450
        - BRK.B: ~$420

        Expected:
        - 0.5¢ variance is consistent percentage across all price ranges
        - Sub-penny differences for all symbols
        """
        # Short window for speed: Thursday 2024-08-01 through Monday
        # 2024-08-05 (a 4-day span, not a full week).
        backtesting_start = datetime.datetime(2024, 8, 1)
        backtesting_end = datetime.datetime(2024, 8, 5)

        # (symbol, share quantity, approximate share price used for display only)
        symbols = [
            ("AMZN", 10, 180),   # ~$180/share, 10 shares
            ("AAPL", 10, 175),   # ~$175/share, 10 shares
            ("GOOGL", 10, 140),  # ~$140/share, 10 shares
            ("SPY", 10, 450),    # ~$450/share, 10 shares
            ("BRK.B", 5, 420),   # ~$420/share, 5 shares
        ]
        threshold = self.MAX_VARIANCE_PCT

        print("\n" + "="*80)
        print("TEST 2: MULTI-SYMBOL PRICE RANGE VERIFICATION")
        print("="*80)
        print(f"Period: {backtesting_start.date()} to {backtesting_end.date()}")
        print(f"Symbols: {len(symbols)}")

        results_table = []

        for symbol, qty, approx_price in symbols:
            print(f"\n--- Testing {symbol} (~${approx_price}/share, {qty} shares) ---")

            # Run ThetaData backtest
            theta_strat = self._run_backtest(
                ThetaDataBacktesting,
                backtesting_start,
                backtesting_end,
                symbol,
                qty,
                thetadata_username=THETADATA_USERNAME,
                thetadata_password=THETADATA_PASSWORD,
            )

            # Run Polygon backtest
            polygon_strat = self._run_backtest(
                PolygonDataBacktesting,
                backtesting_start,
                backtesting_end,
                symbol,
                qty,
                polygon_api_key=POLYGON_API_KEY,
            )

            # Compare final portfolio values
            theta_final, polygon_final, difference, percent_diff = self._portfolio_variance(
                theta_strat, polygon_strat
            )

            results_table.append({
                "symbol": symbol,
                "price": approx_price,
                "qty": qty,
                "theta": theta_final,
                "polygon": polygon_final,
                "diff": difference,
                "pct": percent_diff
            })

            print(f" ThetaData: ${theta_final:,.2f}")
            print(f" Polygon: ${polygon_final:,.2f}")
            print(f" Difference: ${difference:,.2f} ({percent_diff:.4f}%)")

            # Fail fast on the first symbol that exceeds the threshold.
            assert percent_diff < threshold, (
                f"{symbol}: Variance {percent_diff:.4f}% exceeds {threshold}%"
            )

        # Summary table
        print("\n" + "-"*80)
        print("SUMMARY TABLE:")
        print("-"*80)
        print(f"{'Symbol':<8} {'Price':<8} {'Qty':<5} {'ThetaData':<15} {'Polygon':<15} {'Diff':<10} {'%':<8}")
        print("-"*80)

        for r in results_table:
            print(f"{r['symbol']:<8} ${r['price']:<7} {r['qty']:<5} ${r['theta']:<14,.2f} ${r['polygon']:<14,.2f} ${r['diff']:<9,.2f} {r['pct']:.4f}%")

        # Calculate average variance
        avg_pct = sum(r['pct'] for r in results_table) / len(results_table)
        max_pct = max(r['pct'] for r in results_table)

        print("-"*80)
        print(f"Average Variance: {avg_pct:.4f}%")
        print(f"Maximum Variance: {max_pct:.4f}%")
        print(f"Threshold: {threshold}%")

        assert avg_pct < threshold, f"Average variance {avg_pct:.4f}% exceeds {threshold}%"
        assert max_pct < threshold, f"Max variance {max_pct:.4f}% exceeds {threshold}%"

        print("\n✓ TEST PASSED: All symbols within acceptable variance")
        print("="*80 + "\n")


if __name__ == "__main__":
    # Run tests directly
    pytest.main([__file__, "-v", "-s"])