lumibot 4.0.23__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot might be problematic. Click here for more details.

Files changed (160) hide show
  1. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  2. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  3. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  4. lumibot/backtesting/__init__.py +6 -5
  5. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  6. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  7. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  8. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  9. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  10. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  11. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  12. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  13. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  14. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  15. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  16. lumibot/backtesting/backtesting_broker.py +209 -9
  17. lumibot/backtesting/databento_backtesting.py +141 -24
  18. lumibot/backtesting/thetadata_backtesting.py +63 -42
  19. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  20. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  21. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  22. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  23. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  24. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  25. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  26. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  27. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  28. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  29. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  30. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  31. lumibot/brokers/alpaca.py +11 -1
  32. lumibot/brokers/tradeovate.py +475 -0
  33. lumibot/components/grok_news_helper.py +284 -0
  34. lumibot/components/options_helper.py +90 -34
  35. lumibot/credentials.py +3 -0
  36. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  37. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  38. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  39. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  40. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  41. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  42. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  43. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  44. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  45. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  46. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  47. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  48. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  49. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  50. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  51. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  52. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  53. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  54. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  55. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  56. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  57. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  58. lumibot/data_sources/data_source_backtesting.py +3 -5
  59. lumibot/data_sources/databento_data_polars_backtesting.py +194 -48
  60. lumibot/data_sources/pandas_data.py +6 -3
  61. lumibot/data_sources/polars_mixin.py +126 -21
  62. lumibot/data_sources/tradeovate_data.py +80 -0
  63. lumibot/data_sources/tradier_data.py +2 -1
  64. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  65. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  66. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  67. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  68. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  69. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  70. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  71. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  72. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  73. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  74. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  75. lumibot/entities/asset.py +8 -0
  76. lumibot/entities/order.py +1 -1
  77. lumibot/entities/quote.py +14 -0
  78. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  79. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  80. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  81. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  82. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  83. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  84. lumibot/strategies/_strategy.py +95 -27
  85. lumibot/strategies/strategy.py +5 -6
  86. lumibot/strategies/strategy_executor.py +2 -2
  87. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  88. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  89. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  90. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  91. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  92. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  93. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  94. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  95. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  96. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  97. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  98. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  99. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  100. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  101. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  102. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  103. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  104. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  105. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  106. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  107. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  108. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  109. lumibot/tools/databento_helper.py +384 -133
  110. lumibot/tools/databento_helper_polars.py +218 -156
  111. lumibot/tools/databento_roll.py +216 -0
  112. lumibot/tools/lumibot_logger.py +32 -17
  113. lumibot/tools/polygon_helper.py +65 -0
  114. lumibot/tools/thetadata_helper.py +588 -70
  115. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  116. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  117. lumibot/traders/trader.py +1 -1
  118. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  119. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  120. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  121. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
  122. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/RECORD +160 -44
  123. tests/backtest/check_timing_offset.py +198 -0
  124. tests/backtest/check_volume_spike.py +112 -0
  125. tests/backtest/comprehensive_comparison.py +166 -0
  126. tests/backtest/debug_comparison.py +91 -0
  127. tests/backtest/diagnose_price_difference.py +97 -0
  128. tests/backtest/direct_api_comparison.py +203 -0
  129. tests/backtest/profile_thetadata_vs_polygon.py +255 -0
  130. tests/backtest/root_cause_analysis.py +109 -0
  131. tests/backtest/test_accuracy_verification.py +244 -0
  132. tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
  133. tests/backtest/test_databento.py +4 -0
  134. tests/backtest/test_databento_comprehensive_trading.py +564 -0
  135. tests/backtest/test_debug_avg_fill_price.py +112 -0
  136. tests/backtest/test_dividends.py +8 -3
  137. tests/backtest/test_example_strategies.py +54 -47
  138. tests/backtest/test_futures_edge_cases.py +451 -0
  139. tests/backtest/test_futures_single_trade.py +270 -0
  140. tests/backtest/test_futures_ultra_simple.py +191 -0
  141. tests/backtest/test_index_data_verification.py +348 -0
  142. tests/backtest/test_polygon.py +45 -24
  143. tests/backtest/test_thetadata.py +246 -60
  144. tests/backtest/test_thetadata_comprehensive.py +729 -0
  145. tests/backtest/test_thetadata_vs_polygon.py +557 -0
  146. tests/backtest/test_yahoo.py +1 -2
  147. tests/conftest.py +20 -0
  148. tests/test_backtesting_data_source_env.py +249 -0
  149. tests/test_backtesting_quiet_logs_complete.py +10 -11
  150. tests/test_databento_helper.py +73 -86
  151. tests/test_databento_timezone_fixes.py +21 -4
  152. tests/test_get_historical_prices.py +6 -6
  153. tests/test_options_helper.py +162 -40
  154. tests/test_polygon_helper.py +21 -13
  155. tests/test_quiet_logs_requirements.py +5 -5
  156. tests/test_thetadata_helper.py +487 -171
  157. tests/test_yahoo_data.py +125 -0
  158. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
  159. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
  160. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,112 @@
1
+ """
2
+ Check where the volume spike happens to determine which provider is correct.
3
+ Market opens at 9:30 AM, so we should see a massive volume spike AT 9:30 AM.
4
+ """
5
+
6
+ import requests
7
+ import datetime
8
+ import pandas as pd
9
+ from polygon import RESTClient
10
+ from lumibot.credentials import POLYGON_API_KEY
11
+
12
+
13
def _print_volume_table(provider, bars):
    """Print ``bars`` as a table and return the time of the first max-volume bar.

    Args:
        provider: Provider label used in the table heading.
        bars: Non-empty list of ``{'time': 'HH:MM', 'volume': number}`` dicts.

    Returns:
        The ``'time'`` value of the first bar whose volume equals the maximum.
    """
    print(f"\n{provider} Bars (first 15):")
    print(f"{'Time':<10} {'Volume':>15} {'Notes':<30}")
    print("-" * 60)
    peak_volume = max(bar['volume'] for bar in bars)
    for bar in bars:
        note = "← SPIKE!" if bar['volume'] == peak_volume else ""
        print(f"{bar['time']:<10} {bar['volume']:>15,} {note:<30}")
    return next(bar['time'] for bar in bars if bar['volume'] == peak_volume)


def check_volume_pattern(symbol, date_str):
    """Check volume patterns around market open.

    Fetches one-minute bars for ``symbol`` from the local ThetaData terminal
    and from Polygon, prints the first 15 bars of each, and reports which
    minute carries the largest volume so the providers' timestamps can be
    compared against the 9:30 AM ET open.

    Args:
        symbol: Ticker symbol queried on both providers.
        date_str: Trading day formatted as ``YYYYMMDD``. It is used for both
            providers (the Polygon query previously ignored it).
    """
    print(f"\n{'='*100}")
    print(f"{symbol} - Volume Pattern Analysis")
    print(f"{'='*100}")

    # Get ThetaData
    response = requests.get('http://127.0.0.1:25510/hist/stock/ohlc', params={
        'root': symbol,
        'start_date': date_str,
        'end_date': date_str,
        'ivl': 60000,
        'rth': 'true'
    })

    data = response.json()
    theta_bars = []
    for row in data['response'][:15]:  # First 15 bars
        # Each row is unpacked as 8 fields; only the time offset and volume are used.
        ms_of_day, _open, _high, _low, _close, volume, _count, _date = row
        hours, remainder = divmod(ms_of_day, 1000 * 60 * 60)
        minutes = remainder // (1000 * 60)
        theta_bars.append({
            'time': f"{hours:02d}:{minutes:02d}",
            'volume': volume
        })

    # Get Polygon
    client = RESTClient(POLYGON_API_KEY)
    # Query the same day that was requested from ThetaData.
    date = datetime.datetime.strptime(date_str, "%Y%m%d").date()
    aggs = client.get_aggs(ticker=symbol, multiplier=1, timespan="minute", from_=date, to=date, limit=50000)

    polygon_bars = []
    for agg in aggs:
        if len(polygon_bars) >= 15:
            break
        # Convert with the real Eastern zone so the result is right on both
        # sides of a daylight-saving change, not only at UTC-4.
        dt_et = pd.to_datetime(agg.timestamp, unit="ms", utc=True).tz_convert("America/New_York")

        # Only first 15 bars from 9:25 onwards
        if dt_et.hour > 9 or (dt_et.hour == 9 and dt_et.minute >= 25):
            polygon_bars.append({
                'time': dt_et.strftime("%H:%M"),
                'volume': agg.volume
            })

    # max() on an empty sequence raises ValueError, so stop early when a
    # provider has nothing for this day (holiday, bad symbol, ...).
    if not theta_bars or not polygon_bars:
        print(f"\nNo bars to compare (ThetaData: {len(theta_bars)}, Polygon: {len(polygon_bars)})")
        return

    theta_spike_time = _print_volume_table("ThetaData", theta_bars)
    polygon_spike_time = _print_volume_table("Polygon", polygon_bars)

    # Analysis
    print(f"\n{'='*100}")
    print(f"ANALYSIS")
    print(f"{'='*100}")
    print(f"ThetaData: Volume spike at {theta_spike_time}")
    print(f"Polygon: Volume spike at {polygon_spike_time}")
    print(f"\nMarket officially opens at 9:30 AM ET")

    if polygon_spike_time == "09:30":
        print(f"✓ Polygon shows spike at 9:30 AM (CORRECT - matches market open)")
    else:
        print(f"✗ Polygon shows spike at {polygon_spike_time} (WRONG - doesn't match market open)")

    if theta_spike_time == "09:30":
        print(f"✓ ThetaData shows spike at 9:30 AM (CORRECT - matches market open)")
    elif theta_spike_time == "09:31":
        print(f"✗ ThetaData shows spike at 9:31 AM (WRONG - should be at 9:30)")
        print(f" → This suggests ThetaData timestamps are OFF BY +1 MINUTE")
    else:
        print(f"✗ ThetaData shows spike at {theta_spike_time} (UNEXPECTED)")
101
+
102
+
103
def main():
    """Run the volume-pattern check for AMZN, AAPL and SPY on 2024-08-01."""
    trading_day = "20240801"
    for ticker in ("AMZN", "AAPL", "SPY"):
        check_volume_pattern(ticker, trading_day)


if __name__ == "__main__":
    main()
@@ -0,0 +1,166 @@
1
+ """
2
+ Comprehensive comparison test between ThetaData and Polygon across:
3
+ - Multiple stocks (AMZN, AAPL, SPY, TSLA, PLTR)
4
+ - Multiple times of day (9:30, 10:00, 12:00, 14:00, 15:30)
5
+ - Check for systematic patterns vs random differences
6
+ """
7
+
8
+ import datetime
9
+ import os
10
+ import pandas as pd
11
+ from lumibot.backtesting import ThetaDataBacktesting, PolygonDataBacktesting
12
+ from lumibot.entities import Asset
13
+ from lumibot.credentials import POLYGON_API_KEY
14
+
15
+
16
def get_bar_at_time(data_source_class, symbol, date, hour, minute):
    """Return the minute bar stamped ``hour:minute`` on ``date`` for ``symbol``.

    A backtesting data source covering 09:00-16:00 of ``date`` is built
    (ThetaData when ``data_source_class`` is ``ThetaDataBacktesting``,
    Polygon otherwise), the day's minute bars are requested, and the first
    bar whose index matches the requested hour and minute is returned.

    Args:
        data_source_class: Backtesting data source class selecting the provider.
        symbol: Stock ticker to look up.
        date: Object exposing ``year``, ``month`` and ``day``.
        hour: Hour of the wanted bar, compared against the bar index.
        minute: Minute of the wanted bar, compared against the bar index.

    Returns:
        A dict with ``symbol``, ``datetime`` and float OHLCV values, or
        ``None`` when no bars come back or none matches the requested time.
    """
    window_open = datetime.datetime(date.year, date.month, date.day, 9, 0)
    window_close = datetime.datetime(date.year, date.month, date.day, 16, 0)

    if data_source_class == ThetaDataBacktesting:
        source = ThetaDataBacktesting(
            datetime_start=window_open,
            datetime_end=window_close,
            username=os.environ.get("THETADATA_USERNAME"),
            password=os.environ.get("THETADATA_PASSWORD"),
        )
    else:
        source = PolygonDataBacktesting(
            datetime_start=window_open,
            datetime_end=window_close,
            api_key=POLYGON_API_KEY,
        )

    asset = Asset(symbol, asset_type="stock")

    # Pull every minute bar in the window, then pick the wanted one.
    bars = source.get_historical_prices_between_dates(
        asset, "minute", start_date=window_open, end_date=window_close
    )

    if not bars or bars.df.empty:
        return None

    frame = bars.df
    for stamp in frame.index:
        if stamp.hour != hour or stamp.minute != minute:
            continue
        row = frame.loc[stamp]
        found = {"symbol": symbol, "datetime": stamp}
        for field in ("open", "high", "low", "close", "volume"):
            found[field] = float(row[field])
        return found

    return None
59
+
60
+
61
def compare_providers():
    """Compare ThetaData and Polygon bars across several stocks and times.

    For each ticker and each checkpoint time on 2024-08-01 the matching bar
    is fetched from both providers; open, close and volume differences are
    printed and collected. When at least one comparison succeeds, summary
    statistics are printed and the rows are written to
    ``thetadata_vs_polygon_comparison.csv``.

    Returns:
        A DataFrame with one row per successful comparison, or ``None`` when
        nothing could be compared.
    """
    test_date = datetime.date(2024, 8, 1)
    checkpoints = (
        (9, 30, "Market Open"),
        (10, 0, "Early Morning"),
        (12, 0, "Midday"),
        (14, 0, "Afternoon"),
        (15, 30, "Near Close"),
    )

    rows = []

    print(f"\nComprehensive ThetaData vs Polygon Comparison")
    print(f"Date: {test_date}")
    print(f"=" * 120)

    for symbol in ("AMZN", "AAPL", "SPY", "TSLA", "PLTR"):
        print(f"\n{symbol}:")
        print("-" * 120)

        for hour, minute, label in checkpoints:
            print(f"\n {label} ({hour}:{minute:02d} ET):")

            try:
                theta_bar = get_bar_at_time(ThetaDataBacktesting, symbol, test_date, hour, minute)
                polygon_bar = get_bar_at_time(PolygonDataBacktesting, symbol, test_date, hour, minute)

                if not (theta_bar and polygon_bar):
                    print(f" ❌ Missing data (Theta: {theta_bar is not None}, Polygon: {polygon_bar is not None})")
                    continue

                # Calculate differences
                open_diff = theta_bar["open"] - polygon_bar["open"]
                close_diff = theta_bar["close"] - polygon_bar["close"]
                volume_diff = theta_bar["volume"] - polygon_bar["volume"]
                if polygon_bar["volume"] > 0:
                    volume_pct = volume_diff / polygon_bar["volume"] * 100
                else:
                    volume_pct = 0

                print(f" ThetaData : O=${theta_bar['open']:.2f} H=${theta_bar['high']:.2f} L=${theta_bar['low']:.2f} C=${theta_bar['close']:.2f} V={theta_bar['volume']:,.0f}")
                print(f" Polygon : O=${polygon_bar['open']:.2f} H=${polygon_bar['high']:.2f} L=${polygon_bar['low']:.2f} C=${polygon_bar['close']:.2f} V={polygon_bar['volume']:,.0f}")
                print(f" Difference: O=${open_diff:+.3f} C=${close_diff:+.3f} V={volume_diff:+,.0f} ({volume_pct:+.1f}%)")

                rows.append({
                    "symbol": symbol,
                    "time": f"{hour}:{minute:02d}",
                    "label": label,
                    "theta_open": theta_bar["open"],
                    "polygon_open": polygon_bar["open"],
                    "open_diff": open_diff,
                    "theta_close": theta_bar["close"],
                    "polygon_close": polygon_bar["close"],
                    "close_diff": close_diff,
                    "theta_volume": theta_bar["volume"],
                    "polygon_volume": polygon_bar["volume"],
                    "volume_diff": volume_diff,
                    "volume_pct_diff": volume_pct,
                })

            except Exception as e:
                print(f" ❌ Error: {e}")

    if not rows:
        return None

    # Create summary statistics
    df = pd.DataFrame(rows)

    print(f"\n{'=' * 120}")
    print(f"SUMMARY STATISTICS")
    print(f"{'=' * 120}")

    for title, column in (("Open", "open_diff"), ("Close", "close_diff")):
        series = df[column]
        print(f"\nPrice Differences ({title}):")
        print(f" Mean: ${series.mean():.4f}")
        print(f" Std: ${series.std():.4f}")
        print(f" Min: ${series.min():.4f}")
        print(f" Max: ${series.max():.4f}")
        print(f" Abs Mean: ${series.abs().mean():.4f}")

    vol_abs = df['volume_diff']
    vol_pct = df['volume_pct_diff']
    print(f"\nVolume Differences:")
    print(f" Mean: {vol_abs.mean():,.0f} ({vol_pct.mean():+.2f}%)")
    print(f" Std: {vol_abs.std():,.0f}")
    print(f" Min: {vol_abs.min():,.0f} ({vol_pct.min():+.2f}%)")
    print(f" Max: {vol_abs.max():,.0f} ({vol_pct.max():+.2f}%)")

    # Check if ThetaData consistently has higher volume
    higher_volume_count = (vol_abs > 0).sum()
    total_count = len(df)
    print(f"\nThetaData has HIGHER volume in {higher_volume_count}/{total_count} cases ({higher_volume_count/total_count*100:.1f}%)")

    # Save to CSV
    df.to_csv("thetadata_vs_polygon_comparison.csv", index=False)
    print(f"\n✓ Results saved to thetadata_vs_polygon_comparison.csv")

    return df


if __name__ == "__main__":
    compare_providers()
@@ -0,0 +1,91 @@
1
+ """
2
+ Deep dive comparison: ThetaData vs Polygon
3
+ Logs every detail to understand divergence
4
+ """
5
+ import os
6
+ import sys
7
+ import logging
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+
11
+ # Enable detailed logging
12
# Enable detailed logging: everything at DEBUG goes to both the log file
# and the console so the two backtests can be compared line by line.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('/tmp/thetadata_debug.log'),
        logging.StreamHandler()
    ]
)

# Set environment (must happen before lumibot is imported below)
os.environ['BACKTESTING_START'] = '2025-09-01'
os.environ['BACKTESTING_END'] = '2025-09-05'
os.environ['BACKTESTING_QUIET_LOGS'] = 'False'
os.environ['BACKTESTING_SHOW_PROGRESS_BAR'] = 'True'

# NOTE(review): developer-specific absolute paths; this script only runs on
# a machine with this exact directory layout.
sys.path.insert(0, '/Users/robertgrzesik/Documents/Development/lumivest_bot_server/strategies/lumibot')

from lumibot.strategies import Strategy
from lumibot.entities import Asset
from lumibot.backtesting import ThetaDataBacktesting, PolygonDataBacktesting
from dotenv import load_dotenv

load_dotenv('/Users/robertgrzesik/Documents/Development/Strategy Library/Demos/.env.test')

# Import the strategy. The file name contains spaces, so it cannot be
# imported normally and is executed into this module's namespace instead
# (which is expected to define PLTRWeeklyCallRoller).
# NOTE(review): exec runs arbitrary code from that path; only acceptable
# because it is a trusted local file.
_strategy_path = Path('/Users/robertgrzesik/Documents/Development/Strategy Library/Demos/PLTR Weekly Call Roller.py')
# read_text() closes the file handle; compiling with the real file name
# makes tracebacks point at the strategy file.
exec(compile(_strategy_path.read_text(), str(_strategy_path), 'exec'))

print("=" * 100)
print("DETAILED THETADATA BACKTEST")
print("=" * 100)

os.environ['BACKTESTING_DATA_SOURCE'] = 'ThetaData'
theta_results = PLTRWeeklyCallRoller.backtest(
    ThetaDataBacktesting,
    backtesting_start=datetime(2025, 9, 1),
    backtesting_end=datetime(2025, 9, 5),
    benchmark_asset=Asset("SPY", Asset.AssetType.STOCK),
    quote_asset=Asset("USD", Asset.AssetType.FOREX),
)

print("\n" + "=" * 100)
print("DETAILED POLYGON BACKTEST")
print("=" * 100)

os.environ['BACKTESTING_DATA_SOURCE'] = 'Polygon'
polygon_results = PLTRWeeklyCallRoller.backtest(
    PolygonDataBacktesting,
    backtesting_start=datetime(2025, 9, 1),
    backtesting_end=datetime(2025, 9, 5),
    benchmark_asset=Asset("SPY", Asset.AssetType.STOCK),
    quote_asset=Asset("USD", Asset.AssetType.FOREX),
)

print("\n" + "=" * 100)
print("COMPARISON ANALYSIS")
print("=" * 100)

# Portfolio values
theta_final = theta_results['portfolio_value'][-1]
polygon_final = polygon_results['portfolio_value'][-1]

print(f"\nFinal Portfolio Values:")
print(f" ThetaData: ${theta_final:,.2f}")
print(f" Polygon: ${polygon_final:,.2f}")
print(f" Difference: ${abs(theta_final - polygon_final):,.2f}")

# Returns
# NOTE(review): assumes both backtests start with a budget of 100000;
# confirm against the strategy's configured budget.
_starting_budget = 100000
theta_return = (theta_final - _starting_budget) / _starting_budget * 100
polygon_return = (polygon_final - _starting_budget) / _starting_budget * 100
print(f"\nReturns:")
print(f" ThetaData: {theta_return:.2f}%")
print(f" Polygon: {polygon_return:.2f}%")

# Analyze trades from logs
print(f"\nLog file created at: /tmp/thetadata_debug.log")
print("Search log for:")
print(" - 'Submitting order' to see trade submissions")
print(" - 'Fill price' to see execution prices")
print(" - 'bid' and 'ask' to see quote data")
print(" - 'get_last_price' to see price lookups")
@@ -0,0 +1,97 @@
1
+ """
2
+ Diagnostic script to examine raw bar data from ThetaData and Polygon
3
+ to understand why prices differ.
4
+ """
5
+
6
+ import datetime
7
+ import os
8
+ from lumibot.backtesting import ThetaDataBacktesting, PolygonDataBacktesting
9
+ from lumibot.entities import Asset
10
+ from lumibot.credentials import POLYGON_API_KEY
11
+
12
+
13
def examine_data_source(data_source_class, name):
    """Examine raw data from a data source.

    Builds a backtesting data source for 2024-08-01 09:30-10:00, pins its
    clock to 09:30 (UTC-4), and prints the AMZN minute bars, the last price,
    the internal bar selected for the current datetime, and the quote.

    Args:
        data_source_class: ``ThetaDataBacktesting`` selects ThetaData; any
            other value builds a ``PolygonDataBacktesting`` source.
        name: Label printed in the section heading.
    """
    print(f"\n{'='*80}")
    print(f"{name} Data Source")
    print(f"{'='*80}")

    start = datetime.datetime(2024, 8, 1, 9, 30)
    end = datetime.datetime(2024, 8, 1, 10, 0)

    if data_source_class == ThetaDataBacktesting:
        data_source = ThetaDataBacktesting(
            datetime_start=start,
            datetime_end=end,
            username=os.environ.get("THETADATA_USERNAME"),
            password=os.environ.get("THETADATA_PASSWORD"),
        )
    else:
        data_source = PolygonDataBacktesting(
            datetime_start=start,
            datetime_end=end,
            api_key=POLYGON_API_KEY,
        )

    # Set datetime to 9:30 AM
    data_source.datetime = datetime.datetime(2024, 8, 1, 9, 30, tzinfo=datetime.timezone(datetime.timedelta(hours=-4)))

    asset = Asset("AMZN", asset_type="stock")

    # Get historical bars
    bars = data_source.get_historical_prices(asset, 5, "minute")

    # An empty frame is treated like "no bars": df.iloc[0] below would
    # otherwise raise IndexError.
    if bars and not bars.df.empty:
        df = bars.df
        print(f"\nFirst 5 minute bars:")
        print(df.head(10))
        print(f"\nColumns: {df.columns.tolist()}")
        print(f"\nFirst bar details:")
        first_bar = df.iloc[0]
        for col in df.columns:
            print(f" {col}: {first_bar[col]}")
    else:
        print("No bars returned")

    # Get last price
    price = data_source.get_last_price(asset)
    print(f"\nget_last_price(): ${price}")

    # Get the Data object and check what bar it's using
    tuple_to_find = data_source.find_asset_in_data_store(asset, None)
    if tuple_to_find in data_source._data_store:
        data = data_source._data_store[tuple_to_find]
        dt = data_source.get_datetime()
        iter_count = data.get_iter_count(dt)
        bar_datetime = data.datalines['datetime'].dataline[iter_count]
        print(f"\niter_count: {iter_count}")
        print(f"Bar datetime: {bar_datetime}")
        print(f"Bar open: {data.datalines['open'].dataline[iter_count]}")
        print(f"Bar close: {data.datalines['close'].dataline[iter_count]}")
        print(f"Current dt: {dt}")
        # True here means the current time is after the selected bar's timestamp.
        print(f"dt > bar_datetime: {dt > bar_datetime}")

    # Get quote
    quote = data_source.get_quote(asset)
    print(f"\nget_quote():")
    print(f" price: {quote.price}")
    print(f" bid: {quote.bid}")
    print(f" ask: {quote.ask}")
    print(f" volume: {quote.volume}")
    print(f" timestamp: {quote.timestamp}")
    if hasattr(quote, 'raw_data') and quote.raw_data:
        print(f" raw_data: {quote.raw_data}")
83
+
84
+
85
if __name__ == "__main__":
    # Dump the same diagnostics for both providers, ThetaData first.
    for source_class, source_label in (
        (ThetaDataBacktesting, "THETADATA"),
        (PolygonDataBacktesting, "POLYGON"),
    ):
        examine_data_source(source_class, source_label)

    # Closing checklist of questions to answer from the output above.
    closing_lines = (
        f"\n{'='*80}",
        "ANALYSIS",
        f"{'='*80}",
        "If the 'open' prices differ in the first bar, that's the root cause.",
        "We need to investigate WHY the open prices differ:",
        " 1. Are they pulling from different exchanges?",
        " 2. Are they using different data types (trade vs NBBO)?",
        " 3. Is there a timestamp alignment issue?",
        " 4. Is one source incorrect?",
    )
    for text in closing_lines:
        print(text)
@@ -0,0 +1,203 @@
1
+ """
2
+ Direct API comparison between ThetaData and Polygon.
3
+ Bypasses Lumibot classes to isolate any framework issues.
4
+ """
5
+
6
+ import os
7
+ import datetime
8
+ import pandas as pd
9
+ import requests
10
+ from polygon import RESTClient
11
+ from lumibot.credentials import POLYGON_API_KEY
12
+
13
+
14
def get_thetadata_bars(symbol, date_str):
    """Fetch one day of one-minute bars straight from the ThetaData HTTP API.

    Args:
        symbol: Ticker sent as the ``root`` query parameter.
        date_str: Day sent as both ``start_date`` and ``end_date``.

    Returns:
        A DataFrame of open/high/low/close/volume indexed by datetimes
        localized to America/New_York, or ``None`` when the payload is empty
        or has no ``"response"`` key.
    """
    payload = requests.get(
        "http://127.0.0.1:25510/hist/stock/ohlc",
        params={
            "root": symbol,
            "start_date": date_str,
            "end_date": date_str,
            "ivl": 60000,  # 1 minute
            "rth": "true"
        },
    ).json()

    if not payload or "response" not in payload:
        return None

    # Column names come from the payload's own header.
    frame = pd.DataFrame(payload["response"], columns=payload["header"]["format"])

    # Timestamp = calendar day + milliseconds since midnight; the values are
    # then localized as Eastern wall-clock time (not UTC).
    day = pd.to_datetime(frame["date"].astype(str), format="%Y%m%d")
    since_midnight = pd.to_timedelta(frame["ms_of_day"], unit="ms")
    frame["datetime"] = day + since_midnight
    frame = frame.set_index("datetime")
    frame.index = frame.index.tz_localize("America/New_York")

    return frame[["open", "high", "low", "close", "volume"]]
42
+
43
+
44
def get_polygon_bars(symbol, date):
    """Fetch one day of one-minute bars straight from the Polygon REST client.

    Args:
        symbol: Ticker passed to ``get_aggs``.
        date: Day passed as both ``from_`` and ``to``.

    Returns:
        A DataFrame of open/high/low/close/volume indexed by UTC-aware
        datetimes, or ``None`` when no aggregates come back.
    """
    aggs = RESTClient(POLYGON_API_KEY).get_aggs(
        ticker=symbol,
        multiplier=1,
        timespan="minute",
        from_=date,
        to=date,
        limit=50000
    )

    # Aggregate timestamps are divided by 1000 and read as UTC epoch seconds.
    records = [
        {
            "datetime": datetime.datetime.fromtimestamp(agg.timestamp/1000, tz=datetime.timezone.utc),
            "open": agg.open,
            "high": agg.high,
            "low": agg.low,
            "close": agg.close,
            "volume": agg.volume,
        }
        for agg in aggs
    ]

    if not records:
        return None

    return pd.DataFrame(records).set_index("datetime")
75
+
76
+
77
def compare_bar(theta_bar, polygon_bar, symbol, time_str):
    """Print and return the field-by-field gap between two providers' bars.

    Args:
        theta_bar: Mapping with ``open``/``high``/``low``/``close``/``volume``
            from ThetaData.
        polygon_bar: Mapping with the same keys from Polygon.
        symbol: Ticker recorded in the result.
        time_str: Label printed as the heading and recorded as ``time``.

    Returns:
        A dict with both providers' open, close and volume, their differences
        (ThetaData minus Polygon), and the volume difference as a percentage
        of Polygon's volume (0 when Polygon's volume is not positive).
    """
    gap = {
        field: theta_bar[field] - polygon_bar[field]
        for field in ("open", "high", "low", "close", "volume")
    }
    if polygon_bar["volume"] > 0:
        volume_pct = gap["volume"] / polygon_bar["volume"] * 100
    else:
        volume_pct = 0

    print(f"\n {time_str}:")
    print(f" ThetaData : O=${theta_bar['open']:.3f} H=${theta_bar['high']:.3f} L=${theta_bar['low']:.3f} C=${theta_bar['close']:.3f} V={theta_bar['volume']:,.0f}")
    print(f" Polygon : O=${polygon_bar['open']:.3f} H=${polygon_bar['high']:.3f} L=${polygon_bar['low']:.3f} C=${polygon_bar['close']:.3f} V={polygon_bar['volume']:,.0f}")
    print(f" Difference: O=${gap['open']:+.3f} H=${gap['high']:+.3f} L=${gap['low']:+.3f} C=${gap['close']:+.3f} V={gap['volume']:+,.0f} ({volume_pct:+.1f}%)")

    summary = {"symbol": symbol, "time": time_str}
    # Key order is kept (theta, polygon, diff per field) because it becomes
    # the column order when the results are written to CSV.
    for field in ("open", "close", "volume"):
        summary[f"theta_{field}"] = theta_bar[field]
        summary[f"polygon_{field}"] = polygon_bar[field]
        summary[f"{field}_diff"] = gap[field]
    summary["volume_pct_diff"] = volume_pct
    return summary
105
+
106
+
107
def main():
    """Compare ThetaData and Polygon minute bars fetched directly from their APIs.

    For each ticker the full day of bars is pulled from both providers,
    Polygon's index is converted to America/New_York, and the bars at a fixed
    set of times are compared. When at least one comparison succeeds, summary
    statistics are printed and the rows are saved to
    ``direct_api_comparison.csv``.
    """
    date = datetime.date(2024, 8, 1)
    # Derived from ``date`` so both providers always query the same day.
    date_str = date.strftime("%Y%m%d")
    symbols = ["AMZN", "AAPL", "SPY", "TSLA", "PLTR"]

    # Times to check, in Eastern Time: both frames are indexed in
    # America/New_York before the hour/minute match below.
    times_to_check = [
        ("09:30", "Market Open"),
        ("10:00", "Early Morning"),
        ("12:00", "Midday"),
        ("14:00", "Afternoon"),
        ("15:30", "Near Close"),
    ]

    results = []

    print(f"\n{'='*120}")
    print(f"Direct API Comparison: ThetaData vs Polygon")
    print(f"Date: {date}")
    print(f"{'='*120}")

    for symbol in symbols:
        print(f"\n{symbol}:")
        print("-" * 120)

        # Get all bars for the day from both sources
        theta_df = get_thetadata_bars(symbol, date_str)
        polygon_df = get_polygon_bars(symbol, date)

        if theta_df is None:
            print(f" ❌ No ThetaData bars")
            continue

        if polygon_df is None:
            print(f" ❌ No Polygon bars")
            continue

        # ThetaData is already in ET, Polygon needs conversion from UTC to ET
        polygon_df.index = polygon_df.index.tz_convert("America/New_York")

        # Compare specific times
        for time_str, label in times_to_check:
            hour, minute = map(int, time_str.split(":"))

            # Find matching bars
            theta_matches = theta_df[(theta_df.index.hour == hour) & (theta_df.index.minute == minute)]
            polygon_matches = polygon_df[(polygon_df.index.hour == hour) & (polygon_df.index.minute == minute)]

            if not theta_matches.empty and not polygon_matches.empty:
                theta_bar = theta_matches.iloc[0]
                polygon_bar = polygon_matches.iloc[0]

                result = compare_bar(theta_bar, polygon_bar, symbol, f"{label} ({time_str})")
                results.append(result)
            else:
                print(f"\n {label} ({time_str}): ❌ Missing bars (Theta: {not theta_matches.empty}, Polygon: {not polygon_matches.empty})")

    # Summary statistics
    if results:
        df = pd.DataFrame(results)

        print(f"\n{'='*120}")
        print(f"SUMMARY STATISTICS ({len(results)} comparisons)")
        print(f"{'='*120}")

        print(f"\nPrice Differences (Open):")
        print(f" Mean: ${df['open_diff'].mean():.4f}")
        print(f" Std: ${df['open_diff'].std():.4f}")
        print(f" Min: ${df['open_diff'].min():.4f}")
        print(f" Max: ${df['open_diff'].max():.4f}")
        print(f" Abs Mean: ${df['open_diff'].abs().mean():.4f}")

        print(f"\nPrice Differences (Close):")
        print(f" Mean: ${df['close_diff'].mean():.4f}")
        print(f" Std: ${df['close_diff'].std():.4f}")
        print(f" Min: ${df['close_diff'].min():.4f}")
        print(f" Max: ${df['close_diff'].max():.4f}")
        print(f" Abs Mean: ${df['close_diff'].abs().mean():.4f}")

        print(f"\nVolume Differences:")
        print(f" Mean: {df['volume_diff'].mean():,.0f} ({df['volume_pct_diff'].mean():+.2f}%)")
        print(f" Std: {df['volume_diff'].std():,.0f}")
        print(f" Min: {df['volume_diff'].min():,.0f} ({df['volume_pct_diff'].min():+.2f}%)")
        print(f" Max: {df['volume_diff'].max():,.0f} ({df['volume_pct_diff'].max():+.2f}%)")

        # Check patterns
        higher_volume_count = (df['volume_diff'] > 0).sum()
        total_count = len(df)
        print(f"\nThetaData has HIGHER volume in {higher_volume_count}/{total_count} cases ({higher_volume_count/total_count*100:.1f}%)")

        # Save results
        df.to_csv("direct_api_comparison.csv", index=False)
        print(f"\n✓ Results saved to direct_api_comparison.csv")


if __name__ == "__main__":
    main()