lumibot 4.0.22__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot might be problematic. Click here for more details.

Files changed (164)
  1. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  2. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  3. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  4. lumibot/backtesting/__init__.py +6 -5
  5. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  6. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  7. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  8. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  9. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  10. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  11. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  12. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  13. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  14. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  15. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  16. lumibot/backtesting/backtesting_broker.py +209 -9
  17. lumibot/backtesting/databento_backtesting.py +141 -24
  18. lumibot/backtesting/thetadata_backtesting.py +63 -42
  19. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  20. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  21. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  22. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  23. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  24. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  25. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  26. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  27. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  28. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  29. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  30. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  31. lumibot/brokers/alpaca.py +11 -1
  32. lumibot/brokers/tradeovate.py +475 -0
  33. lumibot/components/grok_news_helper.py +284 -0
  34. lumibot/components/options_helper.py +90 -34
  35. lumibot/credentials.py +3 -0
  36. lumibot/data_sources/__init__.py +2 -1
  37. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  38. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  39. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  40. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  41. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  42. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  43. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  44. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  45. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  46. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  47. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  48. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  49. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  50. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  51. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  52. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  53. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  54. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  55. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  56. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  57. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  58. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  59. lumibot/data_sources/data_source_backtesting.py +3 -5
  60. lumibot/data_sources/databento_data.py +5 -5
  61. lumibot/data_sources/databento_data_polars_backtesting.py +636 -0
  62. lumibot/data_sources/databento_data_polars_live.py +793 -0
  63. lumibot/data_sources/pandas_data.py +6 -3
  64. lumibot/data_sources/polars_mixin.py +126 -21
  65. lumibot/data_sources/tradeovate_data.py +80 -0
  66. lumibot/data_sources/tradier_data.py +2 -1
  67. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  68. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  69. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  70. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  71. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  72. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  73. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  74. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  75. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  76. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  77. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  78. lumibot/entities/asset.py +8 -0
  79. lumibot/entities/order.py +1 -1
  80. lumibot/entities/quote.py +14 -0
  81. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  82. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  83. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  84. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  85. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  86. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  87. lumibot/strategies/_strategy.py +95 -27
  88. lumibot/strategies/strategy.py +5 -6
  89. lumibot/strategies/strategy_executor.py +2 -2
  90. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  91. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  92. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  93. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  94. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  95. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  96. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  97. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  98. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  99. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  100. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  101. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  102. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  103. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  104. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  105. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  106. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  107. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  108. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  109. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  110. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  111. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  112. lumibot/tools/databento_helper.py +384 -133
  113. lumibot/tools/databento_helper_polars.py +218 -156
  114. lumibot/tools/databento_roll.py +216 -0
  115. lumibot/tools/lumibot_logger.py +32 -17
  116. lumibot/tools/polygon_helper.py +65 -0
  117. lumibot/tools/thetadata_helper.py +588 -70
  118. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  119. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  120. lumibot/traders/trader.py +1 -1
  121. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  122. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  123. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  124. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
  125. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/RECORD +164 -46
  126. tests/backtest/check_timing_offset.py +198 -0
  127. tests/backtest/check_volume_spike.py +112 -0
  128. tests/backtest/comprehensive_comparison.py +166 -0
  129. tests/backtest/debug_comparison.py +91 -0
  130. tests/backtest/diagnose_price_difference.py +97 -0
  131. tests/backtest/direct_api_comparison.py +203 -0
  132. tests/backtest/profile_thetadata_vs_polygon.py +255 -0
  133. tests/backtest/root_cause_analysis.py +109 -0
  134. tests/backtest/test_accuracy_verification.py +244 -0
  135. tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
  136. tests/backtest/test_databento.py +57 -0
  137. tests/backtest/test_databento_comprehensive_trading.py +564 -0
  138. tests/backtest/test_debug_avg_fill_price.py +112 -0
  139. tests/backtest/test_dividends.py +8 -3
  140. tests/backtest/test_example_strategies.py +54 -47
  141. tests/backtest/test_futures_edge_cases.py +451 -0
  142. tests/backtest/test_futures_single_trade.py +270 -0
  143. tests/backtest/test_futures_ultra_simple.py +191 -0
  144. tests/backtest/test_index_data_verification.py +348 -0
  145. tests/backtest/test_polygon.py +45 -24
  146. tests/backtest/test_thetadata.py +246 -60
  147. tests/backtest/test_thetadata_comprehensive.py +729 -0
  148. tests/backtest/test_thetadata_vs_polygon.py +557 -0
  149. tests/backtest/test_yahoo.py +1 -2
  150. tests/conftest.py +20 -0
  151. tests/test_backtesting_data_source_env.py +249 -0
  152. tests/test_backtesting_quiet_logs_complete.py +10 -11
  153. tests/test_databento_helper.py +73 -86
  154. tests/test_databento_live.py +10 -10
  155. tests/test_databento_timezone_fixes.py +21 -4
  156. tests/test_get_historical_prices.py +6 -6
  157. tests/test_options_helper.py +162 -40
  158. tests/test_polygon_helper.py +21 -13
  159. tests/test_quiet_logs_requirements.py +5 -5
  160. tests/test_thetadata_helper.py +487 -171
  161. tests/test_yahoo_data.py +125 -0
  162. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
  163. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
  164. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,7 @@ import re
4
4
  from datetime import date, datetime, timedelta, timezone
5
5
  from decimal import Decimal
6
6
  from pathlib import Path
7
- from typing import Dict, List, Optional, Union
7
+ from typing import Dict, List, Optional, Tuple, Union
8
8
 
9
9
  import pytz
10
10
 
@@ -13,6 +13,7 @@ from polars.datatypes import Datetime as PlDatetime
13
13
 
14
14
  from lumibot.constants import LUMIBOT_CACHE_FOLDER, LUMIBOT_DEFAULT_PYTZ
15
15
  from lumibot.entities import Asset
16
+ from lumibot.tools import databento_helper, databento_roll
16
17
 
17
18
  # Set up module-specific logger
18
19
  from lumibot.tools.lumibot_logger import get_logger
@@ -31,7 +32,7 @@ except ImportError:
31
32
  logger.warning("DataBento package not available. Please install with: pip install databento")
32
33
 
33
34
  # Cache settings
34
- CACHE_SUBFOLDER = "databento_polars"
35
+ CACHE_SUBFOLDER = "databento_polars_v2"
35
36
  LUMIBOT_DATABENTO_CACHE_FOLDER = os.path.join(LUMIBOT_CACHE_FOLDER, CACHE_SUBFOLDER)
36
37
  RECENT_FILE_TOLERANCE_DAYS = 14
37
38
  MAX_DATABENTO_DAYS = 365 # DataBento can handle larger date ranges than some providers
@@ -43,15 +44,8 @@ if not os.path.exists(LUMIBOT_DATABENTO_CACHE_FOLDER):
43
44
  except Exception as e:
44
45
  logger.warning(f"Could not create DataBento cache folder: {e}")
45
46
 
46
- # ============================================================================
47
- # PERFORMANCE CACHES - Critical for backtesting performance
48
- # ============================================================================
49
- # These caches dramatically reduce overhead for high-frequency function calls
50
- # Symbol resolution cache: saves ~2.5s on 362k calls (10-20x speedup)
51
- _SYMBOL_RESOLUTION_CACHE = {} # {(asset_symbol, asset_type, dt_str): resolved_symbol}
52
-
53
- # Datetime normalization cache: saves ~1.2s on 362k calls (5-10x speedup)
54
- _DATETIME_NORMALIZATION_CACHE = {} # {dt_timestamp: normalized_dt}
47
+ # Instrument definition cache: stores multipliers and contract specs
48
+ _INSTRUMENT_DEFINITION_CACHE = {} # {(symbol, dataset): definition_dict}
55
49
 
56
50
 
57
51
  class DataBentoClientPolars:
@@ -412,7 +406,15 @@ class DataBentoClientPolars:
412
406
  pandas_df = pandas_df.rename(columns={index_name: 'datetime'})
413
407
  # Convert to polars
414
408
  df = pl.from_pandas(pandas_df)
415
- logger.debug(f"[DataBentoClientPolars] Converted to polars, columns: {df.columns}")
409
+ logger.info(f"[DataBentoClientPolars] Converted to polars, shape: {df.shape}, columns: {df.columns}")
410
+
411
+ # DEBUG: Check for duplicates immediately after conversion
412
+ if 'datetime' in df.columns:
413
+ dup_count = df.filter(df['datetime'].is_duplicated()).height
414
+ if dup_count > 0:
415
+ logger.warning(f"[DataBentoClientPolars] ⚠️ FOUND {dup_count} DUPLICATE TIMESTAMPS AFTER CONVERSION!")
416
+ else:
417
+ logger.info(f"[DataBentoClientPolars] ✓ No duplicates after conversion")
416
418
  # Ensure datetime column is datetime type
417
419
  if 'datetime' in df.columns:
418
420
  df = df.with_columns(pl.col('datetime').cast(pl.Datetime))
@@ -640,124 +642,28 @@ def _build_cache_filename(
640
642
  return path
641
643
 
642
644
 
643
- def _normalize_reference_datetime(dt: datetime) -> datetime:
644
- """
645
- Normalize datetime to the default Lumibot timezone and drop tzinfo.
646
-
647
- PERFORMANCE OPTIMIZATION: This function is called 362k+ times during backtesting.
648
- Caching provides 5-10x speedup, saving ~1.2s per backtest.
649
- """
650
- if dt is None:
651
- return dt
652
-
653
- # Cache key: use timestamp for faster lookup than full datetime
654
- cache_key = dt.timestamp() if hasattr(dt, 'timestamp') else None
655
-
656
- if cache_key is not None and cache_key in _DATETIME_NORMALIZATION_CACHE:
657
- return _DATETIME_NORMALIZATION_CACHE[cache_key]
658
-
659
- # Perform normalization
660
- if dt.tzinfo is not None:
661
- normalized = dt.astimezone(LUMIBOT_DEFAULT_PYTZ).replace(tzinfo=None)
662
- else:
663
- normalized = dt
664
-
665
- # Cache the result
666
- if cache_key is not None:
667
- _DATETIME_NORMALIZATION_CACHE[cache_key] = normalized
668
-
669
- return normalized
670
-
671
-
672
- def _resolve_databento_symbol_for_datetime(asset: Asset, dt: datetime) -> str:
673
- """
674
- Resolve the expected DataBento symbol for a datetime using the strategy roll rules.
675
-
676
- PERFORMANCE OPTIMIZATION: This function is called 362k+ times during backtesting.
677
- Caching provides 10-20x speedup, saving ~2.5s per backtest.
678
- """
679
- # Create cache key from asset and datetime
680
- # Use normalized datetime string for consistent caching
681
- dt_timestamp = dt.timestamp() if hasattr(dt, 'timestamp') else str(dt)
682
- cache_key = (asset.symbol, asset.asset_type, dt_timestamp)
683
-
684
- if cache_key in _SYMBOL_RESOLUTION_CACHE:
685
- return _SYMBOL_RESOLUTION_CACHE[cache_key]
686
-
687
- # Perform symbol resolution
688
- reference_dt = _normalize_reference_datetime(dt)
689
- variants = asset.resolve_continuous_futures_contract_variants(reference_date=reference_dt)
690
- contract = variants[2]
691
- resolved_symbol = _generate_databento_symbol_alternatives(asset.symbol, contract)[0]
692
-
693
- # Cache the result
694
- _SYMBOL_RESOLUTION_CACHE[cache_key] = resolved_symbol
695
-
696
- return resolved_symbol
697
-
698
-
699
- def _resolve_databento_symbols_for_range(
700
- asset: Asset,
701
- start: datetime,
702
- end: datetime,
703
- ) -> List[str]:
704
- """Resolve all DataBento symbols necessary to cover a time range for continuous futures."""
705
- if asset.asset_type != Asset.AssetType.CONT_FUTURE:
706
- return [_format_futures_symbol_for_databento(asset)]
707
-
708
- start_ref = _normalize_reference_datetime(start)
709
- end_ref = _normalize_reference_datetime(end)
710
- if start_ref is None or end_ref is None:
711
- return [_format_futures_symbol_for_databento(asset)]
712
-
713
- symbols: List[str] = []
714
- seen = set()
715
- cursor = start_ref
716
- # Step roughly every 45 days to guarantee we cross quarter roll boundaries
717
- step = timedelta(days=45)
718
- while cursor <= end_ref + timedelta(days=45):
719
- symbol = _resolve_databento_symbol_for_datetime(asset, cursor)
720
- if symbol not in seen:
721
- seen.add(symbol)
722
- symbols.append(symbol)
723
- cursor += step
724
-
725
- # Ensure the end of the range is covered
726
- end_symbol = _resolve_databento_symbol_for_datetime(asset, end_ref)
727
- if end_symbol not in seen:
728
- symbols.append(end_symbol)
729
-
730
- return symbols
731
-
732
-
733
- def _filter_front_month_rows(asset: Asset, df: pl.DataFrame) -> pl.DataFrame:
734
- """
735
- Keep only rows matching the expected continuous contract for each timestamp.
736
-
737
- PERFORMANCE OPTIMIZATION: Uses cached symbol resolution to avoid
738
- repeated computation for the same datetime values.
739
- """
645
+ def _filter_front_month_rows(df: pl.DataFrame, schedule: List[Tuple[str, datetime, datetime]]) -> pl.DataFrame:
646
+ """Filter a polars DataFrame so that each timestamp uses the scheduled contract."""
740
647
  if df.is_empty() or "symbol" not in df.columns or "datetime" not in df.columns:
741
648
  return df
742
649
 
743
- def expected_symbol(dt: datetime) -> str:
744
- # This now uses the cached _resolve_databento_symbol_for_datetime
745
- return _resolve_databento_symbol_for_datetime(asset, dt)
650
+ if not schedule:
651
+ return df
746
652
 
747
- try:
748
- df_with_expectation = df.with_columns(
749
- pl.col("datetime")
750
- .map_elements(expected_symbol, return_dtype=pl.Utf8)
751
- .alias("_expected_symbol")
752
- )
753
- filtered = df_with_expectation.filter(pl.col("symbol") == pl.col("_expected_symbol")).drop("_expected_symbol")
754
- if not filtered.is_empty():
755
- return filtered
756
- except Exception as filtering_err:
757
- logger.debug(f"Continuous futures filtering fallback due to: {filtering_err}")
653
+ mask = None
654
+ for symbol, start_dt, end_dt in schedule:
655
+ condition = pl.col("symbol") == symbol
656
+ if start_dt is not None:
657
+ condition = condition & (pl.col("datetime") >= pl.lit(start_dt))
658
+ if end_dt is not None:
659
+ condition = condition & (pl.col("datetime") < pl.lit(end_dt))
660
+ mask = condition if mask is None else mask | condition
758
661
 
759
- # Fallback to original data if filtering fails or removes all rows
760
- return df
662
+ if mask is None:
663
+ return df
664
+
665
+ filtered = df.filter(mask)
666
+ return filtered if not filtered.is_empty() else df
761
667
 
762
668
 
763
669
  def _load_cache(cache_file: Path) -> Optional[pl.LazyFrame]:
@@ -798,17 +704,19 @@ def _save_cache(df: pl.DataFrame, cache_file: Path) -> None:
798
704
  def _normalize_databento_dataframe(df: pl.DataFrame) -> pl.DataFrame:
799
705
  """
800
706
  Normalize DataBento DataFrame to Lumibot standard format using polars
801
-
707
+
802
708
  Parameters
803
709
  ----------
804
710
  df : pl.DataFrame
805
711
  Raw DataBento DataFrame
806
-
712
+
807
713
  Returns
808
714
  -------
809
715
  pl.DataFrame
810
716
  Normalized DataFrame with standard OHLCV columns
811
717
  """
718
+ logger.info(f"[_normalize_databento_dataframe] INPUT: shape={df.shape}, has duplicates={'datetime' in df.columns and df.filter(df['datetime'].is_duplicated()).height > 0}")
719
+
812
720
  if df.is_empty():
813
721
  return df
814
722
 
@@ -866,9 +774,111 @@ def _normalize_databento_dataframe(df: pl.DataFrame) -> pl.DataFrame:
866
774
  df_norm = _ensure_polars_datetime_timezone(df_norm)
867
775
  df_norm = df_norm.sort('datetime')
868
776
 
777
+ logger.info(f"[_normalize_databento_dataframe] OUTPUT: shape={df_norm.shape}, has duplicates={'datetime' in df_norm.columns and df_norm.filter(df_norm['datetime'].is_duplicated()).height > 0}")
778
+
869
779
  return df_norm
870
780
 
871
781
 
782
+ def _fetch_and_update_futures_multiplier(
783
+ api_key: str,
784
+ asset: Asset,
785
+ resolved_symbol: str,
786
+ dataset: str = "GLBX.MDP3",
787
+ reference_date: Optional[datetime] = None
788
+ ) -> None:
789
+ """
790
+ Fetch futures contract multiplier from DataBento and update the asset in-place.
791
+ Uses caching to avoid repeated API calls.
792
+
793
+ Parameters
794
+ ----------
795
+ api_key : str
796
+ DataBento API key
797
+ asset : Asset
798
+ Futures asset to fetch multiplier for (will be updated in-place)
799
+ resolved_symbol : str
800
+ The resolved contract symbol (e.g., "MESH4" for MES continuous)
801
+ dataset : str
802
+ DataBento dataset (default: GLBX.MDP3 for CME futures)
803
+ reference_date : datetime, optional
804
+ Reference date for fetching definition. If None, uses yesterday.
805
+ """
806
+ # Only fetch for futures contracts
807
+ if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
808
+ logger.info(f"[POLARS-MULTIPLIER] Skipping {asset.symbol} - not a futures contract (type={asset.asset_type})")
809
+ return
810
+
811
+ logger.info(f"[POLARS-MULTIPLIER] Starting fetch for {asset.symbol}, current multiplier={asset.multiplier}")
812
+
813
+ # Skip if multiplier already set (and not default value of 1)
814
+ if asset.multiplier != 1:
815
+ logger.info(f"[POLARS-MULTIPLIER] Asset {asset.symbol} already has multiplier={asset.multiplier}, skipping fetch")
816
+ return
817
+
818
+ # Use the resolved symbol for cache key
819
+ cache_key = (resolved_symbol, dataset)
820
+ logger.info(f"[POLARS-MULTIPLIER] Cache key: {cache_key}, cache has {len(_INSTRUMENT_DEFINITION_CACHE)} entries")
821
+ if cache_key in _INSTRUMENT_DEFINITION_CACHE:
822
+ cached_def = _INSTRUMENT_DEFINITION_CACHE[cache_key]
823
+ if 'unit_of_measure_qty' in cached_def:
824
+ asset.multiplier = int(cached_def['unit_of_measure_qty'])
825
+ logger.info(f"[POLARS-MULTIPLIER] ✓ Using cached multiplier for {resolved_symbol}: {asset.multiplier}")
826
+ return
827
+ else:
828
+ logger.warning(f"[POLARS-MULTIPLIER] Cache entry exists but missing unit_of_measure_qty field")
829
+
830
+ try:
831
+ # Use yesterday if no reference date provided
832
+ if reference_date is None:
833
+ reference_date = datetime.now() - timedelta(days=1)
834
+
835
+ # Convert to datetime if needed
836
+ if not isinstance(reference_date, datetime):
837
+ if isinstance(reference_date, str):
838
+ reference_date = datetime.strptime(reference_date, "%Y-%m-%d")
839
+
840
+ # DataBento requires start < end, so add 1 day to end
841
+ start_date = reference_date.strftime("%Y-%m-%d")
842
+ end_date = (reference_date + timedelta(days=1)).strftime("%Y-%m-%d")
843
+
844
+ logger.info(f"Fetching instrument definition for {resolved_symbol} from DataBento")
845
+
846
+ # Create client
847
+ client = DataBentoClientPolars(api_key)
848
+
849
+ # Fetch definition data using the RESOLVED symbol
850
+ df = client.get_historical_data(
851
+ dataset=dataset,
852
+ symbols=[resolved_symbol],
853
+ schema="definition",
854
+ start=start_date,
855
+ end=end_date,
856
+ )
857
+
858
+ if df is None or df.is_empty():
859
+ logger.warning(f"No instrument definition found for {resolved_symbol}")
860
+ return
861
+
862
+ # Convert first row to dict
863
+ definition = df.to_dicts()[0]
864
+
865
+ # Cache the definition
866
+ _INSTRUMENT_DEFINITION_CACHE[cache_key] = definition
867
+
868
+ # Update asset multiplier
869
+ if 'unit_of_measure_qty' in definition:
870
+ multiplier = int(definition['unit_of_measure_qty'])
871
+ logger.info(f"[POLARS-MULTIPLIER] BEFORE update: asset.multiplier = {asset.multiplier}")
872
+ asset.multiplier = multiplier
873
+ logger.info(f"[POLARS-MULTIPLIER] ✓✓✓ SUCCESS! Set multiplier for {asset.symbol} (resolved to {resolved_symbol}): {multiplier}")
874
+ logger.info(f"[POLARS-MULTIPLIER] AFTER update: asset.multiplier = {asset.multiplier}")
875
+ else:
876
+ logger.error(f"[POLARS-MULTIPLIER] ✗ Definition missing unit_of_measure_qty field! Fields: {list(definition.keys())}")
877
+
878
+ except Exception as e:
879
+ logger.warning(f"Could not fetch multiplier for {resolved_symbol}: {str(e)}")
880
+
881
+
872
882
  def get_price_data_from_databento_polars(
873
883
  api_key: str,
874
884
  asset: Asset,
@@ -918,16 +928,33 @@ def get_price_data_from_databento_polars(
918
928
  # Ensure start and end are timezone-naive for DataBento API
919
929
  start_naive = start.replace(tzinfo=None) if start.tzinfo is not None else start
920
930
  end_naive = end.replace(tzinfo=None) if end.tzinfo is not None else end
921
- requested_end_naive = end_naive
922
-
923
- # Resolve which symbols we need to cover the requested window
924
- symbols_to_fetch = _resolve_databento_symbols_for_range(asset, start_naive, end_naive)
925
- logger.debug(
926
- "[get_price_data_from_databento_polars] Resolved symbols for %s between %s and %s: %s",
927
- asset.symbol,
928
- start_naive,
929
- end_naive,
930
- symbols_to_fetch,
931
+
932
+ if asset.asset_type == Asset.AssetType.CONT_FUTURE:
933
+ schedule_start = start
934
+ symbols_to_fetch = databento_roll.resolve_symbols_for_range(asset, schedule_start, end)
935
+ front_symbol = databento_roll.resolve_symbol_for_datetime(asset, reference_date or start)
936
+ if front_symbol not in symbols_to_fetch:
937
+ symbols_to_fetch.insert(0, front_symbol)
938
+ logger.info(
939
+ f"Resolved continuous future {asset.symbol} for range "
940
+ f"{schedule_start.strftime('%Y-%m-%d')} → {end.strftime('%Y-%m-%d')} -> {symbols_to_fetch}"
941
+ )
942
+ else:
943
+ schedule_start = start
944
+ front_symbol = _format_futures_symbol_for_databento(asset)
945
+ symbols_to_fetch = [front_symbol]
946
+
947
+ # Fetch and cache futures multiplier from DataBento if needed (after symbol resolution)
948
+ _fetch_and_update_futures_multiplier(
949
+ api_key=api_key,
950
+ asset=asset,
951
+ resolved_symbol=symbols_to_fetch[0],
952
+ dataset=dataset,
953
+ reference_date=reference_date or start
954
+ )
955
+
956
+ logger.info(
957
+ f"[get_price_data_from_databento_polars] Fetching {len(symbols_to_fetch)} symbol(s) for {asset.symbol}: {symbols_to_fetch}"
931
958
  )
932
959
 
933
960
  # Inspect cache for each symbol
@@ -944,6 +971,9 @@ def get_price_data_from_databento_polars(
944
971
  continue
945
972
  # Keep as lazy frame for now, collect later in batch
946
973
  cached_lazy_frames.append((symbol_code, cached_lazy))
974
+ else:
975
+ # If forcing cache update, mark all symbols as missing
976
+ symbols_missing = list(symbols_to_fetch)
947
977
 
948
978
  # Collect all lazy frames at once for better performance
949
979
  cached_frames: List[pl.DataFrame] = []
@@ -959,9 +989,9 @@ def get_price_data_from_databento_polars(
959
989
  )
960
990
  cached_frames.append(_ensure_polars_datetime_timezone(cached_df))
961
991
 
962
- else:
963
- symbols_missing = list(symbols_to_fetch)
964
-
992
+ logger.info(
993
+ f"[get_price_data_from_databento_polars] Cache check done: cached_frames={len(cached_frames)}, symbols_missing={symbols_missing}"
994
+ )
965
995
  frames: List[pl.DataFrame] = list(cached_frames)
966
996
 
967
997
  # Fetch missing symbols from DataBento
@@ -1004,7 +1034,9 @@ def get_price_data_from_databento_polars(
1004
1034
  continue
1005
1035
 
1006
1036
  df_normalized = _normalize_databento_dataframe(df)
1037
+ logger.info(f"[get_price_data_from_databento_polars] BEFORE append: frames has {len(frames)} items, normalized shape={df_normalized.shape}")
1007
1038
  frames.append(df_normalized)
1039
+ logger.info(f"[get_price_data_from_databento_polars] AFTER append: frames has {len(frames)} items")
1008
1040
 
1009
1041
  cache_path = _build_cache_filename(asset, start, end, timestep, symbol_override=symbol_code)
1010
1042
  _save_cache(df_normalized, cache_path)
@@ -1020,25 +1052,55 @@ def get_price_data_from_databento_polars(
1020
1052
  logger.error(f"DataBento symbol resolution failed for {asset.symbol}")
1021
1053
  return None
1022
1054
 
1055
+ logger.info(
1056
+ f"[get_price_data_from_databento_polars] BEFORE concat: {len(frames)} frames with shapes: {[f.shape for f in frames]}"
1057
+ )
1023
1058
  combined = pl.concat(frames, how="vertical", rechunk=True)
1024
1059
  combined = combined.sort("datetime")
1025
- filter_end = end_naive if end_naive > requested_end_naive else requested_end_naive
1026
-
1027
- datetime_dtype = combined.schema.get("datetime")
1028
- if isinstance(datetime_dtype, PlDatetime) and datetime_dtype.time_zone is not None:
1029
- tz = pytz.timezone(datetime_dtype.time_zone)
1030
- start_filter = tz.localize(start_naive) if start_naive.tzinfo is None else start_naive.astimezone(tz)
1031
- end_filter = tz.localize(filter_end) if filter_end.tzinfo is None else filter_end.astimezone(tz)
1032
- combined = combined.filter(
1033
- (pl.col("datetime") >= start_filter) & (pl.col("datetime") <= end_filter)
1034
- )
1035
- else:
1036
- combined = combined.filter(
1037
- (pl.col("datetime") >= start_naive) & (pl.col("datetime") <= filter_end)
1038
- )
1060
+ logger.info(f"[get_price_data_from_databento_polars] AFTER concat+sort: combined shape={combined.shape}")
1061
+
1062
+ primary_definition_cache = databento_helper._INSTRUMENT_DEFINITION_CACHE
1063
+ definition_client = None
1064
+
1065
+ def get_definition(symbol_code: str) -> Optional[Dict]:
1066
+ nonlocal definition_client
1067
+ cache_key = (symbol_code, dataset)
1068
+ if cache_key in primary_definition_cache:
1069
+ return primary_definition_cache[cache_key]
1070
+ if cache_key in _INSTRUMENT_DEFINITION_CACHE:
1071
+ definition = _INSTRUMENT_DEFINITION_CACHE[cache_key]
1072
+ primary_definition_cache[cache_key] = definition
1073
+ return definition
1074
+ if definition_client is None:
1075
+ try:
1076
+ definition_client = databento_helper.DataBentoClient(api_key=api_key)
1077
+ except Exception as exc:
1078
+ logger.warning(f"Unable to initialize DataBento definition client: {exc}")
1079
+ return None
1080
+ try:
1081
+ definition = definition_client.get_instrument_definition(
1082
+ dataset=dataset,
1083
+ symbol=symbol_code,
1084
+ reference_date=reference_date or start,
1085
+ )
1086
+ except Exception as exc:
1087
+ logger.warning(f"Failed to fetch definition for {symbol_code}: {exc}")
1088
+ return None
1089
+ if definition:
1090
+ primary_definition_cache[cache_key] = definition
1091
+ _INSTRUMENT_DEFINITION_CACHE[cache_key] = definition
1092
+ return definition
1093
+
1094
+ schedule = databento_roll.build_roll_schedule(
1095
+ asset,
1096
+ schedule_start,
1097
+ end,
1098
+ definition_provider=get_definition,
1099
+ roll_days=databento_roll.ROLL_DAYS_BEFORE_EXPIRATION,
1100
+ )
1039
1101
 
1040
- if asset.asset_type == Asset.AssetType.CONT_FUTURE:
1041
- combined = _filter_front_month_rows(asset, combined)
1102
+ if schedule:
1103
+ combined = _filter_front_month_rows(combined, schedule)
1042
1104
 
1043
1105
  if combined.is_empty():
1044
1106
  logger.warning("[get_price_data_from_databento_polars] Combined dataset empty after filtering")