lumibot 4.0.22__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lumibot might be problematic. Click here for more details.
- lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/__pycache__/constants.cpython-312.pyc +0 -0
- lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
- lumibot/backtesting/__init__.py +6 -5
- lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/backtesting_broker.py +209 -9
- lumibot/backtesting/databento_backtesting.py +141 -24
- lumibot/backtesting/thetadata_backtesting.py +63 -42
- lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
- lumibot/brokers/alpaca.py +11 -1
- lumibot/brokers/tradeovate.py +475 -0
- lumibot/components/grok_news_helper.py +284 -0
- lumibot/components/options_helper.py +90 -34
- lumibot/credentials.py +3 -0
- lumibot/data_sources/__init__.py +2 -1
- lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/data_source_backtesting.py +3 -5
- lumibot/data_sources/databento_data.py +5 -5
- lumibot/data_sources/databento_data_polars_backtesting.py +636 -0
- lumibot/data_sources/databento_data_polars_live.py +793 -0
- lumibot/data_sources/pandas_data.py +6 -3
- lumibot/data_sources/polars_mixin.py +126 -21
- lumibot/data_sources/tradeovate_data.py +80 -0
- lumibot/data_sources/tradier_data.py +2 -1
- lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
- lumibot/entities/asset.py +8 -0
- lumibot/entities/order.py +1 -1
- lumibot/entities/quote.py +14 -0
- lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
- lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
- lumibot/strategies/_strategy.py +95 -27
- lumibot/strategies/strategy.py +5 -6
- lumibot/strategies/strategy_executor.py +2 -2
- lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/databento_helper.py +384 -133
- lumibot/tools/databento_helper_polars.py +218 -156
- lumibot/tools/databento_roll.py +216 -0
- lumibot/tools/lumibot_logger.py +32 -17
- lumibot/tools/polygon_helper.py +65 -0
- lumibot/tools/thetadata_helper.py +588 -70
- lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
- lumibot/traders/trader.py +1 -1
- lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
- {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
- {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/RECORD +164 -46
- tests/backtest/check_timing_offset.py +198 -0
- tests/backtest/check_volume_spike.py +112 -0
- tests/backtest/comprehensive_comparison.py +166 -0
- tests/backtest/debug_comparison.py +91 -0
- tests/backtest/diagnose_price_difference.py +97 -0
- tests/backtest/direct_api_comparison.py +203 -0
- tests/backtest/profile_thetadata_vs_polygon.py +255 -0
- tests/backtest/root_cause_analysis.py +109 -0
- tests/backtest/test_accuracy_verification.py +244 -0
- tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
- tests/backtest/test_databento.py +57 -0
- tests/backtest/test_databento_comprehensive_trading.py +564 -0
- tests/backtest/test_debug_avg_fill_price.py +112 -0
- tests/backtest/test_dividends.py +8 -3
- tests/backtest/test_example_strategies.py +54 -47
- tests/backtest/test_futures_edge_cases.py +451 -0
- tests/backtest/test_futures_single_trade.py +270 -0
- tests/backtest/test_futures_ultra_simple.py +191 -0
- tests/backtest/test_index_data_verification.py +348 -0
- tests/backtest/test_polygon.py +45 -24
- tests/backtest/test_thetadata.py +246 -60
- tests/backtest/test_thetadata_comprehensive.py +729 -0
- tests/backtest/test_thetadata_vs_polygon.py +557 -0
- tests/backtest/test_yahoo.py +1 -2
- tests/conftest.py +20 -0
- tests/test_backtesting_data_source_env.py +249 -0
- tests/test_backtesting_quiet_logs_complete.py +10 -11
- tests/test_databento_helper.py +73 -86
- tests/test_databento_live.py +10 -10
- tests/test_databento_timezone_fixes.py +21 -4
- tests/test_get_historical_prices.py +6 -6
- tests/test_options_helper.py +162 -40
- tests/test_polygon_helper.py +21 -13
- tests/test_quiet_logs_requirements.py +5 -5
- tests/test_thetadata_helper.py +487 -171
- tests/test_yahoo_data.py +125 -0
- {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
- {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
- {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@ import re
|
|
|
4
4
|
from datetime import date, datetime, timedelta, timezone
|
|
5
5
|
from decimal import Decimal
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Dict, List, Optional, Union
|
|
7
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
8
8
|
|
|
9
9
|
import pytz
|
|
10
10
|
|
|
@@ -13,6 +13,7 @@ from polars.datatypes import Datetime as PlDatetime
|
|
|
13
13
|
|
|
14
14
|
from lumibot.constants import LUMIBOT_CACHE_FOLDER, LUMIBOT_DEFAULT_PYTZ
|
|
15
15
|
from lumibot.entities import Asset
|
|
16
|
+
from lumibot.tools import databento_helper, databento_roll
|
|
16
17
|
|
|
17
18
|
# Set up module-specific logger
|
|
18
19
|
from lumibot.tools.lumibot_logger import get_logger
|
|
@@ -31,7 +32,7 @@ except ImportError:
|
|
|
31
32
|
logger.warning("DataBento package not available. Please install with: pip install databento")
|
|
32
33
|
|
|
33
34
|
# Cache settings
|
|
34
|
-
CACHE_SUBFOLDER = "
|
|
35
|
+
CACHE_SUBFOLDER = "databento_polars_v2"
|
|
35
36
|
LUMIBOT_DATABENTO_CACHE_FOLDER = os.path.join(LUMIBOT_CACHE_FOLDER, CACHE_SUBFOLDER)
|
|
36
37
|
RECENT_FILE_TOLERANCE_DAYS = 14
|
|
37
38
|
MAX_DATABENTO_DAYS = 365 # DataBento can handle larger date ranges than some providers
|
|
@@ -43,15 +44,8 @@ if not os.path.exists(LUMIBOT_DATABENTO_CACHE_FOLDER):
|
|
|
43
44
|
except Exception as e:
|
|
44
45
|
logger.warning(f"Could not create DataBento cache folder: {e}")
|
|
45
46
|
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
# ============================================================================
|
|
49
|
-
# These caches dramatically reduce overhead for high-frequency function calls
|
|
50
|
-
# Symbol resolution cache: saves ~2.5s on 362k calls (10-20x speedup)
|
|
51
|
-
_SYMBOL_RESOLUTION_CACHE = {} # {(asset_symbol, asset_type, dt_str): resolved_symbol}
|
|
52
|
-
|
|
53
|
-
# Datetime normalization cache: saves ~1.2s on 362k calls (5-10x speedup)
|
|
54
|
-
_DATETIME_NORMALIZATION_CACHE = {} # {dt_timestamp: normalized_dt}
|
|
47
|
+
# Instrument definition cache: stores multipliers and contract specs
|
|
48
|
+
_INSTRUMENT_DEFINITION_CACHE = {} # {(symbol, dataset): definition_dict}
|
|
55
49
|
|
|
56
50
|
|
|
57
51
|
class DataBentoClientPolars:
|
|
@@ -412,7 +406,15 @@ class DataBentoClientPolars:
|
|
|
412
406
|
pandas_df = pandas_df.rename(columns={index_name: 'datetime'})
|
|
413
407
|
# Convert to polars
|
|
414
408
|
df = pl.from_pandas(pandas_df)
|
|
415
|
-
logger.
|
|
409
|
+
logger.info(f"[DataBentoClientPolars] Converted to polars, shape: {df.shape}, columns: {df.columns}")
|
|
410
|
+
|
|
411
|
+
# DEBUG: Check for duplicates immediately after conversion
|
|
412
|
+
if 'datetime' in df.columns:
|
|
413
|
+
dup_count = df.filter(df['datetime'].is_duplicated()).height
|
|
414
|
+
if dup_count > 0:
|
|
415
|
+
logger.warning(f"[DataBentoClientPolars] ⚠️ FOUND {dup_count} DUPLICATE TIMESTAMPS AFTER CONVERSION!")
|
|
416
|
+
else:
|
|
417
|
+
logger.info(f"[DataBentoClientPolars] ✓ No duplicates after conversion")
|
|
416
418
|
# Ensure datetime column is datetime type
|
|
417
419
|
if 'datetime' in df.columns:
|
|
418
420
|
df = df.with_columns(pl.col('datetime').cast(pl.Datetime))
|
|
@@ -640,124 +642,28 @@ def _build_cache_filename(
|
|
|
640
642
|
return path
|
|
641
643
|
|
|
642
644
|
|
|
643
|
-
def
|
|
644
|
-
"""
|
|
645
|
-
Normalize datetime to the default Lumibot timezone and drop tzinfo.
|
|
646
|
-
|
|
647
|
-
PERFORMANCE OPTIMIZATION: This function is called 362k+ times during backtesting.
|
|
648
|
-
Caching provides 5-10x speedup, saving ~1.2s per backtest.
|
|
649
|
-
"""
|
|
650
|
-
if dt is None:
|
|
651
|
-
return dt
|
|
652
|
-
|
|
653
|
-
# Cache key: use timestamp for faster lookup than full datetime
|
|
654
|
-
cache_key = dt.timestamp() if hasattr(dt, 'timestamp') else None
|
|
655
|
-
|
|
656
|
-
if cache_key is not None and cache_key in _DATETIME_NORMALIZATION_CACHE:
|
|
657
|
-
return _DATETIME_NORMALIZATION_CACHE[cache_key]
|
|
658
|
-
|
|
659
|
-
# Perform normalization
|
|
660
|
-
if dt.tzinfo is not None:
|
|
661
|
-
normalized = dt.astimezone(LUMIBOT_DEFAULT_PYTZ).replace(tzinfo=None)
|
|
662
|
-
else:
|
|
663
|
-
normalized = dt
|
|
664
|
-
|
|
665
|
-
# Cache the result
|
|
666
|
-
if cache_key is not None:
|
|
667
|
-
_DATETIME_NORMALIZATION_CACHE[cache_key] = normalized
|
|
668
|
-
|
|
669
|
-
return normalized
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
def _resolve_databento_symbol_for_datetime(asset: Asset, dt: datetime) -> str:
|
|
673
|
-
"""
|
|
674
|
-
Resolve the expected DataBento symbol for a datetime using the strategy roll rules.
|
|
675
|
-
|
|
676
|
-
PERFORMANCE OPTIMIZATION: This function is called 362k+ times during backtesting.
|
|
677
|
-
Caching provides 10-20x speedup, saving ~2.5s per backtest.
|
|
678
|
-
"""
|
|
679
|
-
# Create cache key from asset and datetime
|
|
680
|
-
# Use normalized datetime string for consistent caching
|
|
681
|
-
dt_timestamp = dt.timestamp() if hasattr(dt, 'timestamp') else str(dt)
|
|
682
|
-
cache_key = (asset.symbol, asset.asset_type, dt_timestamp)
|
|
683
|
-
|
|
684
|
-
if cache_key in _SYMBOL_RESOLUTION_CACHE:
|
|
685
|
-
return _SYMBOL_RESOLUTION_CACHE[cache_key]
|
|
686
|
-
|
|
687
|
-
# Perform symbol resolution
|
|
688
|
-
reference_dt = _normalize_reference_datetime(dt)
|
|
689
|
-
variants = asset.resolve_continuous_futures_contract_variants(reference_date=reference_dt)
|
|
690
|
-
contract = variants[2]
|
|
691
|
-
resolved_symbol = _generate_databento_symbol_alternatives(asset.symbol, contract)[0]
|
|
692
|
-
|
|
693
|
-
# Cache the result
|
|
694
|
-
_SYMBOL_RESOLUTION_CACHE[cache_key] = resolved_symbol
|
|
695
|
-
|
|
696
|
-
return resolved_symbol
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
def _resolve_databento_symbols_for_range(
|
|
700
|
-
asset: Asset,
|
|
701
|
-
start: datetime,
|
|
702
|
-
end: datetime,
|
|
703
|
-
) -> List[str]:
|
|
704
|
-
"""Resolve all DataBento symbols necessary to cover a time range for continuous futures."""
|
|
705
|
-
if asset.asset_type != Asset.AssetType.CONT_FUTURE:
|
|
706
|
-
return [_format_futures_symbol_for_databento(asset)]
|
|
707
|
-
|
|
708
|
-
start_ref = _normalize_reference_datetime(start)
|
|
709
|
-
end_ref = _normalize_reference_datetime(end)
|
|
710
|
-
if start_ref is None or end_ref is None:
|
|
711
|
-
return [_format_futures_symbol_for_databento(asset)]
|
|
712
|
-
|
|
713
|
-
symbols: List[str] = []
|
|
714
|
-
seen = set()
|
|
715
|
-
cursor = start_ref
|
|
716
|
-
# Step roughly every 45 days to guarantee we cross quarter roll boundaries
|
|
717
|
-
step = timedelta(days=45)
|
|
718
|
-
while cursor <= end_ref + timedelta(days=45):
|
|
719
|
-
symbol = _resolve_databento_symbol_for_datetime(asset, cursor)
|
|
720
|
-
if symbol not in seen:
|
|
721
|
-
seen.add(symbol)
|
|
722
|
-
symbols.append(symbol)
|
|
723
|
-
cursor += step
|
|
724
|
-
|
|
725
|
-
# Ensure the end of the range is covered
|
|
726
|
-
end_symbol = _resolve_databento_symbol_for_datetime(asset, end_ref)
|
|
727
|
-
if end_symbol not in seen:
|
|
728
|
-
symbols.append(end_symbol)
|
|
729
|
-
|
|
730
|
-
return symbols
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
def _filter_front_month_rows(asset: Asset, df: pl.DataFrame) -> pl.DataFrame:
|
|
734
|
-
"""
|
|
735
|
-
Keep only rows matching the expected continuous contract for each timestamp.
|
|
736
|
-
|
|
737
|
-
PERFORMANCE OPTIMIZATION: Uses cached symbol resolution to avoid
|
|
738
|
-
repeated computation for the same datetime values.
|
|
739
|
-
"""
|
|
645
|
+
def _filter_front_month_rows(df: pl.DataFrame, schedule: List[Tuple[str, datetime, datetime]]) -> pl.DataFrame:
|
|
646
|
+
"""Filter a polars DataFrame so that each timestamp uses the scheduled contract."""
|
|
740
647
|
if df.is_empty() or "symbol" not in df.columns or "datetime" not in df.columns:
|
|
741
648
|
return df
|
|
742
649
|
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
return _resolve_databento_symbol_for_datetime(asset, dt)
|
|
650
|
+
if not schedule:
|
|
651
|
+
return df
|
|
746
652
|
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
.
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
if
|
|
755
|
-
return filtered
|
|
756
|
-
except Exception as filtering_err:
|
|
757
|
-
logger.debug(f"Continuous futures filtering fallback due to: {filtering_err}")
|
|
653
|
+
mask = None
|
|
654
|
+
for symbol, start_dt, end_dt in schedule:
|
|
655
|
+
condition = pl.col("symbol") == symbol
|
|
656
|
+
if start_dt is not None:
|
|
657
|
+
condition = condition & (pl.col("datetime") >= pl.lit(start_dt))
|
|
658
|
+
if end_dt is not None:
|
|
659
|
+
condition = condition & (pl.col("datetime") < pl.lit(end_dt))
|
|
660
|
+
mask = condition if mask is None else mask | condition
|
|
758
661
|
|
|
759
|
-
|
|
760
|
-
|
|
662
|
+
if mask is None:
|
|
663
|
+
return df
|
|
664
|
+
|
|
665
|
+
filtered = df.filter(mask)
|
|
666
|
+
return filtered if not filtered.is_empty() else df
|
|
761
667
|
|
|
762
668
|
|
|
763
669
|
def _load_cache(cache_file: Path) -> Optional[pl.LazyFrame]:
|
|
@@ -798,17 +704,19 @@ def _save_cache(df: pl.DataFrame, cache_file: Path) -> None:
|
|
|
798
704
|
def _normalize_databento_dataframe(df: pl.DataFrame) -> pl.DataFrame:
|
|
799
705
|
"""
|
|
800
706
|
Normalize DataBento DataFrame to Lumibot standard format using polars
|
|
801
|
-
|
|
707
|
+
|
|
802
708
|
Parameters
|
|
803
709
|
----------
|
|
804
710
|
df : pl.DataFrame
|
|
805
711
|
Raw DataBento DataFrame
|
|
806
|
-
|
|
712
|
+
|
|
807
713
|
Returns
|
|
808
714
|
-------
|
|
809
715
|
pl.DataFrame
|
|
810
716
|
Normalized DataFrame with standard OHLCV columns
|
|
811
717
|
"""
|
|
718
|
+
logger.info(f"[_normalize_databento_dataframe] INPUT: shape={df.shape}, has duplicates={'datetime' in df.columns and df.filter(df['datetime'].is_duplicated()).height > 0}")
|
|
719
|
+
|
|
812
720
|
if df.is_empty():
|
|
813
721
|
return df
|
|
814
722
|
|
|
@@ -866,9 +774,111 @@ def _normalize_databento_dataframe(df: pl.DataFrame) -> pl.DataFrame:
|
|
|
866
774
|
df_norm = _ensure_polars_datetime_timezone(df_norm)
|
|
867
775
|
df_norm = df_norm.sort('datetime')
|
|
868
776
|
|
|
777
|
+
logger.info(f"[_normalize_databento_dataframe] OUTPUT: shape={df_norm.shape}, has duplicates={'datetime' in df_norm.columns and df_norm.filter(df_norm['datetime'].is_duplicated()).height > 0}")
|
|
778
|
+
|
|
869
779
|
return df_norm
|
|
870
780
|
|
|
871
781
|
|
|
782
|
+
def _fetch_and_update_futures_multiplier(
|
|
783
|
+
api_key: str,
|
|
784
|
+
asset: Asset,
|
|
785
|
+
resolved_symbol: str,
|
|
786
|
+
dataset: str = "GLBX.MDP3",
|
|
787
|
+
reference_date: Optional[datetime] = None
|
|
788
|
+
) -> None:
|
|
789
|
+
"""
|
|
790
|
+
Fetch futures contract multiplier from DataBento and update the asset in-place.
|
|
791
|
+
Uses caching to avoid repeated API calls.
|
|
792
|
+
|
|
793
|
+
Parameters
|
|
794
|
+
----------
|
|
795
|
+
api_key : str
|
|
796
|
+
DataBento API key
|
|
797
|
+
asset : Asset
|
|
798
|
+
Futures asset to fetch multiplier for (will be updated in-place)
|
|
799
|
+
resolved_symbol : str
|
|
800
|
+
The resolved contract symbol (e.g., "MESH4" for MES continuous)
|
|
801
|
+
dataset : str
|
|
802
|
+
DataBento dataset (default: GLBX.MDP3 for CME futures)
|
|
803
|
+
reference_date : datetime, optional
|
|
804
|
+
Reference date for fetching definition. If None, uses yesterday.
|
|
805
|
+
"""
|
|
806
|
+
# Only fetch for futures contracts
|
|
807
|
+
if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
|
|
808
|
+
logger.info(f"[POLARS-MULTIPLIER] Skipping {asset.symbol} - not a futures contract (type={asset.asset_type})")
|
|
809
|
+
return
|
|
810
|
+
|
|
811
|
+
logger.info(f"[POLARS-MULTIPLIER] Starting fetch for {asset.symbol}, current multiplier={asset.multiplier}")
|
|
812
|
+
|
|
813
|
+
# Skip if multiplier already set (and not default value of 1)
|
|
814
|
+
if asset.multiplier != 1:
|
|
815
|
+
logger.info(f"[POLARS-MULTIPLIER] Asset {asset.symbol} already has multiplier={asset.multiplier}, skipping fetch")
|
|
816
|
+
return
|
|
817
|
+
|
|
818
|
+
# Use the resolved symbol for cache key
|
|
819
|
+
cache_key = (resolved_symbol, dataset)
|
|
820
|
+
logger.info(f"[POLARS-MULTIPLIER] Cache key: {cache_key}, cache has {len(_INSTRUMENT_DEFINITION_CACHE)} entries")
|
|
821
|
+
if cache_key in _INSTRUMENT_DEFINITION_CACHE:
|
|
822
|
+
cached_def = _INSTRUMENT_DEFINITION_CACHE[cache_key]
|
|
823
|
+
if 'unit_of_measure_qty' in cached_def:
|
|
824
|
+
asset.multiplier = int(cached_def['unit_of_measure_qty'])
|
|
825
|
+
logger.info(f"[POLARS-MULTIPLIER] ✓ Using cached multiplier for {resolved_symbol}: {asset.multiplier}")
|
|
826
|
+
return
|
|
827
|
+
else:
|
|
828
|
+
logger.warning(f"[POLARS-MULTIPLIER] Cache entry exists but missing unit_of_measure_qty field")
|
|
829
|
+
|
|
830
|
+
try:
|
|
831
|
+
# Use yesterday if no reference date provided
|
|
832
|
+
if reference_date is None:
|
|
833
|
+
reference_date = datetime.now() - timedelta(days=1)
|
|
834
|
+
|
|
835
|
+
# Convert to datetime if needed
|
|
836
|
+
if not isinstance(reference_date, datetime):
|
|
837
|
+
if isinstance(reference_date, str):
|
|
838
|
+
reference_date = datetime.strptime(reference_date, "%Y-%m-%d")
|
|
839
|
+
|
|
840
|
+
# DataBento requires start < end, so add 1 day to end
|
|
841
|
+
start_date = reference_date.strftime("%Y-%m-%d")
|
|
842
|
+
end_date = (reference_date + timedelta(days=1)).strftime("%Y-%m-%d")
|
|
843
|
+
|
|
844
|
+
logger.info(f"Fetching instrument definition for {resolved_symbol} from DataBento")
|
|
845
|
+
|
|
846
|
+
# Create client
|
|
847
|
+
client = DataBentoClientPolars(api_key)
|
|
848
|
+
|
|
849
|
+
# Fetch definition data using the RESOLVED symbol
|
|
850
|
+
df = client.get_historical_data(
|
|
851
|
+
dataset=dataset,
|
|
852
|
+
symbols=[resolved_symbol],
|
|
853
|
+
schema="definition",
|
|
854
|
+
start=start_date,
|
|
855
|
+
end=end_date,
|
|
856
|
+
)
|
|
857
|
+
|
|
858
|
+
if df is None or df.is_empty():
|
|
859
|
+
logger.warning(f"No instrument definition found for {resolved_symbol}")
|
|
860
|
+
return
|
|
861
|
+
|
|
862
|
+
# Convert first row to dict
|
|
863
|
+
definition = df.to_dicts()[0]
|
|
864
|
+
|
|
865
|
+
# Cache the definition
|
|
866
|
+
_INSTRUMENT_DEFINITION_CACHE[cache_key] = definition
|
|
867
|
+
|
|
868
|
+
# Update asset multiplier
|
|
869
|
+
if 'unit_of_measure_qty' in definition:
|
|
870
|
+
multiplier = int(definition['unit_of_measure_qty'])
|
|
871
|
+
logger.info(f"[POLARS-MULTIPLIER] BEFORE update: asset.multiplier = {asset.multiplier}")
|
|
872
|
+
asset.multiplier = multiplier
|
|
873
|
+
logger.info(f"[POLARS-MULTIPLIER] ✓✓✓ SUCCESS! Set multiplier for {asset.symbol} (resolved to {resolved_symbol}): {multiplier}")
|
|
874
|
+
logger.info(f"[POLARS-MULTIPLIER] AFTER update: asset.multiplier = {asset.multiplier}")
|
|
875
|
+
else:
|
|
876
|
+
logger.error(f"[POLARS-MULTIPLIER] ✗ Definition missing unit_of_measure_qty field! Fields: {list(definition.keys())}")
|
|
877
|
+
|
|
878
|
+
except Exception as e:
|
|
879
|
+
logger.warning(f"Could not fetch multiplier for {resolved_symbol}: {str(e)}")
|
|
880
|
+
|
|
881
|
+
|
|
872
882
|
def get_price_data_from_databento_polars(
|
|
873
883
|
api_key: str,
|
|
874
884
|
asset: Asset,
|
|
@@ -918,16 +928,33 @@ def get_price_data_from_databento_polars(
|
|
|
918
928
|
# Ensure start and end are timezone-naive for DataBento API
|
|
919
929
|
start_naive = start.replace(tzinfo=None) if start.tzinfo is not None else start
|
|
920
930
|
end_naive = end.replace(tzinfo=None) if end.tzinfo is not None else end
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
+
|
|
932
|
+
if asset.asset_type == Asset.AssetType.CONT_FUTURE:
|
|
933
|
+
schedule_start = start
|
|
934
|
+
symbols_to_fetch = databento_roll.resolve_symbols_for_range(asset, schedule_start, end)
|
|
935
|
+
front_symbol = databento_roll.resolve_symbol_for_datetime(asset, reference_date or start)
|
|
936
|
+
if front_symbol not in symbols_to_fetch:
|
|
937
|
+
symbols_to_fetch.insert(0, front_symbol)
|
|
938
|
+
logger.info(
|
|
939
|
+
f"Resolved continuous future {asset.symbol} for range "
|
|
940
|
+
f"{schedule_start.strftime('%Y-%m-%d')} → {end.strftime('%Y-%m-%d')} -> {symbols_to_fetch}"
|
|
941
|
+
)
|
|
942
|
+
else:
|
|
943
|
+
schedule_start = start
|
|
944
|
+
front_symbol = _format_futures_symbol_for_databento(asset)
|
|
945
|
+
symbols_to_fetch = [front_symbol]
|
|
946
|
+
|
|
947
|
+
# Fetch and cache futures multiplier from DataBento if needed (after symbol resolution)
|
|
948
|
+
_fetch_and_update_futures_multiplier(
|
|
949
|
+
api_key=api_key,
|
|
950
|
+
asset=asset,
|
|
951
|
+
resolved_symbol=symbols_to_fetch[0],
|
|
952
|
+
dataset=dataset,
|
|
953
|
+
reference_date=reference_date or start
|
|
954
|
+
)
|
|
955
|
+
|
|
956
|
+
logger.info(
|
|
957
|
+
f"[get_price_data_from_databento_polars] Fetching {len(symbols_to_fetch)} symbol(s) for {asset.symbol}: {symbols_to_fetch}"
|
|
931
958
|
)
|
|
932
959
|
|
|
933
960
|
# Inspect cache for each symbol
|
|
@@ -944,6 +971,9 @@ def get_price_data_from_databento_polars(
|
|
|
944
971
|
continue
|
|
945
972
|
# Keep as lazy frame for now, collect later in batch
|
|
946
973
|
cached_lazy_frames.append((symbol_code, cached_lazy))
|
|
974
|
+
else:
|
|
975
|
+
# If forcing cache update, mark all symbols as missing
|
|
976
|
+
symbols_missing = list(symbols_to_fetch)
|
|
947
977
|
|
|
948
978
|
# Collect all lazy frames at once for better performance
|
|
949
979
|
cached_frames: List[pl.DataFrame] = []
|
|
@@ -959,9 +989,9 @@ def get_price_data_from_databento_polars(
|
|
|
959
989
|
)
|
|
960
990
|
cached_frames.append(_ensure_polars_datetime_timezone(cached_df))
|
|
961
991
|
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
992
|
+
logger.info(
|
|
993
|
+
f"[get_price_data_from_databento_polars] Cache check done: cached_frames={len(cached_frames)}, symbols_missing={symbols_missing}"
|
|
994
|
+
)
|
|
965
995
|
frames: List[pl.DataFrame] = list(cached_frames)
|
|
966
996
|
|
|
967
997
|
# Fetch missing symbols from DataBento
|
|
@@ -1004,7 +1034,9 @@ def get_price_data_from_databento_polars(
|
|
|
1004
1034
|
continue
|
|
1005
1035
|
|
|
1006
1036
|
df_normalized = _normalize_databento_dataframe(df)
|
|
1037
|
+
logger.info(f"[get_price_data_from_databento_polars] BEFORE append: frames has {len(frames)} items, normalized shape={df_normalized.shape}")
|
|
1007
1038
|
frames.append(df_normalized)
|
|
1039
|
+
logger.info(f"[get_price_data_from_databento_polars] AFTER append: frames has {len(frames)} items")
|
|
1008
1040
|
|
|
1009
1041
|
cache_path = _build_cache_filename(asset, start, end, timestep, symbol_override=symbol_code)
|
|
1010
1042
|
_save_cache(df_normalized, cache_path)
|
|
@@ -1020,25 +1052,55 @@ def get_price_data_from_databento_polars(
|
|
|
1020
1052
|
logger.error(f"DataBento symbol resolution failed for {asset.symbol}")
|
|
1021
1053
|
return None
|
|
1022
1054
|
|
|
1055
|
+
logger.info(
|
|
1056
|
+
f"[get_price_data_from_databento_polars] BEFORE concat: {len(frames)} frames with shapes: {[f.shape for f in frames]}"
|
|
1057
|
+
)
|
|
1023
1058
|
combined = pl.concat(frames, how="vertical", rechunk=True)
|
|
1024
1059
|
combined = combined.sort("datetime")
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1060
|
+
logger.info(f"[get_price_data_from_databento_polars] AFTER concat+sort: combined shape={combined.shape}")
|
|
1061
|
+
|
|
1062
|
+
primary_definition_cache = databento_helper._INSTRUMENT_DEFINITION_CACHE
|
|
1063
|
+
definition_client = None
|
|
1064
|
+
|
|
1065
|
+
def get_definition(symbol_code: str) -> Optional[Dict]:
|
|
1066
|
+
nonlocal definition_client
|
|
1067
|
+
cache_key = (symbol_code, dataset)
|
|
1068
|
+
if cache_key in primary_definition_cache:
|
|
1069
|
+
return primary_definition_cache[cache_key]
|
|
1070
|
+
if cache_key in _INSTRUMENT_DEFINITION_CACHE:
|
|
1071
|
+
definition = _INSTRUMENT_DEFINITION_CACHE[cache_key]
|
|
1072
|
+
primary_definition_cache[cache_key] = definition
|
|
1073
|
+
return definition
|
|
1074
|
+
if definition_client is None:
|
|
1075
|
+
try:
|
|
1076
|
+
definition_client = databento_helper.DataBentoClient(api_key=api_key)
|
|
1077
|
+
except Exception as exc:
|
|
1078
|
+
logger.warning(f"Unable to initialize DataBento definition client: {exc}")
|
|
1079
|
+
return None
|
|
1080
|
+
try:
|
|
1081
|
+
definition = definition_client.get_instrument_definition(
|
|
1082
|
+
dataset=dataset,
|
|
1083
|
+
symbol=symbol_code,
|
|
1084
|
+
reference_date=reference_date or start,
|
|
1085
|
+
)
|
|
1086
|
+
except Exception as exc:
|
|
1087
|
+
logger.warning(f"Failed to fetch definition for {symbol_code}: {exc}")
|
|
1088
|
+
return None
|
|
1089
|
+
if definition:
|
|
1090
|
+
primary_definition_cache[cache_key] = definition
|
|
1091
|
+
_INSTRUMENT_DEFINITION_CACHE[cache_key] = definition
|
|
1092
|
+
return definition
|
|
1093
|
+
|
|
1094
|
+
schedule = databento_roll.build_roll_schedule(
|
|
1095
|
+
asset,
|
|
1096
|
+
schedule_start,
|
|
1097
|
+
end,
|
|
1098
|
+
definition_provider=get_definition,
|
|
1099
|
+
roll_days=databento_roll.ROLL_DAYS_BEFORE_EXPIRATION,
|
|
1100
|
+
)
|
|
1039
1101
|
|
|
1040
|
-
if
|
|
1041
|
-
combined = _filter_front_month_rows(
|
|
1102
|
+
if schedule:
|
|
1103
|
+
combined = _filter_front_month_rows(combined, schedule)
|
|
1042
1104
|
|
|
1043
1105
|
if combined.is_empty():
|
|
1044
1106
|
logger.warning("[get_price_data_from_databento_polars] Combined dataset empty after filtering")
|