PyPI - lumibot - Versions diffs - 4.1.2__py3-none-any.whl → 4.2.0__py3-none-any.whl - Mend

lumibot 4.1.2__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lumibot might be problematic. Click here for more details.

Files changed (164) hide show

tests/test_backtesting_data_source_env.py CHANGED Viewed

@@ -98,11 +98,14 @@ class TestBacktestingDataSourceEnv:
             )
             # Verify the log message shows polygon was selected
-            assert any("Auto-selected backtesting data source from BACKTESTING_DATA_SOURCE env var: polygon" in record.message
+            assert any("Using BACKTESTING_DATA_SOURCE setting for backtest data: polygon" in record.message
                       for record in caplog.records)
     def test_auto_select_thetadata_case_insensitive(self, clean_environment, restore_theta_credentials, caplog):
         """Test that BACKTESTING_DATA_SOURCE=THETADATA (uppercase) selects ThetaDataBacktesting."""
+        import logging
+        caplog.set_level(logging.INFO, logger='lumibot.strategies._strategy')
         with patch.dict(os.environ, {'BACKTESTING_DATA_SOURCE': 'THETADATA'}):
             # Re-import credentials to pick up env change
             from importlib import reload
@@ -130,7 +133,7 @@ class TestBacktestingDataSourceEnv:
                 pass
             # Verify the log message shows thetadata was selected OR check for ThetaData error
-            thetadata_selected = any("Auto-selected backtesting data source from BACKTESTING_DATA_SOURCE env var: THETADATA" in record.message
+            thetadata_selected = any("Using BACKTESTING_DATA_SOURCE setting for backtest data: THETADATA" in record.message
                                     for record in caplog.records)
             thetadata_attempted = any("Cannot connect to Theta Data" in record.message or "ThetaData" in record.message
                                      for record in caplog.records)
@@ -183,8 +186,11 @@ class TestBacktestingDataSourceEnv:
                     show_indicators=False,
                 )
-    def test_explicit_datasource_overrides_env(self, clean_environment, restore_theta_credentials, caplog):
-        """Test that explicit datasource_class overrides BACKTESTING_DATA_SOURCE env var."""
+    def test_env_override_wins_over_explicit_datasource(self, clean_environment, restore_theta_credentials, caplog):
+        """Test that BACKTESTING_DATA_SOURCE env var takes precedence over explicit datasource_class."""
+        import logging
+        caplog.set_level(logging.INFO, logger='lumibot.strategies._strategy')
         with patch.dict(os.environ, {'BACKTESTING_DATA_SOURCE': 'polygon'}):
             # Re-import credentials to pick up env change
             from importlib import reload
@@ -205,15 +211,45 @@ class TestBacktestingDataSourceEnv:
                 show_progress_bar=False,
             )
-            # Verify the auto-select message was NOT logged (explicit datasource was used)
-            assert not any("Auto-selected backtesting data source" in record.message
-                          for record in caplog.records)
+            # Verify the env override message was logged (env var wins)
+            assert any("Using BACKTESTING_DATA_SOURCE setting for backtest data: polygon" in record.message
+                       for record in caplog.records)
+    def test_explicit_datasource_used_when_env_none(self, clean_environment, restore_theta_credentials, caplog):
+        """Test that setting BACKTESTING_DATA_SOURCE to 'none' defers to the explicit datasource_class."""
+        import logging
+        caplog.set_level(logging.INFO, logger='lumibot.strategies._strategy')
+        with patch.dict(os.environ, {'BACKTESTING_DATA_SOURCE': 'none'}):
+            from importlib import reload
+            import lumibot.credentials
+            reload(lumibot.credentials)
+            backtesting_start = datetime(2023, 1, 1)
+            backtesting_end = datetime(2023, 1, 10)  # Shorter backtest for speed
+            SimpleTestStrategy.run_backtest(
+                YahooDataBacktesting,
+                backtesting_start=backtesting_start,
+                backtesting_end=backtesting_end,
+                show_plot=False,
+                show_tearsheet=False,
+                show_indicators=False,
+                show_progress_bar=False,
+            )
+            # Confirm no override occurred
+            assert not any("Using BACKTESTING_DATA_SOURCE setting for backtest data" in record.message
+                           for record in caplog.records)
     def test_default_thetadata_when_no_env_set(self, clean_environment, restore_theta_credentials, caplog):
         """Test that ThetaData is the default when BACKTESTING_DATA_SOURCE is not set."""
         # Remove BACKTESTING_DATA_SOURCE from env
         env_without_datasource = {k: v for k, v in os.environ.items() if k != 'BACKTESTING_DATA_SOURCE'}
+        import logging
+        caplog.set_level(logging.INFO, logger='lumibot.strategies._strategy')
         with patch.dict(os.environ, env_without_datasource, clear=True):
             # Re-import credentials to pick up env change
             from importlib import reload
@@ -240,9 +276,13 @@ class TestBacktestingDataSourceEnv:
                 # Expected to fail with test credentials - that's okay
                 pass
-            # Verify ThetaData was attempted (no auto-select message since it's the default)
-            assert any("Cannot connect to Theta Data" in record.message or "ThetaData" in record.message
-                      for record in caplog.records), "ThetaData was not used as default"
+            # Verify ThetaData was attempted (look for override message or Theta-specific logs)
+            assert any(
+                "Using BACKTESTING_DATA_SOURCE setting for backtest data: ThetaData" in record.message
+                or "Cannot connect to Theta Data" in record.message
+                or "ThetaData" in record.message
+                for record in caplog.records
+            ), "ThetaData was not used as default"
 if __name__ == "__main__":

tests/test_continuous_futures_resolution.py CHANGED Viewed

@@ -9,6 +9,7 @@ from lumibot.tools.databento_helper import (
     _format_futures_symbol_for_databento,
 )
 from lumibot.entities import Asset
+from lumibot.entities.asset import FUTURES_MONTH_CODES
 class TestContinuousFuturesResolution(unittest.TestCase):
@@ -107,14 +108,26 @@ class TestContinuousFuturesResolution(unittest.TestCase):
         """Test contract generation around year boundaries with expiration-aware logic."""
         asset = Asset("ES", asset_type=Asset.AssetType.CONT_FUTURE)
+        from lumibot.tools import futures_roll
         contract = asset.resolve_continuous_futures_contract(reference_date=datetime(2025, 12, 31))
         self.assertEqual(contract, 'ESH26')
         contract = asset.resolve_continuous_futures_contract(reference_date=datetime(2026, 1, 1))
         self.assertEqual(contract, 'ESH26')
-        contract = asset.resolve_continuous_futures_contract(reference_date=datetime(2025, 12, 14))
-        self.assertEqual(contract, 'ESZ25')
+        pre_trigger = datetime(2025, 12, 8)
+        post_trigger = datetime(2025, 12, 9)
+        year_pre, month_pre = futures_roll.determine_contract_year_month("ES", pre_trigger)
+        expected_pre = asset._build_contract_variants(f"ES{FUTURES_MONTH_CODES[month_pre]}", year_pre)[2]
+        contract = asset.resolve_continuous_futures_contract(reference_date=pre_trigger)
+        self.assertEqual(contract, expected_pre)
+        year_post, month_post = futures_roll.determine_contract_year_month("ES", post_trigger)
+        expected_post = asset._build_contract_variants(f"ES{FUTURES_MONTH_CODES[month_post]}", year_post)[2]
+        contract = asset.resolve_continuous_futures_contract(reference_date=post_trigger)
+        self.assertEqual(contract, expected_post)
     def test_different_symbol_formats(self):
         """Test continuous futures resolution with different symbol formats."""
@@ -229,34 +242,32 @@ class TestContinuousFuturesResolution(unittest.TestCase):
         """
         asset = Asset("MES", asset_type=Asset.AssetType.CONT_FUTURE)
-        # Test that contract resolution properly accounts for 3rd Friday expiration
-        # Rollover happens on 15th of expiry month to avoid expired contracts
-        quarterly_tests = [
-            # Q1: Jan-Feb should resolve to March (H), Mar 15+ should roll to June (M)
-            (datetime(2024, 1, 15), 'H24'),
-            (datetime(2024, 2, 15), 'H24'),
-            (datetime(2024, 3, 14), 'H24'),  # Before rollover
-            (datetime(2024, 3, 15), 'M24'),  # After rollover (Mar expires ~21st)
-            # Q2: Apr-May should resolve to June (M), Jun 15+ should roll to Sep (U)
-            (datetime(2024, 4, 15), 'M24'),
-            (datetime(2024, 5, 15), 'M24'),
-            (datetime(2024, 6, 14), 'M24'),  # Before rollover
-            (datetime(2024, 6, 15), 'U24'),  # After rollover (Jun expires ~20th)
-            # Q3: Jul-Aug should resolve to September (U), Sep 15+ should roll to Dec (Z)
-            (datetime(2024, 7, 15), 'U24'),
-            (datetime(2024, 8, 15), 'U24'),
-            (datetime(2024, 9, 14), 'U24'),  # Before rollover
-            (datetime(2024, 9, 15), 'Z24'),  # After rollover (Sep expires ~19th)
-            # Q4: Oct-Nov should resolve to December (Z), Dec 15+ should roll to Mar next year (H)
-            (datetime(2024, 10, 15), 'Z24'),
-            (datetime(2024, 11, 15), 'Z24'),
-            (datetime(2024, 12, 14), 'Z24'),  # Before rollover
-            (datetime(2024, 12, 15), 'H25'),  # After rollover (Dec expires ~19th)
+        from lumibot.tools import futures_roll
+        quarterly_dates = [
+            datetime(2024, 1, 15),
+            datetime(2024, 2, 15),
+            datetime(2024, 3, 4),
+            datetime(2024, 3, 5),
+            datetime(2024, 4, 15),
+            datetime(2024, 5, 15),
+            datetime(2024, 6, 10),
+            datetime(2024, 6, 11),
+            datetime(2024, 7, 15),
+            datetime(2024, 8, 15),
+            datetime(2024, 9, 9),
+            datetime(2024, 9, 10),
+            datetime(2024, 10, 15),
+            datetime(2024, 11, 15),
+            datetime(2024, 12, 9),
+            datetime(2024, 12, 10),
         ]
-        for test_date, expected_suffix in quarterly_tests:
+        for test_date in quarterly_dates:
+            year, month = futures_roll.determine_contract_year_month("MES", test_date)
+            month_code = FUTURES_MONTH_CODES[month]
+            expected_contract = asset._build_contract_variants(f"MES{month_code}", year)[2]
             contract = asset.resolve_continuous_futures_contract(reference_date=test_date)
-            expected_contract = f"MES{expected_suffix}"
             self.assertEqual(
                 contract,
                 expected_contract,
@@ -270,30 +281,31 @@ class TestContinuousFuturesResolution(unittest.TestCase):
         """
         asset = Asset("ES", asset_type=Asset.AssetType.CONT_FUTURE)
-        # Test around March 2025 expiration (3rd Friday is March 21st)
-        test_cases = [
-            (datetime(2025, 3, 14), 'ESH25'),  # Before rollover - still March
-            (datetime(2025, 3, 15), 'ESM25'),  # Rollover day - move to June
-            (datetime(2025, 3, 21), 'ESM25'),  # Actual expiry day - already rolled
-            (datetime(2025, 3, 22), 'ESM25'),  # After expiry - definitely rolled
-            # Test around June 2025 expiration (3rd Friday is June 20th)
-            (datetime(2025, 6, 14), 'ESM25'),  # Before rollover - still June
-            (datetime(2025, 6, 15), 'ESU25'),  # Rollover day - move to September
-            (datetime(2025, 6, 20), 'ESU25'),  # Actual expiry day - already rolled
-            # Test around December 2025 expiration (3rd Friday is December 19th)
-            (datetime(2025, 12, 14), 'ESZ25'),  # Before rollover - still December
-            (datetime(2025, 12, 15), 'ESH26'),  # Rollover day - move to March next year
-            (datetime(2025, 12, 19), 'ESH26'),  # Actual expiry day - already rolled
+        from lumibot.tools import futures_roll
+        check_dates = [
+            datetime(2025, 3, 10),
+            datetime(2025, 3, 11),
+            datetime(2025, 3, 21),
+            datetime(2025, 3, 22),
+            datetime(2025, 6, 9),
+            datetime(2025, 6, 10),
+            datetime(2025, 6, 20),
+            datetime(2025, 12, 8),
+            datetime(2025, 12, 9),
+            datetime(2025, 12, 19),
         ]
-        for test_date, expected_contract in test_cases:
+        for test_date in check_dates:
+            year, month = futures_roll.determine_contract_year_month("ES", test_date)
+            month_code = FUTURES_MONTH_CODES[month]
+            expected = asset._build_contract_variants(f"ES{month_code}", year)[2]
             contract = asset.resolve_continuous_futures_contract(reference_date=test_date)
             self.assertEqual(
                 contract,
-                expected_contract,
-                f"Date {test_date.strftime('%Y-%m-%d')} should resolve to {expected_contract}, got {contract}",
+                expected,
+                f"Date {test_date.strftime('%Y-%m-%d')} should resolve to {expected}, got {contract}",
             )

tests/test_data_polars_parity.py ADDED Viewed

@@ -0,0 +1,160 @@
+"""
+Regression test for Data vs DataPolars parity bug.
+This test isolates the issue where DataPolars returns 234 rows when asked for 2 rows
+with timeshift=-2 parameter.
+"""
+from datetime import datetime, timedelta, timezone
+import pandas as pd
+import polars as pl
+import pytest
+from lumibot.entities import Data, DataPolars, Asset
+def _create_mock_ohlc_data(start: datetime, periods: int = 300) -> pd.DataFrame:
+    """Create mock OHLC data for testing.
+    Args:
+        start: Starting datetime (must be timezone-aware)
+        periods: Number of minute bars to generate
+    Returns:
+        DataFrame with OHLC data indexed by timestamp
+    """
+    index = pd.date_range(start=start, periods=periods, freq="1min", tz=timezone.utc)
+    data = {
+        "open": [200 + i * 0.1 for i in range(periods)],
+        "high": [201 + i * 0.1 for i in range(periods)],
+        "low": [199 + i * 0.1 for i in range(periods)],
+        "close": [200.5 + i * 0.1 for i in range(periods)],
+        "volume": [10000 + i * 100 for i in range(periods)],
+    }
+    return pd.DataFrame(data, index=index)
+def test_data_polars_row_count_parity():
+    """
+    Test that Data and DataPolars return the same number of rows for identical requests.
+    This reproduces the bug where:
+    - Data.get_bars(length=2, timeshift=-2) returns 2 rows
+    - DataPolars.get_bars(length=2, timeshift=-2) returns 234 rows
+    """
+    # Create mock data starting at market open
+    start = datetime(2024, 7, 18, 9, 30, tzinfo=timezone.utc)
+    mock_df = _create_mock_ohlc_data(start, periods=300)
+    # Create asset
+    asset = Asset("HIMS", asset_type=Asset.AssetType.STOCK)
+    # Create Data instance (pandas mode)
+    data_pandas = Data(
+        asset=asset,
+        df=mock_df.copy(),
+        timestep="minute",
+        quote=asset,
+    )
+    # Create DataPolars instance (polars mode)
+    # Convert to polars format with datetime column
+    mock_df_reset = mock_df.reset_index()
+    mock_df_reset.columns = ["datetime", "open", "high", "low", "close", "volume"]
+    mock_polars = pl.from_pandas(mock_df_reset)
+    data_polars = DataPolars(
+        asset=asset,
+        df=mock_polars,
+        timestep="minute",
+        quote=asset,
+    )
+    # Test at a specific datetime (10:00 AM = 30 minutes after market open)
+    test_dt = datetime(2024, 7, 18, 10, 0, tzinfo=timezone.utc)
+    # Request 2 bars with timeshift=-2
+    # This should return bars at 09:58 and 09:59
+    # get_bars() returns DataFrames directly
+    df_pandas = data_pandas.get_bars(
+        dt=test_dt,
+        length=2,
+        timestep="minute",
+        timeshift=-2
+    )
+    df_polars = data_polars.get_bars(
+        dt=test_dt,
+        length=2,
+        timestep="minute",
+        timeshift=-2
+    )
+    # CRITICAL ASSERTIONS
+    assert len(df_pandas) == 2, f"Pandas should return 2 rows, got {len(df_pandas)}"
+    assert len(df_polars) == 2, f"Polars should return 2 rows, got {len(df_polars)}"
+    assert len(df_pandas) == len(df_polars), (
+        f"Row count mismatch! Pandas returned {len(df_pandas)} rows, "
+        f"Polars returned {len(df_polars)} rows"
+    )
+def test_data_polars_timeshift_timedelta():
+    """
+    Test timeshift parameter handling when passed as timedelta.
+    Tests the conversion of timedelta(minutes=-2) to integer offset.
+    """
+    start = datetime(2024, 7, 18, 9, 30, tzinfo=timezone.utc)
+    mock_df = _create_mock_ohlc_data(start, periods=300)
+    asset = Asset("HIMS", asset_type=Asset.AssetType.STOCK)
+    # Create Data instance
+    data_pandas = Data(
+        asset=asset,
+        df=mock_df.copy(),
+        timestep="minute",
+        quote=asset,
+    )
+    # Create DataPolars instance
+    mock_df_reset = mock_df.reset_index()
+    mock_df_reset.columns = ["datetime", "open", "high", "low", "close", "volume"]
+    mock_polars = pl.from_pandas(mock_df_reset)
+    data_polars = DataPolars(
+        asset=asset,
+        df=mock_polars,
+        timestep="minute",
+        quote=asset,
+    )
+    test_dt = datetime(2024, 7, 18, 10, 0, tzinfo=timezone.utc)
+    # Test with timedelta parameter (this is what the backtest engine uses)
+    timeshift_td = timedelta(minutes=-2)
+    # get_bars() returns DataFrames directly
+    df_pandas = data_pandas.get_bars(
+        dt=test_dt,
+        length=2,
+        timestep="minute",
+        timeshift=timeshift_td
+    )
+    df_polars = data_polars.get_bars(
+        dt=test_dt,
+        length=2,
+        timestep="minute",
+        timeshift=timeshift_td
+    )
+    assert len(df_pandas) == 2, f"Pandas should return 2 rows with timedelta timeshift"
+    assert len(df_polars) == 2, f"Polars should return 2 rows with timedelta timeshift"
+    assert len(df_pandas) == len(df_polars), "Row count mismatch with timedelta timeshift"
+if __name__ == "__main__":
+    # Run tests with verbose output
+    pytest.main([__file__, "-v", "-s"])

tests/test_databento_asset_validation.py CHANGED Viewed

@@ -2,6 +2,8 @@
 Tests for DataBento asset type validation
 """
 import pytest
+import pandas as pd
+import polars as pl
 from datetime import datetime, timedelta
 from unittest.mock import Mock, patch
@@ -26,8 +28,19 @@ class TestDataBentoAssetValidation:
         for asset in future_assets:
             # Should not raise an exception during validation
             # (We'll mock the actual API call)
-            with patch('lumibot.data_sources.databento_data.databento_helper.get_price_data_from_databento') as mock_get_data:
-                mock_get_data.return_value = Mock()
+            with patch(
+                'lumibot.data_sources.databento_data_pandas.databento_helper_polars.get_price_data_from_databento_polars'
+            ) as mock_get_data:
+                mock_get_data.return_value = pl.DataFrame(
+                    {
+                        "datetime": [datetime.now()],
+                        "open": [100.0],
+                        "high": [101.0],
+                        "low": [99.0],
+                        "close": [100.5],
+                        "volume": [1000],
+                    }
+                )
                 try:
                     data_source.get_historical_prices(asset, 10, "minute")
                     # If we get here, validation passed
@@ -49,9 +62,14 @@ class TestDataBentoAssetValidation:
             Asset("SPY", "stock"),  # string format
         ]
-        for asset in equity_assets:
-            with pytest.raises(ValueError, match="only supports futures assets"):
-                data_source.get_historical_prices(asset, 10, "minute")
+        with patch(
+            'lumibot.data_sources.databento_data_pandas.databento_helper_polars.get_price_data_from_databento_polars'
+        ) as mock_get_data:
+            for asset in equity_assets:
+                result = data_source.get_historical_prices(asset, 10, "minute")
+                assert result is None
+        mock_get_data.assert_not_called()
     def test_helper_function_allows_all_assets(self):
         """Test that helper function allows all asset types (validation is only in live data source)"""

tests/test_databento_backtesting.py CHANGED Viewed

@@ -4,7 +4,7 @@ from datetime import datetime, timedelta
 import pandas as pd
 import pytz
-from lumibot.backtesting.databento_backtesting import DataBentoDataBacktesting
+from lumibot.backtesting.databento_backtesting_pandas import DataBentoDataBacktestingPandas as DataBentoDataBacktesting
 from lumibot.entities import Asset, Data

lumibot 4.1.2__py3-none-any.whl → 4.2.0__py3-none-any.whl

Potentially problematic release.

lumibot 4.1.2__py3-none-any.whl → 4.2.0__py3-none-any.whl