lumibot 4.1.3__py3-none-any.whl → 4.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of lumibot might be problematic.
- lumibot/backtesting/__init__.py +19 -5
- lumibot/backtesting/backtesting_broker.py +98 -18
- lumibot/backtesting/databento_backtesting.py +5 -686
- lumibot/backtesting/databento_backtesting_pandas.py +738 -0
- lumibot/backtesting/databento_backtesting_polars.py +860 -546
- lumibot/backtesting/fix_debug.py +37 -0
- lumibot/backtesting/thetadata_backtesting.py +9 -355
- lumibot/backtesting/thetadata_backtesting_pandas.py +1178 -0
- lumibot/brokers/alpaca.py +8 -1
- lumibot/brokers/schwab.py +12 -2
- lumibot/credentials.py +13 -0
- lumibot/data_sources/__init__.py +5 -8
- lumibot/data_sources/data_source.py +6 -2
- lumibot/data_sources/data_source_backtesting.py +30 -0
- lumibot/data_sources/databento_data.py +5 -390
- lumibot/data_sources/databento_data_pandas.py +440 -0
- lumibot/data_sources/databento_data_polars.py +15 -9
- lumibot/data_sources/pandas_data.py +30 -17
- lumibot/data_sources/polars_data.py +986 -0
- lumibot/data_sources/polars_mixin.py +472 -96
- lumibot/data_sources/polygon_data_polars.py +5 -0
- lumibot/data_sources/yahoo_data.py +9 -2
- lumibot/data_sources/yahoo_data_polars.py +5 -0
- lumibot/entities/__init__.py +15 -0
- lumibot/entities/asset.py +5 -28
- lumibot/entities/bars.py +89 -20
- lumibot/entities/data.py +29 -6
- lumibot/entities/data_polars.py +668 -0
- lumibot/entities/position.py +38 -4
- lumibot/strategies/_strategy.py +2 -1
- lumibot/strategies/strategy.py +61 -49
- lumibot/tools/backtest_cache.py +284 -0
- lumibot/tools/databento_helper.py +35 -35
- lumibot/tools/databento_helper_polars.py +738 -775
- lumibot/tools/futures_roll.py +251 -0
- lumibot/tools/indicators.py +135 -104
- lumibot/tools/polars_utils.py +142 -0
- lumibot/tools/thetadata_helper.py +1068 -134
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/METADATA +9 -1
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/RECORD +71 -147
- tests/backtest/test_databento.py +37 -6
- tests/backtest/test_databento_comprehensive_trading.py +8 -4
- tests/backtest/test_databento_parity.py +4 -2
- tests/backtest/test_debug_avg_fill_price.py +1 -1
- tests/backtest/test_example_strategies.py +11 -1
- tests/backtest/test_futures_edge_cases.py +3 -3
- tests/backtest/test_futures_single_trade.py +2 -2
- tests/backtest/test_futures_ultra_simple.py +2 -2
- tests/backtest/test_polars_lru_eviction.py +470 -0
- tests/backtest/test_yahoo.py +42 -0
- tests/test_asset.py +4 -4
- tests/test_backtest_cache_manager.py +149 -0
- tests/test_backtesting_data_source_env.py +6 -0
- tests/test_continuous_futures_resolution.py +60 -48
- tests/test_data_polars_parity.py +160 -0
- tests/test_databento_asset_validation.py +23 -5
- tests/test_databento_backtesting.py +1 -1
- tests/test_databento_backtesting_polars.py +312 -192
- tests/test_databento_data.py +220 -463
- tests/test_databento_live.py +10 -10
- tests/test_futures_roll.py +38 -0
- tests/test_indicator_subplots.py +101 -0
- tests/test_market_infinite_loop_bug.py +77 -3
- tests/test_polars_resample.py +67 -0
- tests/test_polygon_helper.py +46 -0
- tests/test_thetadata_backwards_compat.py +97 -0
- tests/test_thetadata_helper.py +222 -23
- tests/test_thetadata_pandas_verification.py +186 -0
- lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/__pycache__/constants.cpython-312.pyc +0 -0
- lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
- lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/WHEEL +0 -0
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/licenses/LICENSE +0 -0
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,668 @@
+import datetime
+from decimal import Decimal
+from typing import Optional, Union
+
+import pandas as pd
+import polars as pl
+
+from lumibot.constants import LUMIBOT_DEFAULT_PYTZ as DEFAULT_PYTZ
+from lumibot.tools.helpers import parse_timestep_qty_and_unit, to_datetime_aware
+from lumibot.tools.lumibot_logger import get_logger
+
+from .asset import Asset
+from .dataline import Dataline
+
+logger = get_logger(__name__)
+
+# Set the option to raise an error if downcasting is not possible (if available in this pandas version)
+try:
+    pd.set_option('future.no_silent_downcasting', True)
+except (pd._config.config.OptionError, AttributeError):
+    # Option not available in this pandas version, skip it
+    pass
+
+
+class DataPolars:
+    """Input and manage Polars dataframes for backtesting.
+
+    This is a polars-optimized version of the Data class that stores data as polars
+    DataFrames internally and only converts to pandas when explicitly requested.
+
+    Parameters
+    ----------
+    asset : Asset Object
+        Asset to which this data is attached.
+    df : polars.DataFrame
+        Polars DataFrame containing OHLCV etc. trade data.
+        Must have a 'datetime' column with datetime type.
+        Other columns are strictly ["open", "high", "low", "close", "volume"]
+    quote : Asset Object
+        The quote asset for this data. If not provided, then the quote asset will default to USD.
+    date_start : Datetime or None
+        Starting date for this data, if not provided then first date in
+        the dataframe.
+    date_end : Datetime or None
+        Ending date for this data, if not provided then last date in
+        the dataframe.
+    trading_hours_start : datetime.time or None
+        If not supplied, then default is 0001 hrs.
+    trading_hours_end : datetime.time or None
+        If not supplied, then default is 2359 hrs.
+    timestep : str
+        Either "minute" (default) or "day"
+    timezone : str or None
+        If not None, then localize the timezone of the dataframe to the
+        given timezone as a string. The values can be any supported by tz_localize,
+        e.g. "US/Eastern", "UTC", etc.
+
+    Attributes
+    ----------
+    asset : Asset Object
+        Asset object to which this data is attached.
+    symbol : str
+        The underlying or stock symbol as a string.
+    polars_df : polars.DataFrame
+        Polars DataFrame containing OHLCV etc trade data.
+        Has a 'datetime' column with datetime type.
+        Other columns are strictly ["open", "high", "low", "close", "volume"]
+    df : pandas.DataFrame (property)
+        Pandas DataFrame conversion for compatibility.
+        This is computed on-demand and cached.
+    date_start : Datetime or None
+        Starting date for this data.
+    date_end : Datetime or None
+        Ending date for this data.
+    trading_hours_start : datetime.time or None
+        Trading hours start time.
+    trading_hours_end : datetime.time or None
+        Trading hours end time.
+    timestep : str
+        Either "minute" (default) or "day"
+    """
+
+    MIN_TIMESTEP = "minute"
+    TIMESTEP_MAPPING = [
+        {"timestep": "day", "representations": ["1D", "day"]},
+        {"timestep": "minute", "representations": ["1M", "minute"]},
+    ]
+
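
For orientation, here is a minimal construction sketch; it is not part of the package. The symbol and prices are hypothetical, and the module path is inferred from the diff's file name, so treat it as an assumption.

    # Hypothetical usage sketch: build a DataPolars from an in-memory polars frame.
    import datetime
    import polars as pl
    from lumibot.constants import LUMIBOT_DEFAULT_PYTZ
    from lumibot.entities import Asset
    from lumibot.entities.data_polars import DataPolars  # module path assumed from the diff

    tz = LUMIBOT_DEFAULT_PYTZ
    df = pl.DataFrame({
        "datetime": [tz.localize(datetime.datetime(2025, 1, 6, 9, 30)),
                     tz.localize(datetime.datetime(2025, 1, 6, 9, 31))],
        "open": [100.0, 100.5],
        "high": [100.6, 100.9],
        "low": [99.8, 100.2],
        "close": [100.5, 100.7],
        "volume": [1200, 950],
    })
    data = DataPolars(Asset("SPY"), df, timestep="minute")  # quote is only required for crypto assets
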
+    def __init__(
+        self,
+        asset,
+        df,
+        date_start=None,
+        date_end=None,
+        trading_hours_start=datetime.time(0, 0),
+        trading_hours_end=datetime.time(23, 59),
+        timestep="minute",
+        quote=None,
+        timezone=None,
+    ):
+        self.asset = asset
+        self.symbol = self.asset.symbol
+
+        if self.asset.asset_type == "crypto" and quote is None:
+            raise ValueError(
+                f"A crypto asset {self.symbol} was added to data without a corresponding "
+                f"`quote` asset. Please add the quote asset. For example, if trying to add "
+                f"`BTCUSD` to data, you would need to add `USD` as the quote asset. "
+                f"Quote must be provided for crypto assets."
+            )
+        else:
+            self.quote = quote
+
+        # Throw an error if the quote is not an asset object
+        if self.quote is not None and not isinstance(self.quote, Asset):
+            raise ValueError(
+                f"The quote asset for DataPolars must be an Asset object. You provided a {type(self.quote)} object."
+            )
+
+        if timestep not in ["minute", "day"]:
+            raise ValueError(
+                f"Timestep must be either 'minute' or 'day', the value you entered ({timestep}) is not currently supported."
+            )
+
+        self.timestep = timestep
+
+        # Store the polars DataFrame
+        self.polars_df = self._columns(df)
+
+        # Ensure datetime column exists and is properly typed
+        if "datetime" not in self.polars_df.columns:
+            raise ValueError("Polars DataFrame must have a 'datetime' column")
+
+        # Convert datetime column to proper type if needed
+        # CRITICAL: Preserve timezone if it already exists (e.g., UTC from DataBento)
+        dtype = self.polars_df.schema["datetime"]
+        if isinstance(dtype, pl.datatypes.Datetime) and dtype.time_zone:
+            # Column already has timezone, preserve it during cast
+            desired = pl.datatypes.Datetime(time_unit=dtype.time_unit, time_zone=dtype.time_zone)
+            self.polars_df = self.polars_df.with_columns(pl.col("datetime").cast(desired))
+        elif self.polars_df["datetime"].dtype != pl.Datetime:
+            # No timezone, cast to naive datetime
+            self.polars_df = self.polars_df.with_columns(
+                pl.col("datetime").cast(pl.Datetime(time_unit="ns"))
+            )
+
+        # Apply timezone if specified
+        if timezone is not None:
+            # For polars, we'll handle timezone in the pandas conversion
+            self._timezone = timezone
+        else:
+            self._timezone = None
+
+        # Set dates and times
+        self.polars_df = self.polars_df.sort("datetime")
+
+        self.trading_hours_start, self.trading_hours_end = self.set_times(trading_hours_start, trading_hours_end)
+        self.date_start, self.date_end = self.set_dates(date_start, date_end)
+
+        self.polars_df = self.trim_data(
+            self.polars_df,
+            self.date_start,
+            self.date_end,
+            self.trading_hours_start,
+            self.trading_hours_end,
+        )
+
+        # Set datetime start and end from polars DataFrame
+        self.datetime_start = self.polars_df["datetime"][0]
+        self.datetime_end = self.polars_df["datetime"][-1]
+
+        # Convert polars datetime to pandas datetime for compatibility
+        if hasattr(self.datetime_start, 'to_pydatetime'):
+            self.datetime_start = self.datetime_start.to_pydatetime()
+        if hasattr(self.datetime_end, 'to_pydatetime'):
+            self.datetime_end = self.datetime_end.to_pydatetime()
+
+        # Cached pandas DataFrame (lazy conversion)
+        self._pandas_df = None
+
+    @property
+    def df(self):
+        """Return pandas DataFrame for compatibility. Converts from polars on-demand."""
+        if self._pandas_df is None:
+            logger.debug(f"[CONVERSION] DataPolars.df | polars → pandas | {self.symbol}")
+
+            # Check if polars datetime has timezone
+            polars_tz = None
+            if "datetime" in self.polars_df.columns:
+                polars_tz = self.polars_df["datetime"].dtype.time_zone
+
+            # Convert polars to pandas and set datetime as index
+            self._pandas_df = self.polars_df.to_pandas()
+
+            if "datetime" in self._pandas_df.columns:
+                self._pandas_df.set_index("datetime", inplace=True)
+
+            # Apply timezone conversion: UTC → America/New_York
+            if self._timezone is not None:
+                # Explicit timezone parameter takes priority
+                if not self._pandas_df.index.tzinfo:
+                    self._pandas_df.index = self._pandas_df.index.tz_localize(self._timezone)
+                else:
+                    self._pandas_df.index = self._pandas_df.index.tz_convert(self._timezone)
+            elif polars_tz is not None:
+                # Polars had timezone (e.g., UTC from DataBento), convert to DEFAULT_PYTZ
+                if not self._pandas_df.index.tzinfo:
+                    # Timezone lost during conversion, re-localize then convert
+                    self._pandas_df.index = self._pandas_df.index.tz_localize(polars_tz)
+                    self._pandas_df.index = self._pandas_df.index.tz_convert(DEFAULT_PYTZ)
+                elif str(self._pandas_df.index.tz) != str(DEFAULT_PYTZ):
+                    # Timezone preserved, just convert
+                    self._pandas_df.index = self._pandas_df.index.tz_convert(DEFAULT_PYTZ)
+            elif not self._pandas_df.index.tzinfo:
+                # No timezone info, localize to DEFAULT_PYTZ
+                self._pandas_df.index = self._pandas_df.index.tz_localize(DEFAULT_PYTZ)
+            elif str(self._pandas_df.index.tz) != str(DEFAULT_PYTZ):
+                # Different timezone, convert to DEFAULT_PYTZ
+                self._pandas_df.index = self._pandas_df.index.tz_convert(DEFAULT_PYTZ)
+
+        return self._pandas_df
+
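
As the property implies, the pandas view is materialized once and then reused; a short sketch of that behavior, continuing the hypothetical `data` object from the construction example above:

    pdf_first = data.df   # first access converts polars → pandas, indexes by datetime, normalizes tz
    pdf_again = data.df   # cached frame is returned; no second conversion
    assert pdf_first is pdf_again
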
+    def set_times(self, trading_hours_start, trading_hours_end):
+        """Set the start and end times for the data. The default is 0001 hrs to 2359 hrs."""
+        if self.timestep == "minute":
+            ts = trading_hours_start
+            te = trading_hours_end
+        else:
+            ts = datetime.time(0, 0)
+            te = datetime.time(23, 59, 59, 999999)
+        return ts, te
+
+    def _columns(self, df):
+        """Adjust column names to lower case."""
+        # Rename columns to lowercase if they match OHLCV
+        rename_map = {}
+        for col in df.columns:
+            if col.lower() in ["open", "high", "low", "close", "volume"]:
+                rename_map[col] = col.lower()
+
+        if rename_map:
+            df = df.rename(rename_map)
+
+        return df
+
+    def set_dates(self, date_start, date_end):
+        """Set the start and end dates of the data."""
+        for dt in [date_start, date_end]:
+            if dt and not isinstance(dt, datetime.datetime):
+                raise TypeError(f"Start and End dates must be entered as full datetimes. {dt} was entered")
+
+        if not date_start:
+            date_start = self.polars_df["datetime"].min()
+            if hasattr(date_start, 'to_pydatetime'):
+                date_start = date_start.to_pydatetime()
+        if not date_end:
+            date_end = self.polars_df["datetime"].max()
+            if hasattr(date_end, 'to_pydatetime'):
+                date_end = date_end.to_pydatetime()
+
+        date_start = to_datetime_aware(date_start)
+        date_end = to_datetime_aware(date_end)
+
+        date_start = date_start.replace(hour=0, minute=0, second=0, microsecond=0)
+        date_end = date_end.replace(hour=23, minute=59, second=59, microsecond=999999)
+
+        return date_start, date_end
+
+    def trim_data(self, df, date_start, date_end, trading_hours_start, trading_hours_end):
+        """Trim the polars dataframe to match the desired backtesting dates."""
+        # Align date comparisons to polars datetime column timezone (matching pandas approach)
+        datetime_tz = df["datetime"].dtype.time_zone if "datetime" in df.columns else None
+
+        # Convert comparison timestamps to match column timezone
+        if datetime_tz is not None:
+            # Column has timezone, align dates to it
+            date_start_aligned = pd.Timestamp(date_start).tz_convert(datetime_tz) if hasattr(pd.Timestamp(date_start), 'tz_convert') else pd.Timestamp(date_start).tz_localize(datetime_tz)
+            date_end_aligned = pd.Timestamp(date_end).tz_convert(datetime_tz) if hasattr(pd.Timestamp(date_end), 'tz_convert') else pd.Timestamp(date_end).tz_localize(datetime_tz)
+        else:
+            # Column is naive, make dates naive too
+            date_start_aligned = pd.Timestamp(date_start).tz_localize(None) if hasattr(pd.Timestamp(date_start), 'tz') and pd.Timestamp(date_start).tz else pd.Timestamp(date_start)
+            date_end_aligned = pd.Timestamp(date_end).tz_localize(None) if hasattr(pd.Timestamp(date_end), 'tz') and pd.Timestamp(date_end).tz else pd.Timestamp(date_end)
+
+        # Filter by date range
+        df = df.filter(
+            (pl.col("datetime") >= date_start_aligned) & (pl.col("datetime") <= date_end_aligned)
+        )
+
+        # Filter by trading hours if minute data
+        if self.timestep == "minute":
+            df = df.filter(
+                (pl.col("datetime").dt.time() >= trading_hours_start) &
+                (pl.col("datetime").dt.time() <= trading_hours_end)
+            )
+
+        if df.height == 0:
+            raise ValueError(
+                f"When attempting to load a dataframe for {self.asset}, "
+                f"an empty dataframe was returned. This is likely due "
+                f"to your backtesting start and end dates not being "
+                f"within the start and end dates of the data provided. "
+                f"\nPlease check that at least one of your start "
+                f"or end dates for backtesting is within the range of "
+                f"your start and end dates for your data. "
+            )
+        return df
+
+    def repair_times_and_fill(self, idx):
+        """Create datalines and fill missing values.
+
+        This converts to pandas for compatibility with the existing dataline system.
+        """
+        # Get pandas DataFrame
+        df = self.df
+
+        # OPTIMIZATION: Use searchsorted instead of expensive boolean indexing
+        start_pos = idx.searchsorted(self.datetime_start, side='left')
+        end_pos = idx.searchsorted(self.datetime_end, side='right')
+        idx = idx[start_pos:end_pos]
+
+        # OPTIMIZATION: More efficient duplicate removal
+        if df.index.has_duplicates:
+            df = df[~df.index.duplicated(keep='first')]
+
+        # Reindex the DataFrame with the new index and forward-fill missing values.
+        df = df.reindex(idx, method="ffill")
+
+        # Check if we have a volume column, if not then add it and fill with 0 or NaN.
+        if "volume" in df.columns:
+            df.loc[df["volume"].isna(), "volume"] = 0
+        else:
+            df["volume"] = None
+
+        # OPTIMIZATION: More efficient column selection and forward fill
+        ohlc_cols = ["open", "high", "low"]
+        non_ohlc_cols = [col for col in df.columns if col not in ohlc_cols]
+        if non_ohlc_cols:
+            df[non_ohlc_cols] = df[non_ohlc_cols].ffill()
+
+        # If any of close, open, high, low columns are missing, add them with NaN.
+        for col in ["close", "open", "high", "low"]:
+            if col not in df.columns:
+                df[col] = None
+
+        # OPTIMIZATION: Vectorized NaN filling for OHLC columns
+        if "close" in df.columns:
+            for col in ["open", "high", "low"]:
+                if col in df.columns:
+                    try:
+                        # More efficient: compute mask once, use where
+                        mask = df[col].isna()
+                        if mask.any():
+                            df[col] = df[col].where(~mask, df["close"])
+                    except Exception as e:
+                        logger.error(f"Error filling {col} column: {e}")
+
+        # Update the cached pandas DataFrame
+        self._pandas_df = df
+
+        # Set up iter_index and iter_index_dict for later use.
+        iter_index = pd.Series(df.index)
+        self.iter_index = pd.Series(iter_index.index, index=iter_index)
+        self.iter_index_dict = self.iter_index.to_dict()
+
+        # Populate the datalines dictionary.
+        self.datalines = dict()
+        self.to_datalines()
+
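
The reindex-and-forward-fill step above is the standard pandas gap-filling pattern; a standalone sketch of its effect, with illustrative values:

    import pandas as pd

    idx = pd.date_range("2025-01-06 09:30", periods=4, freq="min")
    df = pd.DataFrame({"close": [100.0, 101.0], "volume": [500, 600]}, index=idx[[0, 2]])
    df = df.reindex(idx, method="ffill")       # missing 09:31 and 09:33 bars take the prior bar's values
    df.loc[df["volume"].isna(), "volume"] = 0  # mirrors the volume handling above
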
+    def to_datalines(self):
+        """Create datalines from the pandas DataFrame."""
+        df = self.df
+
+        self.datalines.update(
+            {
+                "datetime": Dataline(
+                    self.asset,
+                    "datetime",
+                    df.index.to_numpy(),
+                    df.index.dtype,
+                )
+            }
+        )
+        self.datetime = self.datalines["datetime"].dataline
+
+        for column in df.columns:
+            self.datalines.update(
+                {
+                    column: Dataline(
+                        self.asset,
+                        column,
+                        df[column].to_numpy(),
+                        df[column].dtype,
+                    )
+                }
+            )
+            setattr(self, column, self.datalines[column].dataline)
+
+    def get_iter_count(self, dt):
+        """Return the index location for a given datetime."""
+        i = None
+
+        # Check if we have the iter_index_dict, if not then repair the times and fill
+        if getattr(self, "iter_index_dict", None) is None:
+            self.repair_times_and_fill(self.df.index)
+
+        # Search for dt in self.iter_index_dict
+        if dt in self.iter_index_dict:
+            i = self.iter_index_dict[dt]
+        else:
+            # If not found, get the last known data
+            i = self.iter_index.asof(dt)
+
+        return i
+
+    def check_data(func):
+        """Validates if the provided date, length, timeshift, and timestep will return data."""
+        def checker(self, *args, **kwargs):
+            if type(kwargs.get("length", 1)) not in [int, float]:
+                raise TypeError(f"Length must be an integer. {type(kwargs.get('length', 1))} was provided.")
+
+            dt = args[0]
+
+            # Check if the iter date is outside of this data's date range.
+            if dt < self.datetime_start:
+                raise ValueError(
+                    f"The date you are looking for ({dt}) for ({self.asset}) is outside of the data's date range ({self.datetime_start} to {self.datetime_end}). This could be because the data for this asset does not exist for the date you are looking for, or something else."
+                )
+
+            # Search for dt in self.iter_index_dict
+            if getattr(self, "iter_index_dict", None) is None:
+                self.repair_times_and_fill(self.df.index)
+
+            if dt in self.iter_index_dict:
+                i = self.iter_index_dict[dt]
+            else:
+                # If not found, get the last known data
+                i = self.iter_index.asof(dt)
+
+            length = kwargs.get("length", 1)
+            timeshift = kwargs.get("timeshift", 0)
+            # Convert timeshift to integer if it's a timedelta
+            if isinstance(timeshift, datetime.timedelta):
+                timestep = kwargs.get("timestep", self.timestep)
+                if timestep == "day":
+                    timeshift = timeshift.days
+                else:  # minute
+                    timeshift = int(timeshift.total_seconds() / 60)
+            data_index = i + 1 - length - timeshift
+            is_data = data_index >= 0
+            if not is_data:
+                logger.warning(
+                    f"The date you are looking for ({dt}) is outside of the data's date range ({self.datetime_start} to {self.datetime_end}) after accounting for a length of {kwargs.get('length', 1)} and a timeshift of {kwargs.get('timeshift', 0)}. Keep in mind that the length you are requesting must also be available in your data, in this case we are {data_index} rows away from the data you need."
+                )
+
+            res = func(self, *args, **kwargs)
+            return res
+
+        return checker
+
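
The decorator's availability check reduces to index arithmetic: a timedelta timeshift becomes a row count (days for daily data, total minutes for minute data), and the earliest row needed is `i + 1 - length - timeshift`. A standalone sketch mirroring that conversion:

    import datetime

    def timeshift_to_rows(timeshift, timestep):
        # Mirrors check_data: a timedelta becomes a row count for the given timestep.
        if isinstance(timeshift, datetime.timedelta):
            return timeshift.days if timestep == "day" else int(timeshift.total_seconds() / 60)
        return timeshift

    assert timeshift_to_rows(datetime.timedelta(days=2), "day") == 2
    assert timeshift_to_rows(datetime.timedelta(minutes=30), "minute") == 30
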
+    @check_data
+    def get_last_price(self, dt, length=1, timeshift=0) -> Union[float, Decimal, None]:
+        """Returns the last known price of the data."""
+        iter_count = self.get_iter_count(dt)
+        open_price = self.datalines["open"].dataline[iter_count]
+        close_price = self.datalines["close"].dataline[iter_count]
+        price = close_price if dt > self.datalines["datetime"].dataline[iter_count] else open_price
+        return price
+
+    @check_data
+    def get_quote(self, dt, length=1, timeshift=0):
+        """Returns the last known quote data."""
+        required_price_cols = ["open", "high", "low", "close", "volume"]
+        missing_price_cols = [col for col in required_price_cols if col not in self.datalines]
+        if missing_price_cols:
+            logger.warning(
+                "DataPolars object %s is missing price columns %s required for quote retrieval.",
+                self.asset,
+                missing_price_cols,
+            )
+            return {}
+
+        quote_fields = {
+            "open": ("open", 2),
+            "high": ("high", 2),
+            "low": ("low", 2),
+            "close": ("close", 2),
+            "volume": ("volume", 0),
+            "bid": ("bid", 2),
+            "ask": ("ask", 2),
+            "bid_size": ("bid_size", 0),
+            "bid_condition": ("bid_condition", 0),
+            "bid_exchange": ("bid_exchange", 0),
+            "ask_size": ("ask_size", 0),
+            "ask_condition": ("ask_condition", 0),
+            "ask_exchange": ("ask_exchange", 0),
+        }
+
+        missing_quote_cols = [
+            col for col in ["bid", "ask", "bid_size", "ask_size", "bid_condition", "ask_condition",
+                            "bid_exchange", "ask_exchange"]
+            if col not in self.datalines
+        ]
+        if missing_quote_cols:
+            logger.warning(
+                "DataPolars object %s is missing quote columns %s; returning None for those values.",
+                self.asset,
+                missing_quote_cols,
+            )
+
+        iter_count = self.get_iter_count(dt)
+
+        def _get_value(column: str, round_digits: Optional[int]):
+            if column not in self.datalines:
+                return None
+            value = self.datalines[column].dataline[iter_count]
+            try:
+                if round_digits is None:
+                    return value
+                return round(value, round_digits)
+            except TypeError:
+                return value
+
+        quote_dict = {
+            name: _get_value(column, digits) for name, (column, digits) in quote_fields.items()
+        }
+
+        return quote_dict
+
+    @check_data
+    def _get_bars_dict(self, dt, length=1, timestep=None, timeshift=0):
+        """Returns a dictionary of the data."""
+        # Convert timeshift to integer if it's a timedelta
+        if isinstance(timeshift, datetime.timedelta):
+            logger.debug(f"[TIMESHIFT_CONVERT] asset={self.symbol} input_timeshift={timeshift} type={type(timeshift)} repr={repr(timeshift)}")
+            ts = timestep if timestep is not None else self.timestep
+            if ts == "day":
+                timeshift_converted = int(timeshift.total_seconds() / (24 * 3600))
+                logger.debug(f"[TIMESHIFT_CONVERT] asset={self.symbol} timestep=day total_seconds={timeshift.total_seconds()} converted={timeshift_converted}")
+                timeshift = timeshift_converted
+            else:  # minute
+                timeshift_converted = int(timeshift.total_seconds() / 60)
+                logger.debug(f"[TIMESHIFT_CONVERT] asset={self.symbol} timestep=minute total_seconds={timeshift.total_seconds()} converted={timeshift_converted}")
+                timeshift = timeshift_converted
+
+        # Get bars.
+        end_row = self.get_iter_count(dt) - timeshift
+        start_row = end_row - length
+
+        if start_row < 0:
+            start_row = 0
+
+        # Cast both start_row and end_row to int
+        start_row = int(start_row)
+        end_row = int(end_row)
+
+        bars_dict = {}
+        for dl_name, dl in self.datalines.items():
+            bars_dict[dl_name] = dl.dataline[start_row:end_row]
+
+        return bars_dict
+
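
The slice bounds above count backward from the iterator position; with `get_iter_count(dt)` returning 100, `length=10`, and `timeshift=5`, the method slices rows 85 through 94. The same arithmetic in isolation (illustrative numbers):

    i = 100                        # hypothetical row index for dt
    length, timeshift = 10, 5
    end_row = i - timeshift        # 95, the exclusive upper bound of the slice
    start_row = end_row - length   # 85: ten bars ending five bars before dt
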
+    def _get_bars_between_dates_dict(self, timestep=None, start_date=None, end_date=None):
+        """Returns a dictionary of all the data available between the start and end dates."""
+        end_row = self.get_iter_count(end_date)
+        start_row = self.get_iter_count(start_date)
+
+        if start_row < 0:
+            start_row = 0
+
+        # Cast both start_row and end_row to int
+        start_row = int(start_row)
+        end_row = int(end_row)
+
+        bars_dict = {}
+        for dl_name, dl in self.datalines.items():
+            bars_dict[dl_name] = dl.dataline[start_row:end_row]
+
+        return bars_dict
+
+    def get_bars(self, dt, length=1, timestep=MIN_TIMESTEP, timeshift=0):
+        """Returns a dataframe of the data."""
+        # Parse the timestep
+        quantity, timestep = parse_timestep_qty_and_unit(timestep)
+        num_periods = length
+
+        if timestep == "minute" and self.timestep == "day":
+            raise ValueError("You are requesting minute data from a daily data source. This is not supported.")
+
+        if timestep != "minute" and timestep != "day":
+            raise ValueError(f"Only minute and day are supported for timestep. You provided: {timestep}")
+
+        agg_column_map = {
+            "open": "first",
+            "high": "max",
+            "low": "min",
+            "close": "last",
+            "volume": "sum",
+        }
+        if timestep == "day" and self.timestep == "minute":
+            length = length * 1440
+            unit = "D"
+            data = self._get_bars_dict(dt, length=length, timestep="minute", timeshift=timeshift)
+
+        elif timestep == 'day' and self.timestep == 'day':
+            unit = "D"
+            data = self._get_bars_dict(dt, length=length, timestep=timestep, timeshift=timeshift)
+
+        else:
+            unit = "min"
+            length = length * quantity
+            data = self._get_bars_dict(dt, length=length, timestep=timestep, timeshift=timeshift)
+
+        if data is None:
+            return None
+
+        df = pd.DataFrame(data).assign(datetime=lambda df: pd.to_datetime(df['datetime'])).set_index('datetime')
+        if "dividend" in df.columns:
+            agg_column_map["dividend"] = "sum"
+        df_result = df.resample(f"{quantity}{unit}").agg(agg_column_map)
+
+        # Drop any rows that have NaN values
+        df_result = df_result.dropna()
+
+        # Remove partial day data from the current day
+        if timestep == "day" and self.timestep == "minute":
+            df_result = df_result[df_result.index < dt.replace(hour=0, minute=0, second=0, microsecond=0)]
+
+        # Only return the last n rows
+        df_result = df_result.tail(n=int(num_periods))
+
+        return df_result
+
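
A usage sketch for the resampling path, again using the hypothetical minute-level `data` object from earlier: requesting daily bars fetches `length * 1440` minute rows, aggregates them with the OHLCV map, drops the current partial day, and keeps the last `length` rows. This assumes `parse_timestep_qty_and_unit` accepts the timestep strings shown and that the frame holds enough minute history:

    import datetime
    from lumibot.constants import LUMIBOT_DEFAULT_PYTZ

    # Backtests pass timezone-aware datetimes; localize the hypothetical dt the same way.
    dt = LUMIBOT_DEFAULT_PYTZ.localize(datetime.datetime(2025, 1, 10, 10, 0))
    daily = data.get_bars(dt, length=5, timestep="day")        # five completed daily bars
    m15 = data.get_bars(dt, length=12, timestep="15 minutes")  # twelve 15-minute bars
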
+    def get_bars_between_dates(self, timestep=MIN_TIMESTEP, exchange=None, start_date=None, end_date=None):
+        """Returns a dataframe of all the data available between the start and end dates."""
+        if timestep == "minute" and self.timestep == "day":
+            raise ValueError("You are requesting minute data from a daily data source. This is not supported.")
+
+        if timestep != "minute" and timestep != "day":
+            raise ValueError(f"Only minute and day are supported for timestep. You provided: {timestep}")
+
+        if timestep == "day" and self.timestep == "minute":
+            bars_dict = self._get_bars_between_dates_dict(timestep=timestep, start_date=start_date, end_date=end_date)
+
+            if bars_dict is None:
+                return None
+
+            df = pd.DataFrame(bars_dict).set_index("datetime")
+
+            df_result = df.resample("D").agg(
+                {
+                    "open": "first",
+                    "high": "max",
+                    "low": "min",
+                    "close": "last",
+                    "volume": "sum",
+                }
+            )
+
+            return df_result
+
+        else:
+            bars_dict = self._get_bars_between_dates_dict(timestep=timestep, start_date=start_date, end_date=end_date)
+
+            if bars_dict is None:
+                return None
+
+            df = pd.DataFrame(bars_dict).set_index("datetime")
+            return df