investing-algorithm-framework 6.9.1__py3-none-any.whl → 7.19.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of investing-algorithm-framework might be problematic. Click here for more details.
- investing_algorithm_framework/__init__.py +147 -44
- investing_algorithm_framework/app/__init__.py +23 -6
- investing_algorithm_framework/app/algorithm/algorithm.py +5 -41
- investing_algorithm_framework/app/algorithm/algorithm_factory.py +17 -10
- investing_algorithm_framework/app/analysis/__init__.py +15 -0
- investing_algorithm_framework/app/analysis/backtest_data_ranges.py +121 -0
- investing_algorithm_framework/app/analysis/backtest_utils.py +107 -0
- investing_algorithm_framework/app/analysis/permutation.py +116 -0
- investing_algorithm_framework/app/analysis/ranking.py +297 -0
- investing_algorithm_framework/app/app.py +1322 -707
- investing_algorithm_framework/app/context.py +196 -88
- investing_algorithm_framework/app/eventloop.py +590 -0
- investing_algorithm_framework/app/reporting/__init__.py +16 -5
- investing_algorithm_framework/app/reporting/ascii.py +57 -202
- investing_algorithm_framework/app/reporting/backtest_report.py +284 -170
- investing_algorithm_framework/app/reporting/charts/__init__.py +10 -2
- investing_algorithm_framework/app/reporting/charts/entry_exist_signals.py +66 -0
- investing_algorithm_framework/app/reporting/charts/equity_curve.py +37 -0
- investing_algorithm_framework/app/reporting/charts/equity_curve_drawdown.py +11 -26
- investing_algorithm_framework/app/reporting/charts/line_chart.py +11 -0
- investing_algorithm_framework/app/reporting/charts/ohlcv_data_completeness.py +51 -0
- investing_algorithm_framework/app/reporting/charts/rolling_sharp_ratio.py +1 -1
- investing_algorithm_framework/app/reporting/generate.py +100 -114
- investing_algorithm_framework/app/reporting/tables/key_metrics_table.py +40 -32
- investing_algorithm_framework/app/reporting/tables/time_metrics_table.py +34 -27
- investing_algorithm_framework/app/reporting/tables/trade_metrics_table.py +23 -19
- investing_algorithm_framework/app/reporting/tables/trades_table.py +1 -1
- investing_algorithm_framework/app/reporting/tables/utils.py +1 -0
- investing_algorithm_framework/app/reporting/templates/report_template.html.j2 +10 -16
- investing_algorithm_framework/app/strategy.py +315 -175
- investing_algorithm_framework/app/task.py +5 -3
- investing_algorithm_framework/cli/cli.py +30 -12
- investing_algorithm_framework/cli/deploy_to_aws_lambda.py +131 -34
- investing_algorithm_framework/cli/initialize_app.py +20 -1
- investing_algorithm_framework/cli/templates/app_aws_lambda_function.py.template +18 -6
- investing_algorithm_framework/cli/templates/aws_lambda_dockerfile.template +22 -0
- investing_algorithm_framework/cli/templates/aws_lambda_dockerignore.template +92 -0
- investing_algorithm_framework/cli/templates/aws_lambda_requirements.txt.template +2 -2
- investing_algorithm_framework/cli/templates/azure_function_requirements.txt.template +1 -1
- investing_algorithm_framework/create_app.py +3 -5
- investing_algorithm_framework/dependency_container.py +25 -39
- investing_algorithm_framework/domain/__init__.py +45 -38
- investing_algorithm_framework/domain/backtesting/__init__.py +21 -0
- investing_algorithm_framework/domain/backtesting/backtest.py +503 -0
- investing_algorithm_framework/domain/backtesting/backtest_date_range.py +96 -0
- investing_algorithm_framework/domain/backtesting/backtest_evaluation_focuss.py +242 -0
- investing_algorithm_framework/domain/backtesting/backtest_metrics.py +459 -0
- investing_algorithm_framework/domain/backtesting/backtest_permutation_test.py +275 -0
- investing_algorithm_framework/domain/backtesting/backtest_run.py +605 -0
- investing_algorithm_framework/domain/backtesting/backtest_summary_metrics.py +162 -0
- investing_algorithm_framework/domain/backtesting/combine_backtests.py +280 -0
- investing_algorithm_framework/domain/config.py +27 -0
- investing_algorithm_framework/domain/constants.py +6 -34
- investing_algorithm_framework/domain/data_provider.py +200 -56
- investing_algorithm_framework/domain/exceptions.py +34 -1
- investing_algorithm_framework/domain/models/__init__.py +10 -19
- investing_algorithm_framework/domain/models/base_model.py +0 -6
- investing_algorithm_framework/domain/models/data/__init__.py +7 -0
- investing_algorithm_framework/domain/models/data/data_source.py +214 -0
- investing_algorithm_framework/domain/models/{market_data_type.py → data/data_type.py} +7 -7
- investing_algorithm_framework/domain/models/market/market_credential.py +6 -0
- investing_algorithm_framework/domain/models/order/order.py +34 -13
- investing_algorithm_framework/domain/models/order/order_status.py +1 -1
- investing_algorithm_framework/domain/models/order/order_type.py +1 -1
- investing_algorithm_framework/domain/models/portfolio/portfolio.py +14 -1
- investing_algorithm_framework/domain/models/portfolio/portfolio_configuration.py +5 -1
- investing_algorithm_framework/domain/models/portfolio/portfolio_snapshot.py +51 -11
- investing_algorithm_framework/domain/models/position/__init__.py +2 -1
- investing_algorithm_framework/domain/models/position/position.py +9 -0
- investing_algorithm_framework/domain/models/position/position_size.py +41 -0
- investing_algorithm_framework/domain/models/risk_rules/__init__.py +7 -0
- investing_algorithm_framework/domain/models/risk_rules/stop_loss_rule.py +51 -0
- investing_algorithm_framework/domain/models/risk_rules/take_profit_rule.py +55 -0
- investing_algorithm_framework/domain/models/snapshot_interval.py +0 -1
- investing_algorithm_framework/domain/models/strategy_profile.py +19 -151
- investing_algorithm_framework/domain/models/time_frame.py +7 -0
- investing_algorithm_framework/domain/models/time_interval.py +33 -0
- investing_algorithm_framework/domain/models/time_unit.py +63 -1
- investing_algorithm_framework/domain/models/trade/__init__.py +0 -2
- investing_algorithm_framework/domain/models/trade/trade.py +56 -32
- investing_algorithm_framework/domain/models/trade/trade_status.py +8 -2
- investing_algorithm_framework/domain/models/trade/trade_stop_loss.py +106 -41
- investing_algorithm_framework/domain/models/trade/trade_take_profit.py +161 -99
- investing_algorithm_framework/domain/order_executor.py +19 -0
- investing_algorithm_framework/domain/portfolio_provider.py +20 -1
- investing_algorithm_framework/domain/services/__init__.py +0 -13
- investing_algorithm_framework/domain/strategy.py +1 -29
- investing_algorithm_framework/domain/utils/__init__.py +5 -1
- investing_algorithm_framework/domain/utils/custom_tqdm.py +22 -0
- investing_algorithm_framework/domain/utils/jupyter_notebook_detection.py +19 -0
- investing_algorithm_framework/domain/utils/polars.py +17 -14
- investing_algorithm_framework/download_data.py +40 -10
- investing_algorithm_framework/infrastructure/__init__.py +13 -25
- investing_algorithm_framework/infrastructure/data_providers/__init__.py +7 -4
- investing_algorithm_framework/infrastructure/data_providers/ccxt.py +811 -546
- investing_algorithm_framework/infrastructure/data_providers/csv.py +433 -122
- investing_algorithm_framework/infrastructure/data_providers/pandas.py +599 -0
- investing_algorithm_framework/infrastructure/database/__init__.py +6 -2
- investing_algorithm_framework/infrastructure/database/sql_alchemy.py +81 -0
- investing_algorithm_framework/infrastructure/models/__init__.py +0 -13
- investing_algorithm_framework/infrastructure/models/order/order.py +9 -3
- investing_algorithm_framework/infrastructure/models/trades/trade_stop_loss.py +27 -8
- investing_algorithm_framework/infrastructure/models/trades/trade_take_profit.py +21 -7
- investing_algorithm_framework/infrastructure/order_executors/__init__.py +2 -0
- investing_algorithm_framework/infrastructure/order_executors/backtest_oder_executor.py +28 -0
- investing_algorithm_framework/infrastructure/repositories/repository.py +16 -2
- investing_algorithm_framework/infrastructure/repositories/trade_repository.py +2 -2
- investing_algorithm_framework/infrastructure/repositories/trade_stop_loss_repository.py +6 -0
- investing_algorithm_framework/infrastructure/repositories/trade_take_profit_repository.py +6 -0
- investing_algorithm_framework/infrastructure/services/__init__.py +0 -4
- investing_algorithm_framework/services/__init__.py +105 -8
- investing_algorithm_framework/services/backtesting/backtest_service.py +536 -476
- investing_algorithm_framework/services/configuration_service.py +14 -4
- investing_algorithm_framework/services/data_providers/__init__.py +5 -0
- investing_algorithm_framework/services/data_providers/data_provider_service.py +850 -0
- investing_algorithm_framework/{app/reporting → services}/metrics/__init__.py +48 -17
- investing_algorithm_framework/{app/reporting → services}/metrics/drawdown.py +10 -10
- investing_algorithm_framework/{app/reporting → services}/metrics/equity_curve.py +2 -2
- investing_algorithm_framework/{app/reporting → services}/metrics/exposure.py +60 -2
- investing_algorithm_framework/services/metrics/generate.py +358 -0
- investing_algorithm_framework/{app/reporting → services}/metrics/profit_factor.py +36 -0
- investing_algorithm_framework/{app/reporting → services}/metrics/recovery.py +2 -2
- investing_algorithm_framework/{app/reporting → services}/metrics/returns.py +146 -147
- investing_algorithm_framework/services/metrics/risk_free_rate.py +28 -0
- investing_algorithm_framework/{app/reporting/metrics/sharp_ratio.py → services/metrics/sharpe_ratio.py} +6 -10
- investing_algorithm_framework/{app/reporting → services}/metrics/sortino_ratio.py +3 -7
- investing_algorithm_framework/services/metrics/trades.py +500 -0
- investing_algorithm_framework/services/metrics/volatility.py +97 -0
- investing_algorithm_framework/{app/reporting → services}/metrics/win_rate.py +70 -3
- investing_algorithm_framework/services/order_service/order_backtest_service.py +21 -31
- investing_algorithm_framework/services/order_service/order_service.py +9 -71
- investing_algorithm_framework/services/portfolios/portfolio_provider_lookup.py +0 -2
- investing_algorithm_framework/services/portfolios/portfolio_service.py +3 -13
- investing_algorithm_framework/services/portfolios/portfolio_snapshot_service.py +62 -96
- investing_algorithm_framework/services/portfolios/portfolio_sync_service.py +0 -3
- investing_algorithm_framework/services/repository_service.py +5 -2
- investing_algorithm_framework/services/trade_order_evaluator/__init__.py +9 -0
- investing_algorithm_framework/services/trade_order_evaluator/backtest_trade_oder_evaluator.py +113 -0
- investing_algorithm_framework/services/trade_order_evaluator/default_trade_order_evaluator.py +51 -0
- investing_algorithm_framework/services/trade_order_evaluator/trade_order_evaluator.py +80 -0
- investing_algorithm_framework/services/trade_service/__init__.py +7 -1
- investing_algorithm_framework/services/trade_service/trade_service.py +51 -29
- investing_algorithm_framework/services/trade_service/trade_stop_loss_service.py +39 -0
- investing_algorithm_framework/services/trade_service/trade_take_profit_service.py +41 -0
- investing_algorithm_framework-7.19.15.dist-info/METADATA +537 -0
- {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/RECORD +159 -148
- investing_algorithm_framework/app/reporting/evaluation.py +0 -243
- investing_algorithm_framework/app/reporting/metrics/risk_free_rate.py +0 -8
- investing_algorithm_framework/app/reporting/metrics/volatility.py +0 -69
- investing_algorithm_framework/cli/templates/requirements_azure_function.txt.template +0 -3
- investing_algorithm_framework/domain/models/backtesting/__init__.py +0 -9
- investing_algorithm_framework/domain/models/backtesting/backtest_date_range.py +0 -47
- investing_algorithm_framework/domain/models/backtesting/backtest_position.py +0 -120
- investing_algorithm_framework/domain/models/backtesting/backtest_reports_evaluation.py +0 -0
- investing_algorithm_framework/domain/models/backtesting/backtest_results.py +0 -440
- investing_algorithm_framework/domain/models/data_source.py +0 -21
- investing_algorithm_framework/domain/models/date_range.py +0 -64
- investing_algorithm_framework/domain/models/trade/trade_risk_type.py +0 -34
- investing_algorithm_framework/domain/models/trading_data_types.py +0 -48
- investing_algorithm_framework/domain/models/trading_time_frame.py +0 -223
- investing_algorithm_framework/domain/services/market_data_sources.py +0 -543
- investing_algorithm_framework/domain/services/market_service.py +0 -153
- investing_algorithm_framework/domain/services/observable.py +0 -51
- investing_algorithm_framework/domain/services/observer.py +0 -19
- investing_algorithm_framework/infrastructure/models/market_data_sources/__init__.py +0 -16
- investing_algorithm_framework/infrastructure/models/market_data_sources/ccxt.py +0 -746
- investing_algorithm_framework/infrastructure/models/market_data_sources/csv.py +0 -270
- investing_algorithm_framework/infrastructure/models/market_data_sources/pandas.py +0 -312
- investing_algorithm_framework/infrastructure/services/market_service/__init__.py +0 -5
- investing_algorithm_framework/infrastructure/services/market_service/ccxt_market_service.py +0 -471
- investing_algorithm_framework/infrastructure/services/performance_service/__init__.py +0 -7
- investing_algorithm_framework/infrastructure/services/performance_service/backtest_performance_service.py +0 -2
- investing_algorithm_framework/infrastructure/services/performance_service/performance_service.py +0 -322
- investing_algorithm_framework/services/market_data_source_service/__init__.py +0 -10
- investing_algorithm_framework/services/market_data_source_service/backtest_market_data_source_service.py +0 -269
- investing_algorithm_framework/services/market_data_source_service/data_provider_service.py +0 -350
- investing_algorithm_framework/services/market_data_source_service/market_data_source_service.py +0 -377
- investing_algorithm_framework/services/strategy_orchestrator_service.py +0 -296
- investing_algorithm_framework-6.9.1.dist-info/METADATA +0 -440
- /investing_algorithm_framework/{app/reporting → services}/metrics/alpha.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/beta.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/cagr.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/calmar_ratio.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/mean_daily_return.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/price_efficiency.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/standard_deviation.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/treynor_ratio.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/ulcer.py +0 -0
- /investing_algorithm_framework/{app/reporting → services}/metrics/value_at_risk.py +0 -0
- {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/LICENSE +0 -0
- {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/WHEEL +0 -0
- {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/entry_points.txt +0 -0
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
import os.path
|
|
3
3
|
from datetime import datetime, timedelta, timezone
|
|
4
4
|
from time import sleep
|
|
5
|
-
from typing import Union
|
|
5
|
+
from typing import Union, List
|
|
6
6
|
|
|
7
7
|
import ccxt
|
|
8
8
|
import pandas as pd
|
|
@@ -10,119 +10,148 @@ import polars as pl
|
|
|
10
10
|
from dateutil import parser
|
|
11
11
|
|
|
12
12
|
from investing_algorithm_framework.domain import OperationalException, \
|
|
13
|
-
DATETIME_FORMAT, DataProvider,
|
|
14
|
-
NetworkError, TimeFrame, MarketCredential
|
|
13
|
+
DATETIME_FORMAT, DataProvider, convert_polars_to_pandas, \
|
|
14
|
+
NetworkError, TimeFrame, MarketCredential, DataType, DataSource, \
|
|
15
|
+
RESOURCE_DIRECTORY, CCXT_DATETIME_FORMAT, DATA_DIRECTORY, \
|
|
16
|
+
DATETIME_FORMAT_FILE_NAME
|
|
15
17
|
|
|
16
18
|
logger = logging.getLogger("investing_algorithm_framework")
|
|
17
19
|
|
|
18
20
|
|
|
19
|
-
class
|
|
21
|
+
class CCXTOHLCVDataProvider(DataProvider):
|
|
20
22
|
"""
|
|
23
|
+
Implementation of Data Provider for OHLCV data. OHLCV data
|
|
24
|
+
will be downloaded with the CCXT library.
|
|
25
|
+
|
|
26
|
+
If in backtest mode, and the data is already
|
|
27
|
+
available in the storage path, it will be loaded from there. If the
|
|
28
|
+
data is not available in the storage path, it will be fetched from the
|
|
29
|
+
CCXT library and saved to the storage path in csv format.
|
|
30
|
+
|
|
31
|
+
If the get_data method is called with a start and end date, the
|
|
32
|
+
data provider will look if the data is already available in the
|
|
33
|
+
storage directory. If this is the case, it will read the data
|
|
34
|
+
from the csv file and return it.
|
|
35
|
+
|
|
36
|
+
The CSV file should contain the following
|
|
37
|
+
columns: Datetime, Open, High, Low, Close, Volume.
|
|
38
|
+
The Datetime column should be in UTC timezone and in milliseconds.
|
|
39
|
+
The data will be loaded into a Polars DataFrame and will be kept in memory.
|
|
40
|
+
|
|
41
|
+
Attributes:
|
|
42
|
+
data_type (DataType): The type of data provided by this provider,
|
|
43
|
+
which is OHLCV.
|
|
44
|
+
data_provider_identifier (str): Identifier for the CSV OHLCV data
|
|
45
|
+
provider.
|
|
46
|
+
_start_date_data_source (datetime): The start date of the data
|
|
47
|
+
source, determined from the first row of the data.
|
|
48
|
+
_end_date_data_source (datetime): The end date of the data
|
|
49
|
+
source, determined from the last row of the data.
|
|
50
|
+
data (polars.DataFrame): The OHLCV data loaded from the CSV file when
|
|
51
|
+
in backtest mode.
|
|
21
52
|
"""
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
column_names = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
|
|
53
|
+
data_type = DataType.OHLCV
|
|
54
|
+
data_provider_identifier = "ccxt_ohlcv_data_provider"
|
|
55
|
+
storage_directory = None
|
|
26
56
|
|
|
27
57
|
def __init__(
|
|
28
58
|
self,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
time_frame=None,
|
|
59
|
+
symbol: str = None,
|
|
60
|
+
time_frame: str = None,
|
|
61
|
+
market: str = None,
|
|
33
62
|
window_size=None,
|
|
34
|
-
|
|
63
|
+
data_provider_identifier: str = None,
|
|
64
|
+
storage_directory=None,
|
|
65
|
+
pandas: bool = False,
|
|
66
|
+
config=None
|
|
35
67
|
):
|
|
68
|
+
"""
|
|
69
|
+
Initialize the CCXT OHLCV Data Provider.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
symbol (str): The symbol for which the data is provided.
|
|
73
|
+
time_frame (str): The time frame for the data.
|
|
74
|
+
market (str, optional): The market for the data. Defaults to None.
|
|
75
|
+
window_size (int, optional): The window size for the data.
|
|
76
|
+
Defaults to None.
|
|
77
|
+
data_provider_identifier (str, optional): The identifier for the
|
|
78
|
+
data provider.
|
|
79
|
+
pandas (bool, optional): If True, the data will be returned
|
|
80
|
+
as a pandas DataFrame instead of a Polars DataFrame.
|
|
81
|
+
storage_directory: (str, optional): the storage directory where
|
|
82
|
+
the OHLCV data need to be stored.
|
|
83
|
+
"""
|
|
84
|
+
if data_provider_identifier is None:
|
|
85
|
+
data_provider_identifier = self.data_provider_identifier
|
|
86
|
+
|
|
36
87
|
super().__init__(
|
|
37
|
-
data_type=data_type,
|
|
38
88
|
symbol=symbol,
|
|
89
|
+
market=market,
|
|
39
90
|
time_frame=time_frame,
|
|
40
91
|
window_size=window_size,
|
|
41
|
-
|
|
92
|
+
storage_directory=storage_directory,
|
|
93
|
+
data_provider_identifier=data_provider_identifier,
|
|
94
|
+
config=config
|
|
42
95
|
)
|
|
43
|
-
|
|
44
|
-
self.market = market
|
|
45
|
-
self.data = None
|
|
46
96
|
self._start_date_data_source = None
|
|
47
97
|
self._end_date_data_source = None
|
|
48
|
-
self.
|
|
49
|
-
self.
|
|
98
|
+
self._columns = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
|
|
99
|
+
self.pandas = pandas
|
|
50
100
|
self.window_cache = {}
|
|
101
|
+
self.data = None
|
|
102
|
+
self.total_number_of_data_points = 0
|
|
103
|
+
self.missing_data_point_dates = []
|
|
104
|
+
self.data_file_path = None
|
|
51
105
|
|
|
52
|
-
def
|
|
106
|
+
def has_data(
|
|
107
|
+
self,
|
|
108
|
+
data_source: DataSource,
|
|
109
|
+
start_date: datetime = None,
|
|
110
|
+
end_date: datetime = None
|
|
111
|
+
) -> bool:
|
|
53
112
|
"""
|
|
54
|
-
|
|
113
|
+
Implementation of the has_data method to check if
|
|
114
|
+
the data provider has data for the given data source.
|
|
115
|
+
|
|
116
|
+
If start_date and/or end_date are provided, first the
|
|
117
|
+
storage_directory_will be checked for existence of the data.
|
|
118
|
+
|
|
119
|
+
If nothing is found or start_date and/or end_date are not provided
|
|
120
|
+
the ccxt library will be directly queried.
|
|
55
121
|
|
|
56
122
|
Args:
|
|
57
|
-
|
|
58
|
-
|
|
123
|
+
data_source (DataSource): The data source to check.
|
|
124
|
+
start_date (datetime, optional): The start date for the data.
|
|
125
|
+
Defaults to None.
|
|
126
|
+
end_date (datetime, optional): The end date for the data.
|
|
127
|
+
Defaults to None.
|
|
59
128
|
|
|
60
129
|
Returns:
|
|
61
|
-
|
|
130
|
+
bool: True if the data provider has data for the given data source,
|
|
131
|
+
False otherwise.
|
|
62
132
|
"""
|
|
133
|
+
market = data_source.market
|
|
134
|
+
symbol = data_source.symbol
|
|
135
|
+
data_type = data_source.data_type
|
|
136
|
+
start_date = start_date or data_source.start_date
|
|
137
|
+
end_date = end_date or data_source.end_date
|
|
63
138
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
raise OperationalException(
|
|
67
|
-
f"No exchange found for market id {market}"
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
exchange_class = getattr(ccxt, market)
|
|
139
|
+
if not DataType.OHLCV.equals(data_type):
|
|
140
|
+
return False
|
|
71
141
|
|
|
72
|
-
if
|
|
73
|
-
|
|
74
|
-
|
|
142
|
+
if start_date is not None and end_date is not None:
|
|
143
|
+
# Check if the data is available in the storage path
|
|
144
|
+
data = self._get_data_from_storage(
|
|
145
|
+
symbol=symbol,
|
|
146
|
+
market=market,
|
|
147
|
+
time_frame=data_source.time_frame,
|
|
148
|
+
storage_path=data_source.storage_path,
|
|
149
|
+
start_date=start_date,
|
|
150
|
+
end_date=end_date
|
|
75
151
|
)
|
|
76
152
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
'apiKey': market_credential.api_key,
|
|
80
|
-
'secret': market_credential.secret_key,
|
|
81
|
-
})
|
|
82
|
-
else:
|
|
83
|
-
exchange = exchange_class({})
|
|
84
|
-
|
|
85
|
-
return exchange
|
|
86
|
-
|
|
87
|
-
def pre_pare_backtest_data(
|
|
88
|
-
self,
|
|
89
|
-
backtest_start_date,
|
|
90
|
-
backtest_end_date,
|
|
91
|
-
symbol: str = None,
|
|
92
|
-
market: str = None,
|
|
93
|
-
time_frame: str = None,
|
|
94
|
-
window_size=None
|
|
95
|
-
) -> None:
|
|
96
|
-
pass
|
|
97
|
-
|
|
98
|
-
def get_backtest_data(
|
|
99
|
-
self,
|
|
100
|
-
date: datetime = None,
|
|
101
|
-
symbol: str = None,
|
|
102
|
-
market: str = None,
|
|
103
|
-
time_frame: str = None,
|
|
104
|
-
backtest_start_date: datetime = None,
|
|
105
|
-
backtest_end_date: datetime = None,
|
|
106
|
-
window_size=None,
|
|
107
|
-
pandas=False
|
|
108
|
-
) -> None:
|
|
109
|
-
pass
|
|
110
|
-
|
|
111
|
-
def has_data(
|
|
112
|
-
self,
|
|
113
|
-
data_type: str = None,
|
|
114
|
-
symbol: str = None,
|
|
115
|
-
market: str = None,
|
|
116
|
-
time_frame: str = None,
|
|
117
|
-
start_date: datetime = None,
|
|
118
|
-
end_date: datetime = None,
|
|
119
|
-
window_size=None,
|
|
120
|
-
) -> bool:
|
|
121
|
-
|
|
122
|
-
if TradingDataType.CUSTOM.equals(data_type):
|
|
123
|
-
raise OperationalException(
|
|
124
|
-
"Custom data type is not supported for CCXTOHLCVDataProvider"
|
|
125
|
-
)
|
|
153
|
+
if data is not None:
|
|
154
|
+
return True
|
|
126
155
|
|
|
127
156
|
if market is None:
|
|
128
157
|
market = "binance"
|
|
@@ -137,289 +166,118 @@ class CCXTDataProvider(DataProvider):
|
|
|
137
166
|
return symbol in symbols
|
|
138
167
|
|
|
139
168
|
except ccxt.NetworkError:
|
|
140
|
-
|
|
141
|
-
"Network error occurred, make sure you have "
|
|
142
|
-
"an active internet connection"
|
|
143
|
-
)
|
|
169
|
+
pass
|
|
144
170
|
|
|
145
|
-
except Exception:
|
|
171
|
+
except Exception as e:
|
|
172
|
+
logger.error(e)
|
|
146
173
|
return False
|
|
147
174
|
|
|
148
|
-
def
|
|
175
|
+
def prepare_backtest_data(
|
|
149
176
|
self,
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
market: str = None,
|
|
154
|
-
time_frame: str = None,
|
|
155
|
-
start_date: datetime = None,
|
|
156
|
-
end_date: datetime = None,
|
|
157
|
-
storage_path=None,
|
|
158
|
-
window_size=None,
|
|
159
|
-
pandas=False,
|
|
160
|
-
):
|
|
161
|
-
|
|
162
|
-
if market is None:
|
|
163
|
-
market = self.market
|
|
164
|
-
|
|
165
|
-
if market is None:
|
|
166
|
-
raise OperationalException(
|
|
167
|
-
"Market is not set. Please set the market "
|
|
168
|
-
"before calling get_data."
|
|
169
|
-
)
|
|
170
|
-
|
|
171
|
-
if symbol is None:
|
|
172
|
-
symbol = self.symbol
|
|
173
|
-
|
|
174
|
-
if symbol is None:
|
|
175
|
-
raise OperationalException(
|
|
176
|
-
"Symbol is not set. Please set the symbol "
|
|
177
|
-
"before calling get_data."
|
|
178
|
-
)
|
|
179
|
-
|
|
180
|
-
if data_type is None:
|
|
181
|
-
data_type = self.data_type
|
|
182
|
-
|
|
183
|
-
if TradingDataType.OHLCV.equals(data_type):
|
|
184
|
-
|
|
185
|
-
if time_frame is None:
|
|
186
|
-
time_frame = self.time_frame
|
|
187
|
-
|
|
188
|
-
if time_frame is None:
|
|
189
|
-
raise OperationalException(
|
|
190
|
-
"Time frame is not set. Please set the time frame "
|
|
191
|
-
"before requesting ohlcv data."
|
|
192
|
-
)
|
|
193
|
-
|
|
194
|
-
if end_date is None and window_size is None:
|
|
195
|
-
raise OperationalException(
|
|
196
|
-
"A window size is required or a start and end date "
|
|
197
|
-
"to retrieve ohlcv data."
|
|
198
|
-
)
|
|
199
|
-
|
|
200
|
-
if end_date is None:
|
|
201
|
-
end_date = datetime.now(tz=timezone.utc)
|
|
202
|
-
|
|
203
|
-
if start_date is None:
|
|
204
|
-
|
|
205
|
-
if date is not None:
|
|
206
|
-
start_date = date
|
|
207
|
-
else:
|
|
208
|
-
start_date = self.create_start_date(
|
|
209
|
-
end_date=end_date,
|
|
210
|
-
time_frame=time_frame,
|
|
211
|
-
window_size=window_size
|
|
212
|
-
)
|
|
213
|
-
|
|
214
|
-
data = self.get_ohlcv(
|
|
215
|
-
symbol=symbol,
|
|
216
|
-
time_frame=time_frame,
|
|
217
|
-
from_timestamp=start_date,
|
|
218
|
-
market=market,
|
|
219
|
-
to_timestamp=end_date
|
|
220
|
-
)
|
|
221
|
-
|
|
222
|
-
if pandas:
|
|
223
|
-
data = convert_polars_to_pandas(data)
|
|
224
|
-
|
|
225
|
-
return data
|
|
226
|
-
|
|
227
|
-
raise OperationalException(
|
|
228
|
-
f"Data type {data_type} is not supported for CCXTDataProvider"
|
|
229
|
-
)
|
|
230
|
-
|
|
231
|
-
def get_ohlcv(
|
|
232
|
-
self, symbol, time_frame, from_timestamp, market, to_timestamp=None
|
|
233
|
-
) -> pl.DataFrame:
|
|
177
|
+
backtest_start_date,
|
|
178
|
+
backtest_end_date,
|
|
179
|
+
) -> None:
|
|
234
180
|
"""
|
|
235
|
-
|
|
181
|
+
Prepares backtest data for a given symbol and date range.
|
|
236
182
|
|
|
237
183
|
Args:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
184
|
+
backtest_start_date (datetime): The start date for the
|
|
185
|
+
backtest data.
|
|
186
|
+
backtest_end_date (datetime): The end date for the
|
|
187
|
+
backtest data.
|
|
188
|
+
|
|
189
|
+
Raises:
|
|
190
|
+
OperationalException: If the backtest start date is before the
|
|
191
|
+
start date of the data source or if the backtest end date is
|
|
192
|
+
after the end date of the data source.
|
|
243
193
|
|
|
244
194
|
Returns:
|
|
245
|
-
|
|
246
|
-
in polars DataFrame format
|
|
195
|
+
None
|
|
247
196
|
"""
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
if self.config is not None and "DATETIME_FORMAT" in self.config:
|
|
258
|
-
datetime_format = self.config["DATETIME_FORMAT"]
|
|
197
|
+
# There must be at least backtest_start_date - window_size * time_frame
|
|
198
|
+
# data available to create a sliding window.
|
|
199
|
+
if self.window_size is not None:
|
|
200
|
+
required_start_date = backtest_start_date - \
|
|
201
|
+
timedelta(
|
|
202
|
+
minutes=TimeFrame.from_value(
|
|
203
|
+
self.time_frame
|
|
204
|
+
).amount_of_minutes * self.window_size
|
|
205
|
+
)
|
|
259
206
|
else:
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
207
|
+
required_start_date = backtest_start_date
|
|
208
|
+
|
|
209
|
+
storage_directory_path = self.get_storage_directory()
|
|
210
|
+
|
|
211
|
+
# Check if the data source is already available in the storage path
|
|
212
|
+
data = self._get_data_from_storage(
|
|
213
|
+
symbol=self.symbol,
|
|
214
|
+
market=self.market,
|
|
215
|
+
time_frame=self.time_frame,
|
|
216
|
+
storage_path=storage_directory_path,
|
|
217
|
+
start_date=required_start_date,
|
|
218
|
+
end_date=backtest_end_date
|
|
270
219
|
)
|
|
271
220
|
|
|
272
|
-
if
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
221
|
+
if data is None:
|
|
222
|
+
# Disable pandas if it is set to True, because logic
|
|
223
|
+
# depends on polars DataFrame
|
|
224
|
+
has_pandas_flag = self.pandas
|
|
225
|
+
self.pandas = False
|
|
226
|
+
|
|
227
|
+
# If the data is not available in the storage path,
|
|
228
|
+
# retrieve it from the CCXT data provider
|
|
229
|
+
data = self.get_data(
|
|
230
|
+
start_date=required_start_date,
|
|
231
|
+
end_date=backtest_end_date,
|
|
232
|
+
save=True,
|
|
277
233
|
)
|
|
278
|
-
data = []
|
|
279
|
-
|
|
280
|
-
while from_time_stamp < to_timestamp:
|
|
281
|
-
ohlcv = exchange.fetch_ohlcv(symbol, time_frame, from_time_stamp)
|
|
282
|
-
|
|
283
|
-
if len(ohlcv) > 0:
|
|
284
|
-
from_time_stamp = \
|
|
285
|
-
ohlcv[-1][0] + exchange.parse_timeframe(time_frame) * 1000
|
|
286
|
-
else:
|
|
287
|
-
from_time_stamp = to_timestamp
|
|
288
234
|
|
|
289
|
-
|
|
290
|
-
datetime_stamp = parser.parse(exchange.iso8601(candle[0]))
|
|
235
|
+
self.pandas = has_pandas_flag
|
|
291
236
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
if datetime_stamp <= to_timestamp_datetime:
|
|
297
|
-
datetime_stamp = datetime_stamp \
|
|
298
|
-
.strftime(datetime_format)
|
|
299
|
-
|
|
300
|
-
data.append(
|
|
301
|
-
[datetime_stamp] +
|
|
302
|
-
[float(value) for value in candle[1:]]
|
|
303
|
-
)
|
|
237
|
+
self.data = data
|
|
238
|
+
self._start_date_data_source = self.data["Datetime"].min()
|
|
239
|
+
self._end_date_data_source = self.data["Datetime"].max()
|
|
240
|
+
self.total_number_of_data_points = len(self.data)
|
|
304
241
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
242
|
+
if required_start_date < self._start_date_data_source:
|
|
243
|
+
self.number_of_missing_data_points = (
|
|
244
|
+
self._start_date_data_source - required_start_date
|
|
245
|
+
).total_seconds() / (
|
|
246
|
+
TimeFrame.from_value(self.time_frame).amount_of_minutes * 60
|
|
247
|
+
)
|
|
309
248
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
249
|
+
if self.window_size is not None:
|
|
250
|
+
# Create cache with sliding windows
|
|
251
|
+
self._precompute_sliding_windows(
|
|
252
|
+
data=data,
|
|
253
|
+
window_size=self.window_size,
|
|
254
|
+
time_frame=self.time_frame,
|
|
255
|
+
start_date=backtest_start_date,
|
|
256
|
+
end_date=backtest_end_date
|
|
257
|
+
)
|
|
313
258
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
259
|
+
n_min = TimeFrame.from_value(self.time_frame).amount_of_minutes
|
|
260
|
+
# Assume self.data is a Polars DataFrame with a "Datetime" column
|
|
261
|
+
expected_dates = pl.datetime_range(
|
|
262
|
+
start=required_start_date,
|
|
263
|
+
end=backtest_end_date,
|
|
264
|
+
interval=f"{n_min}m",
|
|
265
|
+
eager=True
|
|
266
|
+
).to_list()
|
|
317
267
|
|
|
268
|
+
actual_dates = self.data["Datetime"].to_list()
|
|
318
269
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
CCXT library to retrieve OHLCV data from various cryptocurrency
|
|
323
|
-
markets. It supports multiple markets and symbols,
|
|
324
|
-
"""
|
|
325
|
-
backtest_data_directory = None
|
|
326
|
-
backtest_data_end_date = None
|
|
327
|
-
total_minutes_time_frame = None
|
|
328
|
-
column_names = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
|
|
329
|
-
|
|
330
|
-
def __init__(
|
|
331
|
-
self,
|
|
332
|
-
market=None,
|
|
333
|
-
symbol=None,
|
|
334
|
-
time_frame=None,
|
|
335
|
-
window_size=None,
|
|
336
|
-
priority=1
|
|
337
|
-
):
|
|
338
|
-
super().__init__(
|
|
339
|
-
data_type=TradingDataType.OHLCV.value,
|
|
340
|
-
symbol=symbol,
|
|
341
|
-
time_frame=time_frame,
|
|
342
|
-
window_size=window_size,
|
|
343
|
-
priority=priority
|
|
270
|
+
# Find missing dates
|
|
271
|
+
self.missing_data_point_dates = sorted(
|
|
272
|
+
set(expected_dates) - set(actual_dates)
|
|
344
273
|
)
|
|
345
274
|
|
|
346
|
-
self.market = market
|
|
347
|
-
self.data = None
|
|
348
|
-
self._start_date_data_source = None
|
|
349
|
-
self._end_date_data_source = None
|
|
350
|
-
self.backtest_end_index = self.window_size
|
|
351
|
-
self.backtest_start_index = 0
|
|
352
|
-
self.window_cache = {}
|
|
353
|
-
|
|
354
|
-
def pre_pare_backtest_data(
|
|
355
|
-
self,
|
|
356
|
-
backtest_start_date,
|
|
357
|
-
backtest_end_date,
|
|
358
|
-
symbol: str = None,
|
|
359
|
-
market: str = None,
|
|
360
|
-
time_frame: str = None,
|
|
361
|
-
window_size=None
|
|
362
|
-
) -> None:
|
|
363
|
-
pass
|
|
364
|
-
|
|
365
|
-
def get_backtest_data(
|
|
366
|
-
self,
|
|
367
|
-
date: datetime = None,
|
|
368
|
-
symbol: str = None,
|
|
369
|
-
market: str = None,
|
|
370
|
-
time_frame: str = None,
|
|
371
|
-
backtest_start_date: datetime = None,
|
|
372
|
-
backtest_end_date: datetime = None,
|
|
373
|
-
window_size=None,
|
|
374
|
-
pandas=False
|
|
375
|
-
) -> None:
|
|
376
|
-
pass
|
|
377
|
-
|
|
378
|
-
def has_data(
|
|
379
|
-
self,
|
|
380
|
-
data_type: str = None,
|
|
381
|
-
symbol: str = None,
|
|
382
|
-
market: str = None,
|
|
383
|
-
time_frame: str = None,
|
|
384
|
-
start_date: datetime = None,
|
|
385
|
-
end_date: datetime = None,
|
|
386
|
-
window_size=None,
|
|
387
|
-
) -> bool:
|
|
388
|
-
|
|
389
|
-
if market is None:
|
|
390
|
-
market = "binance"
|
|
391
|
-
|
|
392
|
-
# Check if ccxt has an exchange for the given market
|
|
393
|
-
try:
|
|
394
|
-
market = market.lower()
|
|
395
|
-
exchange_class = getattr(ccxt, market)
|
|
396
|
-
exchange = exchange_class()
|
|
397
|
-
symbols = exchange.load_markets()
|
|
398
|
-
symbols = list(symbols.keys())
|
|
399
|
-
return symbol in symbols
|
|
400
|
-
|
|
401
|
-
except ccxt.NetworkError:
|
|
402
|
-
raise NetworkError(
|
|
403
|
-
"Network error occurred, make sure you have "
|
|
404
|
-
"an active internet connection"
|
|
405
|
-
)
|
|
406
|
-
|
|
407
|
-
except Exception:
|
|
408
|
-
return False
|
|
409
|
-
|
|
410
275
|
def get_data(
|
|
411
276
|
self,
|
|
412
|
-
data_type: str = None,
|
|
413
277
|
date: datetime = None,
|
|
414
|
-
symbol: str = None,
|
|
415
|
-
market: str = None,
|
|
416
|
-
time_frame: str = None,
|
|
417
278
|
start_date: datetime = None,
|
|
418
279
|
end_date: datetime = None,
|
|
419
|
-
|
|
420
|
-
window_size=None,
|
|
421
|
-
pandas=False,
|
|
422
|
-
save: bool = True
|
|
280
|
+
save: bool = False,
|
|
423
281
|
) -> Union[pl.DataFrame, pd.DataFrame]:
|
|
424
282
|
"""
|
|
425
283
|
Function to retrieve data from the CCXT data provider.
|
|
@@ -429,127 +287,240 @@ class CCXTOHLCVDataProvider(DataProvider):
|
|
|
429
287
|
converts the polars DataFrame to a pandas DataFrame.
|
|
430
288
|
|
|
431
289
|
Args:
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
start_date (datetime): The start date to retrieve data from.
|
|
438
|
-
end_date (datetime): The end date to retrieve data to.
|
|
439
|
-
storage_path (str): The path to store the data.
|
|
440
|
-
window_size (int): The size of the data window.
|
|
441
|
-
pandas (bool): Whether to return the data as a pandas DataFrame.
|
|
442
|
-
save (bool): Whether to save the data to the storage path.
|
|
290
|
+
date (datetime, optional): The date for which to retrieve the data.
|
|
291
|
+
start_date (datetime): The start date for the data.
|
|
292
|
+
end_date (datetime): The end date for the data.
|
|
293
|
+
save (bool): If True, the data will be saved to the storage path
|
|
294
|
+
if it is not already available. Defaults to False.
|
|
443
295
|
|
|
444
296
|
Returns:
|
|
445
|
-
|
|
446
|
-
Polars DataFrame format, or converted to pandas DataFrame
|
|
297
|
+
DataFrame: The data for the given symbol and market.
|
|
447
298
|
"""
|
|
448
|
-
if market is None:
|
|
449
|
-
market = self.market
|
|
450
299
|
|
|
451
|
-
if market is None:
|
|
300
|
+
if self.market is None:
|
|
452
301
|
raise OperationalException(
|
|
453
302
|
"Market is not set. Please set the market "
|
|
454
303
|
"before calling get_data."
|
|
455
304
|
)
|
|
456
305
|
|
|
457
|
-
if symbol is None:
|
|
458
|
-
symbol = self.symbol
|
|
459
|
-
|
|
460
|
-
if symbol is None:
|
|
306
|
+
if self.symbol is None:
|
|
461
307
|
raise OperationalException(
|
|
462
308
|
"Symbol is not set. Please set the symbol "
|
|
463
309
|
"before calling get_data."
|
|
464
310
|
)
|
|
465
311
|
|
|
466
|
-
if
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
if time_frame is None:
|
|
472
|
-
time_frame = self.time_frame
|
|
473
|
-
|
|
474
|
-
if time_frame is None:
|
|
475
|
-
raise OperationalException(
|
|
476
|
-
"Time frame is not set. Please set the time frame "
|
|
477
|
-
"before requesting ohlcv data."
|
|
478
|
-
)
|
|
312
|
+
if self.time_frame is None:
|
|
313
|
+
raise OperationalException(
|
|
314
|
+
"Time frame is not set. Please set the time frame "
|
|
315
|
+
"before requesting ohlcv data."
|
|
316
|
+
)
|
|
479
317
|
|
|
480
|
-
|
|
318
|
+
if date is not None and self.window_size is not None \
|
|
319
|
+
and self.time_frame is not None:
|
|
320
|
+
start_date = self.create_start_date(
|
|
321
|
+
end_date=date,
|
|
322
|
+
time_frame=self.time_frame,
|
|
323
|
+
window_size=self.window_size
|
|
324
|
+
)
|
|
325
|
+
end_date = date
|
|
326
|
+
else:
|
|
327
|
+
if (end_date is None and start_date is None
|
|
328
|
+
and self.window_size is None):
|
|
481
329
|
raise OperationalException(
|
|
482
|
-
"A
|
|
330
|
+
"A start date or end date or window size is required "
|
|
483
331
|
"to retrieve ohlcv data."
|
|
484
332
|
)
|
|
485
333
|
|
|
486
|
-
if end_date is None
|
|
334
|
+
if (start_date is not None and end_date is None
|
|
335
|
+
and self.window_size is None):
|
|
487
336
|
end_date = datetime.now(tz=timezone.utc)
|
|
488
337
|
|
|
489
|
-
if start_date is None
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
end_date=end_date,
|
|
496
|
-
time_frame=time_frame,
|
|
497
|
-
window_size=window_size
|
|
498
|
-
)
|
|
338
|
+
if (end_date is not None and start_date is None
|
|
339
|
+
and self.window_size is None):
|
|
340
|
+
raise OperationalException(
|
|
341
|
+
"A window size is required when using an end date "
|
|
342
|
+
"to retrieve ohlcv data."
|
|
343
|
+
)
|
|
499
344
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
# Here you would implement the logic to check if the data
|
|
503
|
-
# exists in the storage path and return it if it does.
|
|
504
|
-
# This is a placeholder for that logic.
|
|
505
|
-
data = self.retrieve_data_from_storage(
|
|
506
|
-
storage_path=storage_path,
|
|
507
|
-
symbol=symbol,
|
|
508
|
-
market=market,
|
|
509
|
-
time_frame=time_frame,
|
|
345
|
+
if start_date is not None and end_date is None:
|
|
346
|
+
end_date = self.create_end_date(
|
|
510
347
|
start_date=start_date,
|
|
511
|
-
|
|
348
|
+
time_frame=self.time_frame,
|
|
349
|
+
window_size=self.window_size
|
|
512
350
|
)
|
|
513
351
|
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
to_timestamp=end_date
|
|
521
|
-
)
|
|
522
|
-
else:
|
|
523
|
-
data = self.get_ohlcv(
|
|
524
|
-
symbol=symbol,
|
|
525
|
-
time_frame=time_frame,
|
|
526
|
-
from_timestamp=start_date,
|
|
527
|
-
market=market,
|
|
528
|
-
to_timestamp=end_date
|
|
352
|
+
if end_date is not None and start_date is None \
|
|
353
|
+
and self.window_size is not None:
|
|
354
|
+
start_date = self.create_start_date(
|
|
355
|
+
end_date=end_date,
|
|
356
|
+
time_frame=self.time_frame,
|
|
357
|
+
window_size=self.window_size
|
|
529
358
|
)
|
|
530
359
|
|
|
360
|
+
if start_date is None and end_date is None:
|
|
361
|
+
end_date = datetime.now(tz=timezone.utc)
|
|
362
|
+
start_date = self.create_start_date(
|
|
363
|
+
end_date=end_date,
|
|
364
|
+
time_frame=self.time_frame,
|
|
365
|
+
window_size=self.window_size
|
|
366
|
+
)
|
|
367
|
+
data = self._get_data_from_storage(
|
|
368
|
+
symbol=self.symbol,
|
|
369
|
+
market=self.market,
|
|
370
|
+
time_frame=self.time_frame,
|
|
371
|
+
storage_path=self.get_storage_directory(),
|
|
372
|
+
start_date=start_date,
|
|
373
|
+
end_date=end_date
|
|
374
|
+
)
|
|
375
|
+
|
|
376
|
+
if data is None:
|
|
377
|
+
data = self.get_ohlcv(
|
|
378
|
+
symbol=self.symbol,
|
|
379
|
+
time_frame=self.time_frame,
|
|
380
|
+
from_timestamp=start_date,
|
|
381
|
+
market=self.market,
|
|
382
|
+
to_timestamp=end_date
|
|
383
|
+
)
|
|
384
|
+
|
|
531
385
|
if save:
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
386
|
+
storage_directory = self.get_storage_directory()
|
|
387
|
+
|
|
388
|
+
if storage_directory is None:
|
|
389
|
+
raise OperationalException(
|
|
390
|
+
"Storage directory is not set for "
|
|
391
|
+
"the CCXTOHLCVDataProvider. Make sure to set the "
|
|
392
|
+
"storage directory in the configuration or "
|
|
393
|
+
"in the constructor."
|
|
394
|
+
)
|
|
395
|
+
|
|
535
396
|
self.save_data_to_storage(
|
|
536
|
-
symbol=symbol,
|
|
537
|
-
market=market,
|
|
397
|
+
symbol=self.symbol,
|
|
398
|
+
market=self.market,
|
|
399
|
+
time_frame=self.time_frame,
|
|
538
400
|
start_date=start_date,
|
|
539
401
|
end_date=end_date,
|
|
540
|
-
time_frame=time_frame,
|
|
541
402
|
data=data,
|
|
542
|
-
|
|
403
|
+
storage_directory_path=storage_directory
|
|
543
404
|
)
|
|
544
405
|
|
|
545
|
-
|
|
546
|
-
|
|
406
|
+
if self.pandas:
|
|
407
|
+
data = convert_polars_to_pandas(data)
|
|
547
408
|
|
|
548
|
-
|
|
409
|
+
return data
|
|
549
410
|
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
411
|
+
def get_backtest_data(
|
|
412
|
+
self,
|
|
413
|
+
backtest_index_date: datetime,
|
|
414
|
+
backtest_start_date: datetime = None,
|
|
415
|
+
backtest_end_date: datetime = None,
|
|
416
|
+
data_source: DataSource = None
|
|
417
|
+
) -> None:
|
|
418
|
+
"""
|
|
419
|
+
Fetches backtest data for a given datasource
|
|
420
|
+
|
|
421
|
+
Args:
|
|
422
|
+
backtest_index_date (datetime): The date for which to fetch
|
|
423
|
+
backtest data.
|
|
424
|
+
backtest_start_date (datetime): The start date for the
|
|
425
|
+
backtest data.
|
|
426
|
+
backtest_end_date (datetime): The end date for the
|
|
427
|
+
backtest data.
|
|
428
|
+
data_source (Optional[Datasource]): The data source for which to
|
|
429
|
+
fetch backtest data. Defaults to None.
|
|
430
|
+
|
|
431
|
+
Returns:
|
|
432
|
+
pl.DataFrame: The backtest data for the given datasource.
|
|
433
|
+
"""
|
|
434
|
+
|
|
435
|
+
if backtest_start_date is not None and \
|
|
436
|
+
backtest_end_date is not None:
|
|
437
|
+
|
|
438
|
+
if backtest_start_date < self._start_date_data_source:
|
|
439
|
+
|
|
440
|
+
if data_source is not None:
|
|
441
|
+
raise OperationalException(
|
|
442
|
+
f"Request data date {backtest_start_date} "
|
|
443
|
+
f"is before the range of "
|
|
444
|
+
f"the available data "
|
|
445
|
+
f"{self._start_date_data_source} "
|
|
446
|
+
f"- {self._end_date_data_source}."
|
|
447
|
+
f" for data source {data_source.identifier}."
|
|
448
|
+
f" Data source file path: "
|
|
449
|
+
f"{self.get_data_source_file_path()}"
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
raise OperationalException(
|
|
453
|
+
f"Request data date {backtest_start_date} "
|
|
454
|
+
f"is before the range of "
|
|
455
|
+
f"the available data "
|
|
456
|
+
f"{self._start_date_data_source} "
|
|
457
|
+
f"- {self._end_date_data_source}."
|
|
458
|
+
f" Data source file path: "
|
|
459
|
+
f"{self.get_data_source_file_path()}"
|
|
460
|
+
)
|
|
461
|
+
|
|
462
|
+
if backtest_end_date > self._end_date_data_source:
|
|
463
|
+
|
|
464
|
+
if data_source is not None:
|
|
465
|
+
raise OperationalException(
|
|
466
|
+
f"Request data date {backtest_end_date} "
|
|
467
|
+
f"is after the range of "
|
|
468
|
+
f"the available data "
|
|
469
|
+
f"{self._start_date_data_source} "
|
|
470
|
+
f"- {self._end_date_data_source}."
|
|
471
|
+
f" for data source {data_source.identifier}."
|
|
472
|
+
f" Data source file path: "
|
|
473
|
+
f"{self.get_data_source_file_path()}"
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
raise OperationalException(
|
|
477
|
+
f"Request data date {backtest_end_date} "
|
|
478
|
+
f"is after the range of "
|
|
479
|
+
f"the available data "
|
|
480
|
+
f"{self._start_date_data_source} "
|
|
481
|
+
f"- {self._end_date_data_source}."
|
|
482
|
+
f" Data source file path: "
|
|
483
|
+
f"{self.get_data_source_file_path()}"
|
|
484
|
+
)
|
|
485
|
+
|
|
486
|
+
data = self.data.filter(
|
|
487
|
+
(pl.col("Datetime") >= backtest_start_date) &
|
|
488
|
+
(pl.col("Datetime") <= backtest_end_date)
|
|
489
|
+
)
|
|
490
|
+
else:
|
|
491
|
+
try:
|
|
492
|
+
data = self.window_cache[backtest_index_date]
|
|
493
|
+
except KeyError:
|
|
494
|
+
|
|
495
|
+
try:
|
|
496
|
+
# Return the key in the cache that is closest to the
|
|
497
|
+
# backtest_index_date but not after it.
|
|
498
|
+
closest_key = min(
|
|
499
|
+
[k for k in self.window_cache.keys()
|
|
500
|
+
if k >= backtest_index_date]
|
|
501
|
+
)
|
|
502
|
+
data = self.window_cache[closest_key]
|
|
503
|
+
except ValueError:
|
|
504
|
+
|
|
505
|
+
if data_source is not None:
|
|
506
|
+
raise OperationalException(
|
|
507
|
+
"No OHLCV data available for the "
|
|
508
|
+
f"date: {backtest_index_date} "
|
|
509
|
+
f"within the prepared backtest data "
|
|
510
|
+
f"for data source {data_source.identifier}. "
|
|
511
|
+
)
|
|
512
|
+
|
|
513
|
+
raise OperationalException(
|
|
514
|
+
"No OHLCV data available for the "
|
|
515
|
+
f"date: {backtest_index_date} "
|
|
516
|
+
f"within the prepared backtest data "
|
|
517
|
+
f"for symbol {self.symbol}. "
|
|
518
|
+
)
|
|
519
|
+
|
|
520
|
+
if self.pandas:
|
|
521
|
+
data = convert_polars_to_pandas(data)
|
|
522
|
+
|
|
523
|
+
return data
|
|
553
524
|
|
|
554
525
|
def get_ohlcv(
|
|
555
526
|
self, symbol, time_frame, from_timestamp, market, to_timestamp=None
|
|
@@ -566,21 +537,22 @@ class CCXTOHLCVDataProvider(DataProvider):
|
|
|
566
537
|
|
|
567
538
|
Returns:
|
|
568
539
|
DataFrame: The ohlcv data for the symbol, time frame and market
|
|
569
|
-
|
|
540
|
+
in polars DataFrame format
|
|
570
541
|
"""
|
|
571
542
|
symbol = symbol.upper()
|
|
572
543
|
market_credential = self.get_credential(market)
|
|
573
544
|
exchange = self.initialize_exchange(market, market_credential)
|
|
545
|
+
time_frame = time_frame.value
|
|
574
546
|
|
|
575
547
|
if from_timestamp > to_timestamp:
|
|
576
548
|
raise OperationalException(
|
|
577
549
|
"OHLCV data start date must be before end date"
|
|
578
550
|
)
|
|
579
551
|
|
|
580
|
-
if self.config is not None and
|
|
581
|
-
datetime_format = self.config[
|
|
552
|
+
if self.config is not None and DATETIME_FORMAT in self.config:
|
|
553
|
+
datetime_format = self.config[DATETIME_FORMAT]
|
|
582
554
|
else:
|
|
583
|
-
datetime_format =
|
|
555
|
+
datetime_format = CCXT_DATETIME_FORMAT
|
|
584
556
|
|
|
585
557
|
if not exchange.has['fetchOHLCV']:
|
|
586
558
|
raise OperationalException(
|
|
@@ -588,7 +560,7 @@ class CCXTOHLCVDataProvider(DataProvider):
|
|
|
588
560
|
f"functionality get_ohclvs"
|
|
589
561
|
)
|
|
590
562
|
|
|
591
|
-
|
|
563
|
+
from_timestamp = exchange.parse8601(
|
|
592
564
|
from_timestamp.strftime(datetime_format)
|
|
593
565
|
)
|
|
594
566
|
|
|
@@ -600,67 +572,62 @@ class CCXTOHLCVDataProvider(DataProvider):
|
|
|
600
572
|
)
|
|
601
573
|
data = []
|
|
602
574
|
|
|
603
|
-
|
|
604
|
-
|
|
575
|
+
try:
|
|
576
|
+
while from_timestamp < to_timestamp:
|
|
577
|
+
ohlcv = exchange.fetch_ohlcv(
|
|
578
|
+
symbol, time_frame, from_timestamp
|
|
579
|
+
)
|
|
605
580
|
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
581
|
+
if len(ohlcv) > 0:
|
|
582
|
+
from_timestamp = \
|
|
583
|
+
ohlcv[-1][0] + \
|
|
584
|
+
exchange.parse_timeframe(time_frame) * 1000
|
|
585
|
+
else:
|
|
586
|
+
from_timestamp = to_timestamp
|
|
611
587
|
|
|
612
|
-
|
|
613
|
-
|
|
588
|
+
for candle in ohlcv:
|
|
589
|
+
datetime_stamp = parser.parse(exchange.iso8601(candle[0]))
|
|
614
590
|
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
591
|
+
to_timestamp_datetime = parser.parse(
|
|
592
|
+
exchange.iso8601(to_timestamp),
|
|
593
|
+
)
|
|
618
594
|
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
595
|
+
if datetime_stamp <= to_timestamp_datetime:
|
|
596
|
+
datetime_stamp = datetime_stamp \
|
|
597
|
+
.strftime(datetime_format)
|
|
622
598
|
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
599
|
+
data.append(
|
|
600
|
+
[datetime_stamp] +
|
|
601
|
+
[float(value) for value in candle[1:]]
|
|
602
|
+
)
|
|
627
603
|
|
|
628
|
-
|
|
604
|
+
sleep(exchange.rateLimit / 1000)
|
|
605
|
+
except ccxt.NetworkError as e:
|
|
606
|
+
logger.error(
|
|
607
|
+
f"Network error occurred while fetching OHLCV data for "
|
|
608
|
+
f"{symbol} on {market} with time frame {time_frame}: {e}"
|
|
609
|
+
)
|
|
610
|
+
raise NetworkError(
|
|
611
|
+
"Network error occurred, make sure you have an active "
|
|
612
|
+
"internet connection"
|
|
613
|
+
)
|
|
629
614
|
|
|
630
615
|
# Predefined column names
|
|
631
616
|
col_names = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
|
|
632
617
|
|
|
633
618
|
# Combine the Series into a DataFrame with given column names
|
|
634
|
-
df = pl.DataFrame(data, schema=col_names, orient="row")
|
|
619
|
+
df = pl.DataFrame(data, schema=col_names, orient="row").with_columns(
|
|
620
|
+
pl.col("Datetime").str.to_datetime(time_unit="ms", time_zone="UTC")
|
|
621
|
+
)
|
|
635
622
|
return df
|
|
636
623
|
|
|
637
624
|
def create_start_date(self, end_date, time_frame, window_size):
|
|
638
625
|
minutes = TimeFrame.from_value(time_frame).amount_of_minutes
|
|
639
626
|
return end_date - timedelta(minutes=window_size * minutes)
|
|
640
627
|
|
|
641
|
-
def
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
the given market and symbol.
|
|
645
|
-
|
|
646
|
-
Args:
|
|
647
|
-
market (str): The market to check
|
|
648
|
-
symbol (str): The symbol to check
|
|
649
|
-
|
|
650
|
-
Returns:
|
|
651
|
-
bool: True if the data provider supports the market and symbol,
|
|
652
|
-
False otherwise
|
|
653
|
-
"""
|
|
654
|
-
try:
|
|
655
|
-
exchange_class = getattr(ccxt, market.lower())
|
|
656
|
-
exchange = exchange_class()
|
|
657
|
-
symbols = exchange.load_markets()
|
|
658
|
-
return symbol.upper() in symbols
|
|
659
|
-
except Exception as e:
|
|
660
|
-
logger.error(
|
|
661
|
-
f"Error checking support for {market} and {symbol}: {e}"
|
|
662
|
-
)
|
|
663
|
-
return False
|
|
628
|
+
def create_end_date(self, start_date, time_frame, window_size):
|
|
629
|
+
minutes = TimeFrame.from_value(time_frame).amount_of_minutes
|
|
630
|
+
return start_date + timedelta(minutes=window_size * minutes)
|
|
664
631
|
|
|
665
632
|
@staticmethod
|
|
666
633
|
def initialize_exchange(market, market_credential):
|
|
@@ -673,7 +640,7 @@ class CCXTOHLCVDataProvider(DataProvider):
|
|
|
673
640
|
for the exchange
|
|
674
641
|
|
|
675
642
|
Returns:
|
|
676
|
-
|
|
643
|
+
Exchange: CCXT exchange client
|
|
677
644
|
"""
|
|
678
645
|
market = market.lower()
|
|
679
646
|
|
|
@@ -742,93 +709,48 @@ class CCXTOHLCVDataProvider(DataProvider):
|
|
|
742
709
|
f" named as {market.upper()}_SECRET_KEY"
|
|
743
710
|
)
|
|
744
711
|
|
|
745
|
-
def retrieve_data_from_storage(
|
|
746
|
-
self,
|
|
747
|
-
storage_path: str,
|
|
748
|
-
symbol: str = None,
|
|
749
|
-
market: str = None,
|
|
750
|
-
time_frame: str = None,
|
|
751
|
-
start_date: datetime = None,
|
|
752
|
-
end_date: datetime = None
|
|
753
|
-
) -> pl.DataFrame | None:
|
|
754
|
-
"""
|
|
755
|
-
Function to retrieve data from the storage path.
|
|
756
|
-
|
|
757
|
-
Args:
|
|
758
|
-
storage_path (str): The path to the storage.
|
|
759
|
-
symbol (str): The symbol to retrieve data for.
|
|
760
|
-
market (str): The market to retrieve data from.
|
|
761
|
-
time_frame (str): The time frame to retrieve data for.
|
|
762
|
-
start_date (datetime): The start date to retrieve data from.
|
|
763
|
-
end_date (datetime): The end date to retrieve data to.
|
|
764
|
-
|
|
765
|
-
Returns:
|
|
766
|
-
pl.DataFrame: The retrieved data in Polars DataFrame format.
|
|
767
|
-
"""
|
|
768
|
-
|
|
769
|
-
if not os.path.isdir(storage_path):
|
|
770
|
-
return None
|
|
771
|
-
|
|
772
|
-
file_name = self._create_filename(
|
|
773
|
-
symbol=symbol,
|
|
774
|
-
market=market,
|
|
775
|
-
time_frame=time_frame,
|
|
776
|
-
start_date=start_date,
|
|
777
|
-
end_date=end_date
|
|
778
|
-
)
|
|
779
|
-
|
|
780
|
-
file_path = os.path.join(storage_path, file_name)
|
|
781
|
-
|
|
782
|
-
if os.path.exists(file_path):
|
|
783
|
-
try:
|
|
784
|
-
data = pl.read_csv(file_path, has_header=True)
|
|
785
|
-
return data
|
|
786
|
-
except Exception as e:
|
|
787
|
-
logger.error(
|
|
788
|
-
f"Error reading data from {file_path}: {e}"
|
|
789
|
-
)
|
|
790
|
-
return None
|
|
791
|
-
|
|
792
|
-
return None
|
|
793
|
-
|
|
794
712
|
def save_data_to_storage(
|
|
795
713
|
self,
|
|
796
|
-
symbol,
|
|
797
|
-
market,
|
|
714
|
+
symbol: str,
|
|
715
|
+
market: str,
|
|
716
|
+
time_frame: TimeFrame,
|
|
798
717
|
start_date: datetime,
|
|
799
718
|
end_date: datetime,
|
|
800
|
-
time_frame: str,
|
|
801
719
|
data: pl.DataFrame,
|
|
802
|
-
|
|
720
|
+
storage_directory_path: str,
|
|
803
721
|
):
|
|
804
722
|
"""
|
|
805
723
|
Function to save data to the storage path.
|
|
806
724
|
|
|
807
725
|
Args:
|
|
726
|
+
symbol (str): The symbol for which the data is saved.
|
|
727
|
+
market (str): The market for which the data is saved.
|
|
728
|
+
time_frame (TimeFrame): The time frame for which the data is saved.
|
|
808
729
|
data (pl.DataFrame): The data to save.
|
|
809
|
-
|
|
730
|
+
storage_directory_path (str): The path to the storage directory.
|
|
731
|
+
start_date (datetime): The start date for the data.
|
|
732
|
+
end_date (datetime): The end date for the data.
|
|
810
733
|
|
|
811
734
|
Returns:
|
|
812
735
|
None
|
|
813
736
|
"""
|
|
814
|
-
if
|
|
737
|
+
if storage_directory_path is None:
|
|
815
738
|
raise OperationalException(
|
|
816
739
|
"Storage path is not set. Please set the storage path "
|
|
817
740
|
"before saving data."
|
|
818
741
|
)
|
|
819
742
|
|
|
820
|
-
if not os.path.isdir(
|
|
821
|
-
os.makedirs(
|
|
743
|
+
if not os.path.isdir(storage_directory_path):
|
|
744
|
+
os.makedirs(storage_directory_path)
|
|
822
745
|
|
|
823
|
-
symbol = symbol.upper().replace('/', '_')
|
|
824
746
|
filename = self._create_filename(
|
|
825
747
|
symbol=symbol,
|
|
826
748
|
market=market,
|
|
827
|
-
time_frame=time_frame,
|
|
749
|
+
time_frame=time_frame.value,
|
|
828
750
|
start_date=start_date,
|
|
829
751
|
end_date=end_date
|
|
830
752
|
)
|
|
831
|
-
storage_path = os.path.join(
|
|
753
|
+
storage_path = os.path.join(storage_directory_path, filename)
|
|
832
754
|
if os.path.exists(storage_path):
|
|
833
755
|
os.remove(storage_path)
|
|
834
756
|
|
|
@@ -839,15 +761,8 @@ class CCXTOHLCVDataProvider(DataProvider):
|
|
|
839
761
|
|
|
840
762
|
data.write_csv(storage_path)
|
|
841
763
|
|
|
842
|
-
def __repr__(self):
|
|
843
|
-
return (
|
|
844
|
-
f"CCXTOHLCVDataProvider(market={self.market}, "
|
|
845
|
-
f"symbol={self.symbol}, time_frame={self.time_frame}, "
|
|
846
|
-
f"window_size={self.window_size})"
|
|
847
|
-
)
|
|
848
|
-
|
|
849
|
-
@staticmethod
|
|
850
764
|
def _create_filename(
|
|
765
|
+
self,
|
|
851
766
|
symbol: str,
|
|
852
767
|
market: str,
|
|
853
768
|
time_frame: str,
|
|
@@ -868,11 +783,361 @@ class CCXTOHLCVDataProvider(DataProvider):
|
|
|
868
783
|
Returns:
|
|
869
784
|
str: The generated filename.
|
|
870
785
|
"""
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
786
|
+
datetime_format = self.config[DATETIME_FORMAT_FILE_NAME]
|
|
787
|
+
symbol = symbol.upper().replace('/', '-')
|
|
788
|
+
start_date_str = start_date.strftime(datetime_format)
|
|
789
|
+
end_date_str = end_date.strftime(datetime_format)
|
|
874
790
|
filename = (
|
|
875
|
-
f"{symbol}_{market}_{time_frame}_{start_date_str}_"
|
|
791
|
+
f"OHLCV_{symbol}_{market.upper()}_{time_frame}_{start_date_str}_"
|
|
876
792
|
f"{end_date_str}.csv"
|
|
877
793
|
)
|
|
878
794
|
return filename
|
|
795
|
+
|
|
796
|
+
def _get_data_from_storage(
|
|
797
|
+
self,
|
|
798
|
+
storage_path,
|
|
799
|
+
symbol: str,
|
|
800
|
+
market: str,
|
|
801
|
+
time_frame: TimeFrame,
|
|
802
|
+
start_date: datetime,
|
|
803
|
+
end_date: datetime,
|
|
804
|
+
) -> Union[pl.DataFrame, None]:
|
|
805
|
+
"""
|
|
806
|
+
Helper function to retrieve the data from the storage path if
|
|
807
|
+
it exists. If the data does not exist, it returns None.
|
|
808
|
+
"""
|
|
809
|
+
data = None
|
|
810
|
+
if storage_path is None:
|
|
811
|
+
return None
|
|
812
|
+
|
|
813
|
+
# Loop through all files in the data storage path
|
|
814
|
+
if not os.path.isdir(storage_path):
|
|
815
|
+
logger.error(
|
|
816
|
+
f"Storage path {storage_path} does not exist or is not a "
|
|
817
|
+
"directory."
|
|
818
|
+
)
|
|
819
|
+
return None
|
|
820
|
+
|
|
821
|
+
for file_name in os.listdir(storage_path):
|
|
822
|
+
if file_name.startswith("OHLCV_") and file_name.endswith(".csv"):
|
|
823
|
+
|
|
824
|
+
try:
|
|
825
|
+
data_source_spec = self.\
|
|
826
|
+
_get_data_source_specification_from_file_name(
|
|
827
|
+
file_name
|
|
828
|
+
)
|
|
829
|
+
|
|
830
|
+
if data_source_spec is None:
|
|
831
|
+
continue
|
|
832
|
+
|
|
833
|
+
if data_source_spec.symbol.upper() == symbol.upper() and \
|
|
834
|
+
data_source_spec.market.upper() == market.upper() and \
|
|
835
|
+
data_source_spec.time_frame.equals(time_frame):
|
|
836
|
+
|
|
837
|
+
# Check if the data source specification matches
|
|
838
|
+
# the start and end date if its specified
|
|
839
|
+
if (data_source_spec.start_date is not None and
|
|
840
|
+
data_source_spec.end_date is not None and
|
|
841
|
+
(data_source_spec.start_date <= start_date
|
|
842
|
+
and data_source_spec.end_date >= end_date)):
|
|
843
|
+
|
|
844
|
+
# If the data source specification matches,
|
|
845
|
+
# read the file
|
|
846
|
+
file_path = os.path.join(storage_path, file_name)
|
|
847
|
+
self.data_file_path = file_path
|
|
848
|
+
|
|
849
|
+
# Read CSV as-is first
|
|
850
|
+
data = pl.read_csv(file_path, low_memory=True)
|
|
851
|
+
|
|
852
|
+
# Check what columns we have
|
|
853
|
+
if "Datetime" in data.columns:
|
|
854
|
+
# Try to parse the datetime column
|
|
855
|
+
try:
|
|
856
|
+
# Try the ISO format with timezone first
|
|
857
|
+
data = data.with_columns(
|
|
858
|
+
pl.col("Datetime").str.to_datetime(
|
|
859
|
+
format="%Y-%m-%dT%H:%M:%S%.f%z",
|
|
860
|
+
time_zone="UTC"
|
|
861
|
+
)
|
|
862
|
+
)
|
|
863
|
+
except Exception as e1:
|
|
864
|
+
try:
|
|
865
|
+
# Fallback: let Polars infer the format
|
|
866
|
+
data = data.with_columns(
|
|
867
|
+
pl.col("Datetime").str.to_datetime(
|
|
868
|
+
time_zone="UTC"
|
|
869
|
+
)
|
|
870
|
+
)
|
|
871
|
+
except Exception as e2:
|
|
872
|
+
logger.warning(
|
|
873
|
+
f"Could not parse Datetime "
|
|
874
|
+
f"column in {file_name}: "
|
|
875
|
+
f"Format error: {str(e1)}, "
|
|
876
|
+
f"Infer error: {str(e2)}"
|
|
877
|
+
)
|
|
878
|
+
continue
|
|
879
|
+
else:
|
|
880
|
+
logger.warning(
|
|
881
|
+
f"No 'Datetime' column "
|
|
882
|
+
f"found in {file_name}. "
|
|
883
|
+
f"Available columns: {data.columns}"
|
|
884
|
+
)
|
|
885
|
+
continue
|
|
886
|
+
|
|
887
|
+
# Filter by date range
|
|
888
|
+
data = data.filter(
|
|
889
|
+
(pl.col("Datetime") >= start_date) &
|
|
890
|
+
(pl.col("Datetime") <= end_date)
|
|
891
|
+
)
|
|
892
|
+
break
|
|
893
|
+
|
|
894
|
+
except Exception as e:
|
|
895
|
+
logger.warning(
|
|
896
|
+
f"Error reading data from {file_name}: {str(e)}"
|
|
897
|
+
)
|
|
898
|
+
continue
|
|
899
|
+
|
|
900
|
+
return data
|
|
901
|
+
|
|
902
|
+
def _get_data_source_specification_from_file_name(
|
|
903
|
+
self, file_name: str
|
|
904
|
+
) -> Union[DataSource, None]:
|
|
905
|
+
"""
|
|
906
|
+
Extracts the data source specification from the OHLCV data filename.
|
|
907
|
+
Given that the file name is in the format:
|
|
908
|
+
|
|
909
|
+
"OHLCV_<SYMBOL>_<MARKET>_<TIME_FRAME>_<START_DATE>_<END_DATE>.csv",
|
|
910
|
+
this function extracts all attributes and returns a DataSource object.
|
|
911
|
+
This object can then later be used to compare it to the datasource
|
|
912
|
+
object that is passed to the get_data method.
|
|
913
|
+
|
|
914
|
+
Args:
|
|
915
|
+
file_name (str): The file name from which to extract the DataSource
|
|
916
|
+
|
|
917
|
+
Returns:
|
|
918
|
+
DataSource: The extracted data source specification.
|
|
919
|
+
"""
|
|
920
|
+
|
|
921
|
+
try:
|
|
922
|
+
parts = file_name.split('_')
|
|
923
|
+
|
|
924
|
+
if len(parts) < 3:
|
|
925
|
+
return None
|
|
926
|
+
|
|
927
|
+
data_type = parts[0].upper()
|
|
928
|
+
symbol = parts[1].upper().replace('-', '/')
|
|
929
|
+
market = parts[2].upper()
|
|
930
|
+
time_frame_str = parts[3]
|
|
931
|
+
start_date_str = parts[4]
|
|
932
|
+
end_date_str = parts[5].replace('.csv', '')
|
|
933
|
+
return DataSource(
|
|
934
|
+
data_type=DataType.from_string(data_type),
|
|
935
|
+
symbol=symbol,
|
|
936
|
+
market=market,
|
|
937
|
+
time_frame=TimeFrame.from_string(time_frame_str),
|
|
938
|
+
start_date=parser.parse(
|
|
939
|
+
start_date_str
|
|
940
|
+
).replace(tzinfo=timezone.utc),
|
|
941
|
+
end_date=parser.parse(
|
|
942
|
+
end_date_str
|
|
943
|
+
).replace(tzinfo=timezone.utc)
|
|
944
|
+
)
|
|
945
|
+
except ValueError:
|
|
946
|
+
logger.info(
|
|
947
|
+
f"Could not extract data source attributes from "
|
|
948
|
+
f"file name: {file_name}. "
|
|
949
|
+
f"Expected format 'OHLCV_<SYMBOL>_<MARKET>_<TIME_FRAME>_"
|
|
950
|
+
f"<START_DATE>_<END_DATE>.csv."
|
|
951
|
+
)
|
|
952
|
+
return None
|
|
953
|
+
|
|
954
|
+
def _precompute_sliding_windows(
|
|
955
|
+
self,
|
|
956
|
+
data,
|
|
957
|
+
window_size: int,
|
|
958
|
+
time_frame: TimeFrame,
|
|
959
|
+
start_date: datetime,
|
|
960
|
+
end_date: datetime
|
|
961
|
+
) -> None:
|
|
962
|
+
"""
|
|
963
|
+
Precompute all sliding windows for fast retrieval in backtest mode.
|
|
964
|
+
|
|
965
|
+
A sliding window is calculated as a subset of the data. It will
|
|
966
|
+
take for each timestamp in the data a window of size `window_size`
|
|
967
|
+
and stores it in a cache with the last timestamp of the window.
|
|
968
|
+
|
|
969
|
+
So if the window size is 200, the first window will be
|
|
970
|
+
the first 200 rows of the data, the second window will be
|
|
971
|
+
the rows 1 to 200, the third window will be the rows
|
|
972
|
+
2 to 201, and so on until the last window which will be
|
|
973
|
+
the last 200 rows of the data.
|
|
974
|
+
|
|
975
|
+
Args:
|
|
976
|
+
data (pl.DataFrame): The data to precompute the sliding
|
|
977
|
+
windows for.
|
|
978
|
+
window_size (int): The size of the sliding window to precompute.
|
|
979
|
+
start_date (datetime, optional): The start date for the sliding
|
|
980
|
+
windows.
|
|
981
|
+
end_date (datetime, optional): The end date for the sliding
|
|
982
|
+
windows.
|
|
983
|
+
|
|
984
|
+
Returns:
|
|
985
|
+
None
|
|
986
|
+
"""
|
|
987
|
+
self.window_cache = {}
|
|
988
|
+
timestamps = data["Datetime"].to_list()
|
|
989
|
+
# Only select the entries after the start date
|
|
990
|
+
timestamps = [
|
|
991
|
+
ts for ts in timestamps if start_date <= ts <= end_date
|
|
992
|
+
]
|
|
993
|
+
|
|
994
|
+
# Create sliding windows of size <window_size> for each timestamp
|
|
995
|
+
# in the data with the given the time frame and window size
|
|
996
|
+
for timestamp in timestamps:
|
|
997
|
+
# Use timestamp as key
|
|
998
|
+
self.window_cache[timestamp] = data.filter(
|
|
999
|
+
(data["Datetime"] <= timestamp) &
|
|
1000
|
+
(data["Datetime"] >= timestamp - timedelta(
|
|
1001
|
+
minutes=time_frame.amount_of_minutes * window_size
|
|
1002
|
+
))
|
|
1003
|
+
)
|
|
1004
|
+
|
|
1005
|
+
# Make sure the end datetime of the backtest is included in the
|
|
1006
|
+
# sliding windows cache
|
|
1007
|
+
if end_date not in self.window_cache:
|
|
1008
|
+
self.window_cache[end_date] = data[-window_size:]
|
|
1009
|
+
|
|
1010
|
+
def get_storage_directory(self) -> Union[str, None]:
    """
    Get the storage directory for the OHLCV data provider.

    The explicitly set ``storage_directory`` attribute takes
    precedence. Otherwise the directory is derived from the app
    config as ``<RESOURCE_DIRECTORY>/<DATA_DIRECTORY>``.

    Returns:
        Union[str, None]: The storage directory path if set,
            otherwise None.
    """

    if self.storage_directory is not None:
        return self.storage_directory

    if self.config is not None:
        resource_directory = self.config.get(RESOURCE_DIRECTORY)
        data_directory_name = self.config.get(DATA_DIRECTORY)

        # Guard against a partially populated config:
        # os.path.join raises TypeError on None components, and the
        # documented contract is to return None when no directory
        # can be determined.
        if resource_directory is None or data_directory_name is None:
            return None

        return os.path.join(resource_directory, data_directory_name)

    return None
def copy(self, data_source) -> "CCXTOHLCVDataProvider":
    """
    Return a new CCXTOHLCVDataProvider instance configured from the
    given data source. The data source was previously matched with
    the 'has_data' method; a fresh provider instance is registered
    in the framework so that every data source owns its own
    data provider.

    Args:
        data_source (DataSource): The data source specification that
            matches a data provider.

    Returns:
        DataProvider: A new instance of the data provider with the
            same configuration.

    Raises:
        OperationalException: If the data source is missing its
            market, time_frame or symbol attribute.
    """
    # The ccxt OHLCV provider cannot operate without these three
    # attributes; validate them up front, in this order.
    for attribute in ("market", "time_frame", "symbol"):
        value = getattr(data_source, attribute)

        if value is None or value == "":
            raise OperationalException(
                f"DataSource has not `{attribute}` attribute "
                f"specified, please specify the {attribute} "
                "attribute in the data source specification "
                "before using the ccxt OHLCV data provider"
            )

    directory = data_source.storage_path

    # Fall back to this provider's storage directory when the data
    # source does not carry its own storage path.
    if directory is None:
        directory = self.get_storage_directory()

    return CCXTOHLCVDataProvider(
        symbol=data_source.symbol,
        time_frame=data_source.time_frame,
        market=data_source.market,
        window_size=data_source.window_size,
        data_provider_identifier=data_source.data_provider_identifier,
        storage_directory=directory,
        config=self.config,
        pandas=data_source.pandas,
    )
def get_number_of_data_points(
    self,
    start_date: datetime,
    end_date: datetime
) -> int:

    """
    Returns the number of data points available between the given
    start and end dates (both boundaries inclusive).

    Args:
        start_date (datetime): The start date (inclusive).
        end_date (datetime): The end date (inclusive).

    Returns:
        int: The number of available data points between the given
            start and end dates.
    """
    # Count with a generator instead of materializing an
    # intermediate list just to take its length — the OHLCV data
    # iterated here can be large.
    return sum(
        1 for date in self.data["Datetime"].to_list()
        if start_date <= date <= end_date
    )
def get_missing_data_dates(
    self,
    start_date: datetime,
    end_date: datetime,
) -> List[datetime]:
    """
    Return the dates for which data is missing between the given
    start and end dates (both boundaries inclusive).

    Args:
        start_date (datetime): The start date for checking missing data.
        end_date (datetime): The end date for checking missing data.

    Returns:
        List[datetime]: A list of dates for which data is missing
            between the given start and end dates.
    """
    def _within_range(moment: datetime) -> bool:
        # Both boundaries are inclusive.
        return start_date <= moment <= end_date

    return list(filter(_within_range, self.missing_data_point_dates))
def get_data_source_file_path(self) -> Union[str, None]:
    """
    Get the file path of the data source if stored in local storage.

    Returns:
        Union[str, None]: The file path of the data source if stored
            locally, otherwise None.
    """
    file_path = self.data_file_path
    return file_path
|