investing-algorithm-framework 6.9.1__py3-none-any.whl → 7.19.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of investing-algorithm-framework might be problematic. Click here for more details.

Files changed (192)
  1. investing_algorithm_framework/__init__.py +147 -44
  2. investing_algorithm_framework/app/__init__.py +23 -6
  3. investing_algorithm_framework/app/algorithm/algorithm.py +5 -41
  4. investing_algorithm_framework/app/algorithm/algorithm_factory.py +17 -10
  5. investing_algorithm_framework/app/analysis/__init__.py +15 -0
  6. investing_algorithm_framework/app/analysis/backtest_data_ranges.py +121 -0
  7. investing_algorithm_framework/app/analysis/backtest_utils.py +107 -0
  8. investing_algorithm_framework/app/analysis/permutation.py +116 -0
  9. investing_algorithm_framework/app/analysis/ranking.py +297 -0
  10. investing_algorithm_framework/app/app.py +1322 -707
  11. investing_algorithm_framework/app/context.py +196 -88
  12. investing_algorithm_framework/app/eventloop.py +590 -0
  13. investing_algorithm_framework/app/reporting/__init__.py +16 -5
  14. investing_algorithm_framework/app/reporting/ascii.py +57 -202
  15. investing_algorithm_framework/app/reporting/backtest_report.py +284 -170
  16. investing_algorithm_framework/app/reporting/charts/__init__.py +10 -2
  17. investing_algorithm_framework/app/reporting/charts/entry_exist_signals.py +66 -0
  18. investing_algorithm_framework/app/reporting/charts/equity_curve.py +37 -0
  19. investing_algorithm_framework/app/reporting/charts/equity_curve_drawdown.py +11 -26
  20. investing_algorithm_framework/app/reporting/charts/line_chart.py +11 -0
  21. investing_algorithm_framework/app/reporting/charts/ohlcv_data_completeness.py +51 -0
  22. investing_algorithm_framework/app/reporting/charts/rolling_sharp_ratio.py +1 -1
  23. investing_algorithm_framework/app/reporting/generate.py +100 -114
  24. investing_algorithm_framework/app/reporting/tables/key_metrics_table.py +40 -32
  25. investing_algorithm_framework/app/reporting/tables/time_metrics_table.py +34 -27
  26. investing_algorithm_framework/app/reporting/tables/trade_metrics_table.py +23 -19
  27. investing_algorithm_framework/app/reporting/tables/trades_table.py +1 -1
  28. investing_algorithm_framework/app/reporting/tables/utils.py +1 -0
  29. investing_algorithm_framework/app/reporting/templates/report_template.html.j2 +10 -16
  30. investing_algorithm_framework/app/strategy.py +315 -175
  31. investing_algorithm_framework/app/task.py +5 -3
  32. investing_algorithm_framework/cli/cli.py +30 -12
  33. investing_algorithm_framework/cli/deploy_to_aws_lambda.py +131 -34
  34. investing_algorithm_framework/cli/initialize_app.py +20 -1
  35. investing_algorithm_framework/cli/templates/app_aws_lambda_function.py.template +18 -6
  36. investing_algorithm_framework/cli/templates/aws_lambda_dockerfile.template +22 -0
  37. investing_algorithm_framework/cli/templates/aws_lambda_dockerignore.template +92 -0
  38. investing_algorithm_framework/cli/templates/aws_lambda_requirements.txt.template +2 -2
  39. investing_algorithm_framework/cli/templates/azure_function_requirements.txt.template +1 -1
  40. investing_algorithm_framework/create_app.py +3 -5
  41. investing_algorithm_framework/dependency_container.py +25 -39
  42. investing_algorithm_framework/domain/__init__.py +45 -38
  43. investing_algorithm_framework/domain/backtesting/__init__.py +21 -0
  44. investing_algorithm_framework/domain/backtesting/backtest.py +503 -0
  45. investing_algorithm_framework/domain/backtesting/backtest_date_range.py +96 -0
  46. investing_algorithm_framework/domain/backtesting/backtest_evaluation_focuss.py +242 -0
  47. investing_algorithm_framework/domain/backtesting/backtest_metrics.py +459 -0
  48. investing_algorithm_framework/domain/backtesting/backtest_permutation_test.py +275 -0
  49. investing_algorithm_framework/domain/backtesting/backtest_run.py +605 -0
  50. investing_algorithm_framework/domain/backtesting/backtest_summary_metrics.py +162 -0
  51. investing_algorithm_framework/domain/backtesting/combine_backtests.py +280 -0
  52. investing_algorithm_framework/domain/config.py +27 -0
  53. investing_algorithm_framework/domain/constants.py +6 -34
  54. investing_algorithm_framework/domain/data_provider.py +200 -56
  55. investing_algorithm_framework/domain/exceptions.py +34 -1
  56. investing_algorithm_framework/domain/models/__init__.py +10 -19
  57. investing_algorithm_framework/domain/models/base_model.py +0 -6
  58. investing_algorithm_framework/domain/models/data/__init__.py +7 -0
  59. investing_algorithm_framework/domain/models/data/data_source.py +214 -0
  60. investing_algorithm_framework/domain/models/{market_data_type.py → data/data_type.py} +7 -7
  61. investing_algorithm_framework/domain/models/market/market_credential.py +6 -0
  62. investing_algorithm_framework/domain/models/order/order.py +34 -13
  63. investing_algorithm_framework/domain/models/order/order_status.py +1 -1
  64. investing_algorithm_framework/domain/models/order/order_type.py +1 -1
  65. investing_algorithm_framework/domain/models/portfolio/portfolio.py +14 -1
  66. investing_algorithm_framework/domain/models/portfolio/portfolio_configuration.py +5 -1
  67. investing_algorithm_framework/domain/models/portfolio/portfolio_snapshot.py +51 -11
  68. investing_algorithm_framework/domain/models/position/__init__.py +2 -1
  69. investing_algorithm_framework/domain/models/position/position.py +9 -0
  70. investing_algorithm_framework/domain/models/position/position_size.py +41 -0
  71. investing_algorithm_framework/domain/models/risk_rules/__init__.py +7 -0
  72. investing_algorithm_framework/domain/models/risk_rules/stop_loss_rule.py +51 -0
  73. investing_algorithm_framework/domain/models/risk_rules/take_profit_rule.py +55 -0
  74. investing_algorithm_framework/domain/models/snapshot_interval.py +0 -1
  75. investing_algorithm_framework/domain/models/strategy_profile.py +19 -151
  76. investing_algorithm_framework/domain/models/time_frame.py +7 -0
  77. investing_algorithm_framework/domain/models/time_interval.py +33 -0
  78. investing_algorithm_framework/domain/models/time_unit.py +63 -1
  79. investing_algorithm_framework/domain/models/trade/__init__.py +0 -2
  80. investing_algorithm_framework/domain/models/trade/trade.py +56 -32
  81. investing_algorithm_framework/domain/models/trade/trade_status.py +8 -2
  82. investing_algorithm_framework/domain/models/trade/trade_stop_loss.py +106 -41
  83. investing_algorithm_framework/domain/models/trade/trade_take_profit.py +161 -99
  84. investing_algorithm_framework/domain/order_executor.py +19 -0
  85. investing_algorithm_framework/domain/portfolio_provider.py +20 -1
  86. investing_algorithm_framework/domain/services/__init__.py +0 -13
  87. investing_algorithm_framework/domain/strategy.py +1 -29
  88. investing_algorithm_framework/domain/utils/__init__.py +5 -1
  89. investing_algorithm_framework/domain/utils/custom_tqdm.py +22 -0
  90. investing_algorithm_framework/domain/utils/jupyter_notebook_detection.py +19 -0
  91. investing_algorithm_framework/domain/utils/polars.py +17 -14
  92. investing_algorithm_framework/download_data.py +40 -10
  93. investing_algorithm_framework/infrastructure/__init__.py +13 -25
  94. investing_algorithm_framework/infrastructure/data_providers/__init__.py +7 -4
  95. investing_algorithm_framework/infrastructure/data_providers/ccxt.py +811 -546
  96. investing_algorithm_framework/infrastructure/data_providers/csv.py +433 -122
  97. investing_algorithm_framework/infrastructure/data_providers/pandas.py +599 -0
  98. investing_algorithm_framework/infrastructure/database/__init__.py +6 -2
  99. investing_algorithm_framework/infrastructure/database/sql_alchemy.py +81 -0
  100. investing_algorithm_framework/infrastructure/models/__init__.py +0 -13
  101. investing_algorithm_framework/infrastructure/models/order/order.py +9 -3
  102. investing_algorithm_framework/infrastructure/models/trades/trade_stop_loss.py +27 -8
  103. investing_algorithm_framework/infrastructure/models/trades/trade_take_profit.py +21 -7
  104. investing_algorithm_framework/infrastructure/order_executors/__init__.py +2 -0
  105. investing_algorithm_framework/infrastructure/order_executors/backtest_oder_executor.py +28 -0
  106. investing_algorithm_framework/infrastructure/repositories/repository.py +16 -2
  107. investing_algorithm_framework/infrastructure/repositories/trade_repository.py +2 -2
  108. investing_algorithm_framework/infrastructure/repositories/trade_stop_loss_repository.py +6 -0
  109. investing_algorithm_framework/infrastructure/repositories/trade_take_profit_repository.py +6 -0
  110. investing_algorithm_framework/infrastructure/services/__init__.py +0 -4
  111. investing_algorithm_framework/services/__init__.py +105 -8
  112. investing_algorithm_framework/services/backtesting/backtest_service.py +536 -476
  113. investing_algorithm_framework/services/configuration_service.py +14 -4
  114. investing_algorithm_framework/services/data_providers/__init__.py +5 -0
  115. investing_algorithm_framework/services/data_providers/data_provider_service.py +850 -0
  116. investing_algorithm_framework/{app/reporting → services}/metrics/__init__.py +48 -17
  117. investing_algorithm_framework/{app/reporting → services}/metrics/drawdown.py +10 -10
  118. investing_algorithm_framework/{app/reporting → services}/metrics/equity_curve.py +2 -2
  119. investing_algorithm_framework/{app/reporting → services}/metrics/exposure.py +60 -2
  120. investing_algorithm_framework/services/metrics/generate.py +358 -0
  121. investing_algorithm_framework/{app/reporting → services}/metrics/profit_factor.py +36 -0
  122. investing_algorithm_framework/{app/reporting → services}/metrics/recovery.py +2 -2
  123. investing_algorithm_framework/{app/reporting → services}/metrics/returns.py +146 -147
  124. investing_algorithm_framework/services/metrics/risk_free_rate.py +28 -0
  125. investing_algorithm_framework/{app/reporting/metrics/sharp_ratio.py → services/metrics/sharpe_ratio.py} +6 -10
  126. investing_algorithm_framework/{app/reporting → services}/metrics/sortino_ratio.py +3 -7
  127. investing_algorithm_framework/services/metrics/trades.py +500 -0
  128. investing_algorithm_framework/services/metrics/volatility.py +97 -0
  129. investing_algorithm_framework/{app/reporting → services}/metrics/win_rate.py +70 -3
  130. investing_algorithm_framework/services/order_service/order_backtest_service.py +21 -31
  131. investing_algorithm_framework/services/order_service/order_service.py +9 -71
  132. investing_algorithm_framework/services/portfolios/portfolio_provider_lookup.py +0 -2
  133. investing_algorithm_framework/services/portfolios/portfolio_service.py +3 -13
  134. investing_algorithm_framework/services/portfolios/portfolio_snapshot_service.py +62 -96
  135. investing_algorithm_framework/services/portfolios/portfolio_sync_service.py +0 -3
  136. investing_algorithm_framework/services/repository_service.py +5 -2
  137. investing_algorithm_framework/services/trade_order_evaluator/__init__.py +9 -0
  138. investing_algorithm_framework/services/trade_order_evaluator/backtest_trade_oder_evaluator.py +113 -0
  139. investing_algorithm_framework/services/trade_order_evaluator/default_trade_order_evaluator.py +51 -0
  140. investing_algorithm_framework/services/trade_order_evaluator/trade_order_evaluator.py +80 -0
  141. investing_algorithm_framework/services/trade_service/__init__.py +7 -1
  142. investing_algorithm_framework/services/trade_service/trade_service.py +51 -29
  143. investing_algorithm_framework/services/trade_service/trade_stop_loss_service.py +39 -0
  144. investing_algorithm_framework/services/trade_service/trade_take_profit_service.py +41 -0
  145. investing_algorithm_framework-7.19.15.dist-info/METADATA +537 -0
  146. {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/RECORD +159 -148
  147. investing_algorithm_framework/app/reporting/evaluation.py +0 -243
  148. investing_algorithm_framework/app/reporting/metrics/risk_free_rate.py +0 -8
  149. investing_algorithm_framework/app/reporting/metrics/volatility.py +0 -69
  150. investing_algorithm_framework/cli/templates/requirements_azure_function.txt.template +0 -3
  151. investing_algorithm_framework/domain/models/backtesting/__init__.py +0 -9
  152. investing_algorithm_framework/domain/models/backtesting/backtest_date_range.py +0 -47
  153. investing_algorithm_framework/domain/models/backtesting/backtest_position.py +0 -120
  154. investing_algorithm_framework/domain/models/backtesting/backtest_reports_evaluation.py +0 -0
  155. investing_algorithm_framework/domain/models/backtesting/backtest_results.py +0 -440
  156. investing_algorithm_framework/domain/models/data_source.py +0 -21
  157. investing_algorithm_framework/domain/models/date_range.py +0 -64
  158. investing_algorithm_framework/domain/models/trade/trade_risk_type.py +0 -34
  159. investing_algorithm_framework/domain/models/trading_data_types.py +0 -48
  160. investing_algorithm_framework/domain/models/trading_time_frame.py +0 -223
  161. investing_algorithm_framework/domain/services/market_data_sources.py +0 -543
  162. investing_algorithm_framework/domain/services/market_service.py +0 -153
  163. investing_algorithm_framework/domain/services/observable.py +0 -51
  164. investing_algorithm_framework/domain/services/observer.py +0 -19
  165. investing_algorithm_framework/infrastructure/models/market_data_sources/__init__.py +0 -16
  166. investing_algorithm_framework/infrastructure/models/market_data_sources/ccxt.py +0 -746
  167. investing_algorithm_framework/infrastructure/models/market_data_sources/csv.py +0 -270
  168. investing_algorithm_framework/infrastructure/models/market_data_sources/pandas.py +0 -312
  169. investing_algorithm_framework/infrastructure/services/market_service/__init__.py +0 -5
  170. investing_algorithm_framework/infrastructure/services/market_service/ccxt_market_service.py +0 -471
  171. investing_algorithm_framework/infrastructure/services/performance_service/__init__.py +0 -7
  172. investing_algorithm_framework/infrastructure/services/performance_service/backtest_performance_service.py +0 -2
  173. investing_algorithm_framework/infrastructure/services/performance_service/performance_service.py +0 -322
  174. investing_algorithm_framework/services/market_data_source_service/__init__.py +0 -10
  175. investing_algorithm_framework/services/market_data_source_service/backtest_market_data_source_service.py +0 -269
  176. investing_algorithm_framework/services/market_data_source_service/data_provider_service.py +0 -350
  177. investing_algorithm_framework/services/market_data_source_service/market_data_source_service.py +0 -377
  178. investing_algorithm_framework/services/strategy_orchestrator_service.py +0 -296
  179. investing_algorithm_framework-6.9.1.dist-info/METADATA +0 -440
  180. /investing_algorithm_framework/{app/reporting → services}/metrics/alpha.py +0 -0
  181. /investing_algorithm_framework/{app/reporting → services}/metrics/beta.py +0 -0
  182. /investing_algorithm_framework/{app/reporting → services}/metrics/cagr.py +0 -0
  183. /investing_algorithm_framework/{app/reporting → services}/metrics/calmar_ratio.py +0 -0
  184. /investing_algorithm_framework/{app/reporting → services}/metrics/mean_daily_return.py +0 -0
  185. /investing_algorithm_framework/{app/reporting → services}/metrics/price_efficiency.py +0 -0
  186. /investing_algorithm_framework/{app/reporting → services}/metrics/standard_deviation.py +0 -0
  187. /investing_algorithm_framework/{app/reporting → services}/metrics/treynor_ratio.py +0 -0
  188. /investing_algorithm_framework/{app/reporting → services}/metrics/ulcer.py +0 -0
  189. /investing_algorithm_framework/{app/reporting → services}/metrics/value_at_risk.py +0 -0
  190. {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/LICENSE +0 -0
  191. {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/WHEEL +0 -0
  192. {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/entry_points.txt +0 -0
@@ -2,7 +2,7 @@ import logging
2
2
  import os.path
3
3
  from datetime import datetime, timedelta, timezone
4
4
  from time import sleep
5
- from typing import Union
5
+ from typing import Union, List
6
6
 
7
7
  import ccxt
8
8
  import pandas as pd
@@ -10,119 +10,148 @@ import polars as pl
10
10
  from dateutil import parser
11
11
 
12
12
  from investing_algorithm_framework.domain import OperationalException, \
13
- DATETIME_FORMAT, DataProvider, TradingDataType, convert_polars_to_pandas, \
14
- NetworkError, TimeFrame, MarketCredential
13
+ DATETIME_FORMAT, DataProvider, convert_polars_to_pandas, \
14
+ NetworkError, TimeFrame, MarketCredential, DataType, DataSource, \
15
+ RESOURCE_DIRECTORY, CCXT_DATETIME_FORMAT, DATA_DIRECTORY, \
16
+ DATETIME_FORMAT_FILE_NAME
15
17
 
16
18
  logger = logging.getLogger("investing_algorithm_framework")
17
19
 
18
20
 
19
- class CCXTDataProvider(DataProvider):
21
+ class CCXTOHLCVDataProvider(DataProvider):
20
22
  """
23
+ Implementation of Data Provider for OHLCV data. OHLCV data
24
+ will be downloaded with the CCXT library.
25
+
26
+ If in backtest mode, and the data is already
27
+ available in the storage path, it will be loaded from there. If the
28
+ data is not available in the storage path, it will be fetched from the
29
+ CCXT library and saved to the storage path in csv format.
30
+
31
+ If the get_data method is called with a start and end date, the
32
+ data provider will look if the data is already available in the
33
+ storage directory. If this is the case, it will read the data
34
+ from the csv file and return it.
35
+
36
+ The CSV file should contain the following
37
+ columns: Datetime, Open, High, Low, Close, Volume.
38
+ The Datetime column should be in UTC timezone and in milliseconds.
39
+ The data will be loaded into a Polars DataFrame and will be kept in memory.
40
+
41
+ Attributes:
42
+ data_type (DataType): The type of data provided by this provider,
43
+ which is OHLCV.
44
+ data_provider_identifier (str): Identifier for the CSV OHLCV data
45
+ provider.
46
+ _start_date_data_source (datetime): The start date of the data
47
+ source, determined from the first row of the data.
48
+ _end_date_data_source (datetime): The end date of the data
49
+ source, determined from the last row of the data.
50
+ data (polars.DataFrame): The OHLCV data loaded from the CSV file when
51
+ in backtest mode.
21
52
  """
22
- backtest_data_directory = None
23
- backtest_data_end_date = None
24
- total_minutes_time_frame = None
25
- column_names = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
53
+ data_type = DataType.OHLCV
54
+ data_provider_identifier = "ccxt_ohlcv_data_provider"
55
+ storage_directory = None
26
56
 
27
57
  def __init__(
28
58
  self,
29
- data_type: str = None,
30
- market=None,
31
- symbol=None,
32
- time_frame=None,
59
+ symbol: str = None,
60
+ time_frame: str = None,
61
+ market: str = None,
33
62
  window_size=None,
34
- priority=1
63
+ data_provider_identifier: str = None,
64
+ storage_directory=None,
65
+ pandas: bool = False,
66
+ config=None
35
67
  ):
68
+ """
69
+ Initialize the CCXT OHLCV Data Provider.
70
+
71
+ Args:
72
+ symbol (str): The symbol for which the data is provided.
73
+ time_frame (str): The time frame for the data.
74
+ market (str, optional): The market for the data. Defaults to None.
75
+ window_size (int, optional): The window size for the data.
76
+ Defaults to None.
77
+ data_provider_identifier (str, optional): The identifier for the
78
+ data provider.
79
+ pandas (bool, optional): If True, the data will be returned
80
+ as a pandas DataFrame instead of a Polars DataFrame.
81
+ storage_directory: (str, optional): the storage directory where
82
+ the OHLCV data need to be stored.
83
+ """
84
+ if data_provider_identifier is None:
85
+ data_provider_identifier = self.data_provider_identifier
86
+
36
87
  super().__init__(
37
- data_type=data_type,
38
88
  symbol=symbol,
89
+ market=market,
39
90
  time_frame=time_frame,
40
91
  window_size=window_size,
41
- priority=priority
92
+ storage_directory=storage_directory,
93
+ data_provider_identifier=data_provider_identifier,
94
+ config=config
42
95
  )
43
-
44
- self.market = market
45
- self.data = None
46
96
  self._start_date_data_source = None
47
97
  self._end_date_data_source = None
48
- self.backtest_end_index = self.window_size
49
- self.backtest_start_index = 0
98
+ self._columns = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
99
+ self.pandas = pandas
50
100
  self.window_cache = {}
101
+ self.data = None
102
+ self.total_number_of_data_points = 0
103
+ self.missing_data_point_dates = []
104
+ self.data_file_path = None
51
105
 
52
- def initialize_exchange(self, market, market_credential):
106
+ def has_data(
107
+ self,
108
+ data_source: DataSource,
109
+ start_date: datetime = None,
110
+ end_date: datetime = None
111
+ ) -> bool:
53
112
  """
54
- Initializes the exchange for the given market.
113
+ Implementation of the has_data method to check if
114
+ the data provider has data for the given data source.
115
+
116
+ If start_date and/or end_date are provided, first the
117
+ storage_directory will be checked for existence of the data.
118
+
119
+ If nothing is found or start_date and/or end_date are not provided
120
+ the ccxt library will be directly queried.
55
121
 
56
122
  Args:
57
- market (str): The market to initialize the exchange for.
58
- market_credential (MarketCredential): MarketCredential - the market
123
+ data_source (DataSource): The data source to check.
124
+ start_date (datetime, optional): The start date for the data.
125
+ Defaults to None.
126
+ end_date (datetime, optional): The end date for the data.
127
+ Defaults to None.
59
128
 
60
129
  Returns:
61
- Instance of the exchange class.
130
+ bool: True if the data provider has data for the given data source,
131
+ False otherwise.
62
132
  """
133
+ market = data_source.market
134
+ symbol = data_source.symbol
135
+ data_type = data_source.data_type
136
+ start_date = start_date or data_source.start_date
137
+ end_date = end_date or data_source.end_date
63
138
 
64
- market = market.lower()
65
- if not hasattr(ccxt, market):
66
- raise OperationalException(
67
- f"No exchange found for market id {market}"
68
- )
69
-
70
- exchange_class = getattr(ccxt, market)
139
+ if not DataType.OHLCV.equals(data_type):
140
+ return False
71
141
 
72
- if exchange_class is None:
73
- raise OperationalException(
74
- f"No exchange found for market id {market}"
142
+ if start_date is not None and end_date is not None:
143
+ # Check if the data is available in the storage path
144
+ data = self._get_data_from_storage(
145
+ symbol=symbol,
146
+ market=market,
147
+ time_frame=data_source.time_frame,
148
+ storage_path=data_source.storage_path,
149
+ start_date=start_date,
150
+ end_date=end_date
75
151
  )
76
152
 
77
- if market_credential is not None:
78
- exchange = exchange_class({
79
- 'apiKey': market_credential.api_key,
80
- 'secret': market_credential.secret_key,
81
- })
82
- else:
83
- exchange = exchange_class({})
84
-
85
- return exchange
86
-
87
- def pre_pare_backtest_data(
88
- self,
89
- backtest_start_date,
90
- backtest_end_date,
91
- symbol: str = None,
92
- market: str = None,
93
- time_frame: str = None,
94
- window_size=None
95
- ) -> None:
96
- pass
97
-
98
- def get_backtest_data(
99
- self,
100
- date: datetime = None,
101
- symbol: str = None,
102
- market: str = None,
103
- time_frame: str = None,
104
- backtest_start_date: datetime = None,
105
- backtest_end_date: datetime = None,
106
- window_size=None,
107
- pandas=False
108
- ) -> None:
109
- pass
110
-
111
- def has_data(
112
- self,
113
- data_type: str = None,
114
- symbol: str = None,
115
- market: str = None,
116
- time_frame: str = None,
117
- start_date: datetime = None,
118
- end_date: datetime = None,
119
- window_size=None,
120
- ) -> bool:
121
-
122
- if TradingDataType.CUSTOM.equals(data_type):
123
- raise OperationalException(
124
- "Custom data type is not supported for CCXTOHLCVDataProvider"
125
- )
153
+ if data is not None:
154
+ return True
126
155
 
127
156
  if market is None:
128
157
  market = "binance"
@@ -137,289 +166,118 @@ class CCXTDataProvider(DataProvider):
137
166
  return symbol in symbols
138
167
 
139
168
  except ccxt.NetworkError:
140
- raise NetworkError(
141
- "Network error occurred, make sure you have "
142
- "an active internet connection"
143
- )
169
+ pass
144
170
 
145
- except Exception:
171
+ except Exception as e:
172
+ logger.error(e)
146
173
  return False
147
174
 
148
- def get_data(
175
+ def prepare_backtest_data(
149
176
  self,
150
- data_type: str = None,
151
- date: datetime = None,
152
- symbol: str = None,
153
- market: str = None,
154
- time_frame: str = None,
155
- start_date: datetime = None,
156
- end_date: datetime = None,
157
- storage_path=None,
158
- window_size=None,
159
- pandas=False,
160
- ):
161
-
162
- if market is None:
163
- market = self.market
164
-
165
- if market is None:
166
- raise OperationalException(
167
- "Market is not set. Please set the market "
168
- "before calling get_data."
169
- )
170
-
171
- if symbol is None:
172
- symbol = self.symbol
173
-
174
- if symbol is None:
175
- raise OperationalException(
176
- "Symbol is not set. Please set the symbol "
177
- "before calling get_data."
178
- )
179
-
180
- if data_type is None:
181
- data_type = self.data_type
182
-
183
- if TradingDataType.OHLCV.equals(data_type):
184
-
185
- if time_frame is None:
186
- time_frame = self.time_frame
187
-
188
- if time_frame is None:
189
- raise OperationalException(
190
- "Time frame is not set. Please set the time frame "
191
- "before requesting ohlcv data."
192
- )
193
-
194
- if end_date is None and window_size is None:
195
- raise OperationalException(
196
- "A window size is required or a start and end date "
197
- "to retrieve ohlcv data."
198
- )
199
-
200
- if end_date is None:
201
- end_date = datetime.now(tz=timezone.utc)
202
-
203
- if start_date is None:
204
-
205
- if date is not None:
206
- start_date = date
207
- else:
208
- start_date = self.create_start_date(
209
- end_date=end_date,
210
- time_frame=time_frame,
211
- window_size=window_size
212
- )
213
-
214
- data = self.get_ohlcv(
215
- symbol=symbol,
216
- time_frame=time_frame,
217
- from_timestamp=start_date,
218
- market=market,
219
- to_timestamp=end_date
220
- )
221
-
222
- if pandas:
223
- data = convert_polars_to_pandas(data)
224
-
225
- return data
226
-
227
- raise OperationalException(
228
- f"Data type {data_type} is not supported for CCXTDataProvider"
229
- )
230
-
231
- def get_ohlcv(
232
- self, symbol, time_frame, from_timestamp, market, to_timestamp=None
233
- ) -> pl.DataFrame:
177
+ backtest_start_date,
178
+ backtest_end_date,
179
+ ) -> None:
234
180
  """
235
- Function to retrieve ohlcv data for a symbol, time frame and market
181
+ Prepares backtest data for a given symbol and date range.
236
182
 
237
183
  Args:
238
- symbol (str): The symbol to retrieve ohlcv data for
239
- time_frame: The time frame to retrieve ohlcv data for
240
- from_timestamp: The start date to retrieve ohlcv data from
241
- market: The market to retrieve ohlcv data from
242
- to_timestamp: The end date to retrieve ohlcv data to
184
+ backtest_start_date (datetime): The start date for the
185
+ backtest data.
186
+ backtest_end_date (datetime): The end date for the
187
+ backtest data.
188
+
189
+ Raises:
190
+ OperationalException: If the backtest start date is before the
191
+ start date of the data source or if the backtest end date is
192
+ after the end date of the data source.
243
193
 
244
194
  Returns:
245
- DataFrame: The ohlcv data for the symbol, time frame and market
246
- in polars DataFrame format
195
+ None
247
196
  """
248
-
249
- market_credential = self.get_credential(market)
250
- exchange = self.initialize_exchange(market, market_credential)
251
-
252
- if from_timestamp > to_timestamp:
253
- raise OperationalException(
254
- "OHLCV data start date must be before end date"
255
- )
256
-
257
- if self.config is not None and "DATETIME_FORMAT" in self.config:
258
- datetime_format = self.config["DATETIME_FORMAT"]
197
+ # There must be at least backtest_start_date - window_size * time_frame
198
+ # data available to create a sliding window.
199
+ if self.window_size is not None:
200
+ required_start_date = backtest_start_date - \
201
+ timedelta(
202
+ minutes=TimeFrame.from_value(
203
+ self.time_frame
204
+ ).amount_of_minutes * self.window_size
205
+ )
259
206
  else:
260
- datetime_format = DATETIME_FORMAT
261
-
262
- if not exchange.has['fetchOHLCV']:
263
- raise OperationalException(
264
- f"Market service {market} does not support "
265
- f"functionality get_ohclvs"
266
- )
267
-
268
- from_time_stamp = exchange.parse8601(
269
- from_timestamp.strftime(datetime_format)
207
+ required_start_date = backtest_start_date
208
+
209
+ storage_directory_path = self.get_storage_directory()
210
+
211
+ # Check if the data source is already available in the storage path
212
+ data = self._get_data_from_storage(
213
+ symbol=self.symbol,
214
+ market=self.market,
215
+ time_frame=self.time_frame,
216
+ storage_path=storage_directory_path,
217
+ start_date=required_start_date,
218
+ end_date=backtest_end_date
270
219
  )
271
220
 
272
- if to_timestamp is None:
273
- to_timestamp = exchange.milliseconds()
274
- else:
275
- to_timestamp = exchange.parse8601(
276
- to_timestamp.strftime(datetime_format)
221
+ if data is None:
222
+ # Disable pandas if it is set to True, because logic
223
+ # depends on polars DataFrame
224
+ has_pandas_flag = self.pandas
225
+ self.pandas = False
226
+
227
+ # If the data is not available in the storage path,
228
+ # retrieve it from the CCXT data provider
229
+ data = self.get_data(
230
+ start_date=required_start_date,
231
+ end_date=backtest_end_date,
232
+ save=True,
277
233
  )
278
- data = []
279
-
280
- while from_time_stamp < to_timestamp:
281
- ohlcv = exchange.fetch_ohlcv(symbol, time_frame, from_time_stamp)
282
-
283
- if len(ohlcv) > 0:
284
- from_time_stamp = \
285
- ohlcv[-1][0] + exchange.parse_timeframe(time_frame) * 1000
286
- else:
287
- from_time_stamp = to_timestamp
288
234
 
289
- for candle in ohlcv:
290
- datetime_stamp = parser.parse(exchange.iso8601(candle[0]))
235
+ self.pandas = has_pandas_flag
291
236
 
292
- to_timestamp_datetime = parser.parse(
293
- exchange.iso8601(to_timestamp),
294
- )
295
-
296
- if datetime_stamp <= to_timestamp_datetime:
297
- datetime_stamp = datetime_stamp \
298
- .strftime(datetime_format)
299
-
300
- data.append(
301
- [datetime_stamp] +
302
- [float(value) for value in candle[1:]]
303
- )
237
+ self.data = data
238
+ self._start_date_data_source = self.data["Datetime"].min()
239
+ self._end_date_data_source = self.data["Datetime"].max()
240
+ self.total_number_of_data_points = len(self.data)
304
241
 
305
- sleep(exchange.rateLimit / 1000)
306
-
307
- # Predefined column names
308
- col_names = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
242
+ if required_start_date < self._start_date_data_source:
243
+ self.number_of_missing_data_points = (
244
+ self._start_date_data_source - required_start_date
245
+ ).total_seconds() / (
246
+ TimeFrame.from_value(self.time_frame).amount_of_minutes * 60
247
+ )
309
248
 
310
- # Combine the Series into a DataFrame with given column names
311
- df = pl.DataFrame(data, schema=col_names, orient="row")
312
- return df
249
+ if self.window_size is not None:
250
+ # Create cache with sliding windows
251
+ self._precompute_sliding_windows(
252
+ data=data,
253
+ window_size=self.window_size,
254
+ time_frame=self.time_frame,
255
+ start_date=backtest_start_date,
256
+ end_date=backtest_end_date
257
+ )
313
258
 
314
- def create_start_date(self, end_date, time_frame, window_size):
315
- minutes = TimeFrame.from_value(time_frame).amount_of_minutes
316
- return end_date - timedelta(minutes=window_size * minutes)
259
+ n_min = TimeFrame.from_value(self.time_frame).amount_of_minutes
260
+ # Assume self.data is a Polars DataFrame with a "Datetime" column
261
+ expected_dates = pl.datetime_range(
262
+ start=required_start_date,
263
+ end=backtest_end_date,
264
+ interval=f"{n_min}m",
265
+ eager=True
266
+ ).to_list()
317
267
 
268
+ actual_dates = self.data["Datetime"].to_list()
318
269
 
319
- class CCXTOHLCVDataProvider(DataProvider):
320
- """
321
- CCXT OHLCV Data Provider is a data provider that uses the
322
- CCXT library to retrieve OHLCV data from various cryptocurrency
323
- markets. It supports multiple markets and symbols,
324
- """
325
- backtest_data_directory = None
326
- backtest_data_end_date = None
327
- total_minutes_time_frame = None
328
- column_names = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
329
-
330
- def __init__(
331
- self,
332
- market=None,
333
- symbol=None,
334
- time_frame=None,
335
- window_size=None,
336
- priority=1
337
- ):
338
- super().__init__(
339
- data_type=TradingDataType.OHLCV.value,
340
- symbol=symbol,
341
- time_frame=time_frame,
342
- window_size=window_size,
343
- priority=priority
270
+ # Find missing dates
271
+ self.missing_data_point_dates = sorted(
272
+ set(expected_dates) - set(actual_dates)
344
273
  )
345
274
 
346
- self.market = market
347
- self.data = None
348
- self._start_date_data_source = None
349
- self._end_date_data_source = None
350
- self.backtest_end_index = self.window_size
351
- self.backtest_start_index = 0
352
- self.window_cache = {}
353
-
354
- def pre_pare_backtest_data(
355
- self,
356
- backtest_start_date,
357
- backtest_end_date,
358
- symbol: str = None,
359
- market: str = None,
360
- time_frame: str = None,
361
- window_size=None
362
- ) -> None:
363
- pass
364
-
365
- def get_backtest_data(
366
- self,
367
- date: datetime = None,
368
- symbol: str = None,
369
- market: str = None,
370
- time_frame: str = None,
371
- backtest_start_date: datetime = None,
372
- backtest_end_date: datetime = None,
373
- window_size=None,
374
- pandas=False
375
- ) -> None:
376
- pass
377
-
378
- def has_data(
379
- self,
380
- data_type: str = None,
381
- symbol: str = None,
382
- market: str = None,
383
- time_frame: str = None,
384
- start_date: datetime = None,
385
- end_date: datetime = None,
386
- window_size=None,
387
- ) -> bool:
388
-
389
- if market is None:
390
- market = "binance"
391
-
392
- # Check if ccxt has an exchange for the given market
393
- try:
394
- market = market.lower()
395
- exchange_class = getattr(ccxt, market)
396
- exchange = exchange_class()
397
- symbols = exchange.load_markets()
398
- symbols = list(symbols.keys())
399
- return symbol in symbols
400
-
401
- except ccxt.NetworkError:
402
- raise NetworkError(
403
- "Network error occurred, make sure you have "
404
- "an active internet connection"
405
- )
406
-
407
- except Exception:
408
- return False
409
-
410
275
  def get_data(
411
276
  self,
412
- data_type: str = None,
413
277
  date: datetime = None,
414
- symbol: str = None,
415
- market: str = None,
416
- time_frame: str = None,
417
278
  start_date: datetime = None,
418
279
  end_date: datetime = None,
419
- storage_path=None,
420
- window_size=None,
421
- pandas=False,
422
- save: bool = True
280
+ save: bool = False,
423
281
  ) -> Union[pl.DataFrame, pd.DataFrame]:
424
282
  """
425
283
  Function to retrieve data from the CCXT data provider.
@@ -429,127 +287,240 @@ class CCXTOHLCVDataProvider(DataProvider):
429
287
  converts the polars DataFrame to a pandas DataFrame.
430
288
 
431
289
  Args:
432
- data_type (str): The type of data to retrieve.
433
- date (datetime): The date to retrieve data for.
434
- symbol (str): The symbol to retrieve data for.
435
- market (str): The market to retrieve data from.
436
- time_frame (str): The time frame to retrieve data for.
437
- start_date (datetime): The start date to retrieve data from.
438
- end_date (datetime): The end date to retrieve data to.
439
- storage_path (str): The path to store the data.
440
- window_size (int): The size of the data window.
441
- pandas (bool): Whether to return the data as a pandas DataFrame.
442
- save (bool): Whether to save the data to the storage path.
290
+ date (datetime, optional): The date for which to retrieve the data.
291
+ start_date (datetime): The start date for the data.
292
+ end_date (datetime): The end date for the data.
293
+ save (bool): If True, the data will be saved to the storage path
294
+ if it is not already available. Defaults to False.
443
295
 
444
296
  Returns:
445
- Union[pl.DataFrame, pd.DataFrame]: The retrieved data in
446
- Polars DataFrame format, or converted to pandas DataFrame
297
+ DataFrame: The data for the given symbol and market.
447
298
  """
448
- if market is None:
449
- market = self.market
450
299
 
451
- if market is None:
300
+ if self.market is None:
452
301
  raise OperationalException(
453
302
  "Market is not set. Please set the market "
454
303
  "before calling get_data."
455
304
  )
456
305
 
457
- if symbol is None:
458
- symbol = self.symbol
459
-
460
- if symbol is None:
306
+ if self.symbol is None:
461
307
  raise OperationalException(
462
308
  "Symbol is not set. Please set the symbol "
463
309
  "before calling get_data."
464
310
  )
465
311
 
466
- if data_type is None:
467
- data_type = self.data_type
468
-
469
- if TradingDataType.OHLCV.equals(data_type):
470
-
471
- if time_frame is None:
472
- time_frame = self.time_frame
473
-
474
- if time_frame is None:
475
- raise OperationalException(
476
- "Time frame is not set. Please set the time frame "
477
- "before requesting ohlcv data."
478
- )
312
+ if self.time_frame is None:
313
+ raise OperationalException(
314
+ "Time frame is not set. Please set the time frame "
315
+ "before requesting ohlcv data."
316
+ )
479
317
 
480
- if end_date is None and window_size is None:
318
+ if date is not None and self.window_size is not None \
319
+ and self.time_frame is not None:
320
+ start_date = self.create_start_date(
321
+ end_date=date,
322
+ time_frame=self.time_frame,
323
+ window_size=self.window_size
324
+ )
325
+ end_date = date
326
+ else:
327
+ if (end_date is None and start_date is None
328
+ and self.window_size is None):
481
329
  raise OperationalException(
482
- "A window size is required or a start and end date "
330
+ "A start date or end date or window size is required "
483
331
  "to retrieve ohlcv data."
484
332
  )
485
333
 
486
- if end_date is None:
334
+ if (start_date is not None and end_date is None
335
+ and self.window_size is None):
487
336
  end_date = datetime.now(tz=timezone.utc)
488
337
 
489
- if start_date is None:
490
-
491
- if date is not None:
492
- start_date = date
493
- else:
494
- start_date = self.create_start_date(
495
- end_date=end_date,
496
- time_frame=time_frame,
497
- window_size=window_size
498
- )
338
+ if (end_date is not None and start_date is None
339
+ and self.window_size is None):
340
+ raise OperationalException(
341
+ "A window size is required when using an end date "
342
+ "to retrieve ohlcv data."
343
+ )
499
344
 
500
- # Check if the data already exists in the storage
501
- if storage_path is not None:
502
- # Here you would implement the logic to check if the data
503
- # exists in the storage path and return it if it does.
504
- # This is a placeholder for that logic.
505
- data = self.retrieve_data_from_storage(
506
- storage_path=storage_path,
507
- symbol=symbol,
508
- market=market,
509
- time_frame=time_frame,
345
+ if start_date is not None and end_date is None:
346
+ end_date = self.create_end_date(
510
347
  start_date=start_date,
511
- end_date=end_date
348
+ time_frame=self.time_frame,
349
+ window_size=self.window_size
512
350
  )
513
351
 
514
- if data is None:
515
- data = self.get_ohlcv(
516
- symbol=symbol,
517
- time_frame=time_frame,
518
- from_timestamp=start_date,
519
- market=market,
520
- to_timestamp=end_date
521
- )
522
- else:
523
- data = self.get_ohlcv(
524
- symbol=symbol,
525
- time_frame=time_frame,
526
- from_timestamp=start_date,
527
- market=market,
528
- to_timestamp=end_date
352
+ if end_date is not None and start_date is None \
353
+ and self.window_size is not None:
354
+ start_date = self.create_start_date(
355
+ end_date=end_date,
356
+ time_frame=self.time_frame,
357
+ window_size=self.window_size
529
358
  )
530
359
 
360
+ if start_date is None and end_date is None:
361
+ end_date = datetime.now(tz=timezone.utc)
362
+ start_date = self.create_start_date(
363
+ end_date=end_date,
364
+ time_frame=self.time_frame,
365
+ window_size=self.window_size
366
+ )
367
+ data = self._get_data_from_storage(
368
+ symbol=self.symbol,
369
+ market=self.market,
370
+ time_frame=self.time_frame,
371
+ storage_path=self.get_storage_directory(),
372
+ start_date=start_date,
373
+ end_date=end_date
374
+ )
375
+
376
+ if data is None:
377
+ data = self.get_ohlcv(
378
+ symbol=self.symbol,
379
+ time_frame=self.time_frame,
380
+ from_timestamp=start_date,
381
+ market=self.market,
382
+ to_timestamp=end_date
383
+ )
384
+
531
385
  if save:
532
- # Here you would implement the logic to save the data
533
- # to the specified storage path.
534
- # This is a placeholder for that logic.
386
+ storage_directory = self.get_storage_directory()
387
+
388
+ if storage_directory is None:
389
+ raise OperationalException(
390
+ "Storage directory is not set for "
391
+ "the CCXTOHLCVDataProvider. Make sure to set the "
392
+ "storage directory in the configuration or "
393
+ "in the constructor."
394
+ )
395
+
535
396
  self.save_data_to_storage(
536
- symbol=symbol,
537
- market=market,
397
+ symbol=self.symbol,
398
+ market=self.market,
399
+ time_frame=self.time_frame,
538
400
  start_date=start_date,
539
401
  end_date=end_date,
540
- time_frame=time_frame,
541
402
  data=data,
542
- storage_path=storage_path
403
+ storage_directory_path=storage_directory
543
404
  )
544
405
 
545
- if pandas:
546
- data = convert_polars_to_pandas(data)
406
+ if self.pandas:
407
+ data = convert_polars_to_pandas(data)
547
408
 
548
- return data
409
+ return data
549
410
 
550
- raise OperationalException(
551
- f"Data type {data_type} is not supported for CCXTDataProvider"
552
- )
411
+ def get_backtest_data(
412
+ self,
413
+ backtest_index_date: datetime,
414
+ backtest_start_date: datetime = None,
415
+ backtest_end_date: datetime = None,
416
+ data_source: DataSource = None
417
+ ) -> None:
418
+ """
419
+ Fetches backtest data for a given datasource
420
+
421
+ Args:
422
+ backtest_index_date (datetime): The date for which to fetch
423
+ backtest data.
424
+ backtest_start_date (datetime): The start date for the
425
+ backtest data.
426
+ backtest_end_date (datetime): The end date for the
427
+ backtest data.
428
+ data_source (Optional[Datasource]): The data source for which to
429
+ fetch backtest data. Defaults to None.
430
+
431
+ Returns:
432
+ pl.DataFrame: The backtest data for the given datasource.
433
+ """
434
+
435
+ if backtest_start_date is not None and \
436
+ backtest_end_date is not None:
437
+
438
+ if backtest_start_date < self._start_date_data_source:
439
+
440
+ if data_source is not None:
441
+ raise OperationalException(
442
+ f"Request data date {backtest_start_date} "
443
+ f"is before the range of "
444
+ f"the available data "
445
+ f"{self._start_date_data_source} "
446
+ f"- {self._end_date_data_source}."
447
+ f" for data source {data_source.identifier}."
448
+ f" Data source file path: "
449
+ f"{self.get_data_source_file_path()}"
450
+ )
451
+
452
+ raise OperationalException(
453
+ f"Request data date {backtest_start_date} "
454
+ f"is before the range of "
455
+ f"the available data "
456
+ f"{self._start_date_data_source} "
457
+ f"- {self._end_date_data_source}."
458
+ f" Data source file path: "
459
+ f"{self.get_data_source_file_path()}"
460
+ )
461
+
462
+ if backtest_end_date > self._end_date_data_source:
463
+
464
+ if data_source is not None:
465
+ raise OperationalException(
466
+ f"Request data date {backtest_end_date} "
467
+ f"is after the range of "
468
+ f"the available data "
469
+ f"{self._start_date_data_source} "
470
+ f"- {self._end_date_data_source}."
471
+ f" for data source {data_source.identifier}."
472
+ f" Data source file path: "
473
+ f"{self.get_data_source_file_path()}"
474
+ )
475
+
476
+ raise OperationalException(
477
+ f"Request data date {backtest_end_date} "
478
+ f"is after the range of "
479
+ f"the available data "
480
+ f"{self._start_date_data_source} "
481
+ f"- {self._end_date_data_source}."
482
+ f" Data source file path: "
483
+ f"{self.get_data_source_file_path()}"
484
+ )
485
+
486
+ data = self.data.filter(
487
+ (pl.col("Datetime") >= backtest_start_date) &
488
+ (pl.col("Datetime") <= backtest_end_date)
489
+ )
490
+ else:
491
+ try:
492
+ data = self.window_cache[backtest_index_date]
493
+ except KeyError:
494
+
495
+ try:
496
+ # Return the key in the cache that is closest to the
497
+ # backtest_index_date but not after it.
498
+ closest_key = min(
499
+ [k for k in self.window_cache.keys()
500
+ if k >= backtest_index_date]
501
+ )
502
+ data = self.window_cache[closest_key]
503
+ except ValueError:
504
+
505
+ if data_source is not None:
506
+ raise OperationalException(
507
+ "No OHLCV data available for the "
508
+ f"date: {backtest_index_date} "
509
+ f"within the prepared backtest data "
510
+ f"for data source {data_source.identifier}. "
511
+ )
512
+
513
+ raise OperationalException(
514
+ "No OHLCV data available for the "
515
+ f"date: {backtest_index_date} "
516
+ f"within the prepared backtest data "
517
+ f"for symbol {self.symbol}. "
518
+ )
519
+
520
+ if self.pandas:
521
+ data = convert_polars_to_pandas(data)
522
+
523
+ return data
553
524
 
554
525
  def get_ohlcv(
555
526
  self, symbol, time_frame, from_timestamp, market, to_timestamp=None
@@ -566,21 +537,22 @@ class CCXTOHLCVDataProvider(DataProvider):
566
537
 
567
538
  Returns:
568
539
  DataFrame: The ohlcv data for the symbol, time frame and market
569
- in polars DataFrame format
540
+ in polars DataFrame format
570
541
  """
571
542
  symbol = symbol.upper()
572
543
  market_credential = self.get_credential(market)
573
544
  exchange = self.initialize_exchange(market, market_credential)
545
+ time_frame = time_frame.value
574
546
 
575
547
  if from_timestamp > to_timestamp:
576
548
  raise OperationalException(
577
549
  "OHLCV data start date must be before end date"
578
550
  )
579
551
 
580
- if self.config is not None and "DATETIME_FORMAT" in self.config:
581
- datetime_format = self.config["DATETIME_FORMAT"]
552
+ if self.config is not None and DATETIME_FORMAT in self.config:
553
+ datetime_format = self.config[DATETIME_FORMAT]
582
554
  else:
583
- datetime_format = DATETIME_FORMAT
555
+ datetime_format = CCXT_DATETIME_FORMAT
584
556
 
585
557
  if not exchange.has['fetchOHLCV']:
586
558
  raise OperationalException(
@@ -588,7 +560,7 @@ class CCXTOHLCVDataProvider(DataProvider):
588
560
  f"functionality get_ohclvs"
589
561
  )
590
562
 
591
- from_time_stamp = exchange.parse8601(
563
+ from_timestamp = exchange.parse8601(
592
564
  from_timestamp.strftime(datetime_format)
593
565
  )
594
566
 
@@ -600,67 +572,62 @@ class CCXTOHLCVDataProvider(DataProvider):
600
572
  )
601
573
  data = []
602
574
 
603
- while from_time_stamp < to_timestamp:
604
- ohlcv = exchange.fetch_ohlcv(symbol, time_frame, from_time_stamp)
575
+ try:
576
+ while from_timestamp < to_timestamp:
577
+ ohlcv = exchange.fetch_ohlcv(
578
+ symbol, time_frame, from_timestamp
579
+ )
605
580
 
606
- if len(ohlcv) > 0:
607
- from_time_stamp = \
608
- ohlcv[-1][0] + exchange.parse_timeframe(time_frame) * 1000
609
- else:
610
- from_time_stamp = to_timestamp
581
+ if len(ohlcv) > 0:
582
+ from_timestamp = \
583
+ ohlcv[-1][0] + \
584
+ exchange.parse_timeframe(time_frame) * 1000
585
+ else:
586
+ from_timestamp = to_timestamp
611
587
 
612
- for candle in ohlcv:
613
- datetime_stamp = parser.parse(exchange.iso8601(candle[0]))
588
+ for candle in ohlcv:
589
+ datetime_stamp = parser.parse(exchange.iso8601(candle[0]))
614
590
 
615
- to_timestamp_datetime = parser.parse(
616
- exchange.iso8601(to_timestamp),
617
- )
591
+ to_timestamp_datetime = parser.parse(
592
+ exchange.iso8601(to_timestamp),
593
+ )
618
594
 
619
- if datetime_stamp <= to_timestamp_datetime:
620
- datetime_stamp = datetime_stamp \
621
- .strftime(datetime_format)
595
+ if datetime_stamp <= to_timestamp_datetime:
596
+ datetime_stamp = datetime_stamp \
597
+ .strftime(datetime_format)
622
598
 
623
- data.append(
624
- [datetime_stamp] +
625
- [float(value) for value in candle[1:]]
626
- )
599
+ data.append(
600
+ [datetime_stamp] +
601
+ [float(value) for value in candle[1:]]
602
+ )
627
603
 
628
- sleep(exchange.rateLimit / 1000)
604
+ sleep(exchange.rateLimit / 1000)
605
+ except ccxt.NetworkError as e:
606
+ logger.error(
607
+ f"Network error occurred while fetching OHLCV data for "
608
+ f"{symbol} on {market} with time frame {time_frame}: {e}"
609
+ )
610
+ raise NetworkError(
611
+ "Network error occurred, make sure you have an active "
612
+ "internet connection"
613
+ )
629
614
 
630
615
  # Predefined column names
631
616
  col_names = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
632
617
 
633
618
  # Combine the Series into a DataFrame with given column names
634
- df = pl.DataFrame(data, schema=col_names, orient="row")
619
+ df = pl.DataFrame(data, schema=col_names, orient="row").with_columns(
620
+ pl.col("Datetime").str.to_datetime(time_unit="ms", time_zone="UTC")
621
+ )
635
622
  return df
636
623
 
637
624
  def create_start_date(self, end_date, time_frame, window_size):
638
625
  minutes = TimeFrame.from_value(time_frame).amount_of_minutes
639
626
  return end_date - timedelta(minutes=window_size * minutes)
640
627
 
641
- def supports(self, market, symbol):
642
- """
643
- Function to check if the data provider supports
644
- the given market and symbol.
645
-
646
- Args:
647
- market (str): The market to check
648
- symbol (str): The symbol to check
649
-
650
- Returns:
651
- bool: True if the data provider supports the market and symbol,
652
- False otherwise
653
- """
654
- try:
655
- exchange_class = getattr(ccxt, market.lower())
656
- exchange = exchange_class()
657
- symbols = exchange.load_markets()
658
- return symbol.upper() in symbols
659
- except Exception as e:
660
- logger.error(
661
- f"Error checking support for {market} and {symbol}: {e}"
662
- )
663
- return False
628
+ def create_end_date(self, start_date, time_frame, window_size):
629
+ minutes = TimeFrame.from_value(time_frame).amount_of_minutes
630
+ return start_date + timedelta(minutes=window_size * minutes)
664
631
 
665
632
  @staticmethod
666
633
  def initialize_exchange(market, market_credential):
@@ -673,7 +640,7 @@ class CCXTOHLCVDataProvider(DataProvider):
673
640
  for the exchange
674
641
 
675
642
  Returns:
676
-
643
+ Exchange: CCXT exchange client
677
644
  """
678
645
  market = market.lower()
679
646
 
@@ -742,93 +709,48 @@ class CCXTOHLCVDataProvider(DataProvider):
742
709
  f" named as {market.upper()}_SECRET_KEY"
743
710
  )
744
711
 
745
- def retrieve_data_from_storage(
746
- self,
747
- storage_path: str,
748
- symbol: str = None,
749
- market: str = None,
750
- time_frame: str = None,
751
- start_date: datetime = None,
752
- end_date: datetime = None
753
- ) -> pl.DataFrame | None:
754
- """
755
- Function to retrieve data from the storage path.
756
-
757
- Args:
758
- storage_path (str): The path to the storage.
759
- symbol (str): The symbol to retrieve data for.
760
- market (str): The market to retrieve data from.
761
- time_frame (str): The time frame to retrieve data for.
762
- start_date (datetime): The start date to retrieve data from.
763
- end_date (datetime): The end date to retrieve data to.
764
-
765
- Returns:
766
- pl.DataFrame: The retrieved data in Polars DataFrame format.
767
- """
768
-
769
- if not os.path.isdir(storage_path):
770
- return None
771
-
772
- file_name = self._create_filename(
773
- symbol=symbol,
774
- market=market,
775
- time_frame=time_frame,
776
- start_date=start_date,
777
- end_date=end_date
778
- )
779
-
780
- file_path = os.path.join(storage_path, file_name)
781
-
782
- if os.path.exists(file_path):
783
- try:
784
- data = pl.read_csv(file_path, has_header=True)
785
- return data
786
- except Exception as e:
787
- logger.error(
788
- f"Error reading data from {file_path}: {e}"
789
- )
790
- return None
791
-
792
- return None
793
-
794
712
  def save_data_to_storage(
795
713
  self,
796
- symbol,
797
- market,
714
+ symbol: str,
715
+ market: str,
716
+ time_frame: TimeFrame,
798
717
  start_date: datetime,
799
718
  end_date: datetime,
800
- time_frame: str,
801
719
  data: pl.DataFrame,
802
- storage_path: str,
720
+ storage_directory_path: str,
803
721
  ):
804
722
  """
805
723
  Function to save data to the storage path.
806
724
 
807
725
  Args:
726
+ symbol (str): The symbol for which the data is saved.
727
+ market (str): The market for which the data is saved.
728
+ time_frame (TimeFrame): The time frame for which the data is saved.
808
729
  data (pl.DataFrame): The data to save.
809
- storage_path (str): The path to the storage.
730
+ storage_directory_path (str): The path to the storage directory.
731
+ start_date (datetime): The start date for the data.
732
+ end_date (datetime): The end date for the data.
810
733
 
811
734
  Returns:
812
735
  None
813
736
  """
814
- if storage_path is None:
737
+ if storage_directory_path is None:
815
738
  raise OperationalException(
816
739
  "Storage path is not set. Please set the storage path "
817
740
  "before saving data."
818
741
  )
819
742
 
820
- if not os.path.isdir(storage_path):
821
- os.makedirs(storage_path)
743
+ if not os.path.isdir(storage_directory_path):
744
+ os.makedirs(storage_directory_path)
822
745
 
823
- symbol = symbol.upper().replace('/', '_')
824
746
  filename = self._create_filename(
825
747
  symbol=symbol,
826
748
  market=market,
827
- time_frame=time_frame,
749
+ time_frame=time_frame.value,
828
750
  start_date=start_date,
829
751
  end_date=end_date
830
752
  )
831
- storage_path = os.path.join(storage_path, filename)
753
+ storage_path = os.path.join(storage_directory_path, filename)
832
754
  if os.path.exists(storage_path):
833
755
  os.remove(storage_path)
834
756
 
@@ -839,15 +761,8 @@ class CCXTOHLCVDataProvider(DataProvider):
839
761
 
840
762
  data.write_csv(storage_path)
841
763
 
842
- def __repr__(self):
843
- return (
844
- f"CCXTOHLCVDataProvider(market={self.market}, "
845
- f"symbol={self.symbol}, time_frame={self.time_frame}, "
846
- f"window_size={self.window_size})"
847
- )
848
-
849
- @staticmethod
850
764
  def _create_filename(
765
+ self,
851
766
  symbol: str,
852
767
  market: str,
853
768
  time_frame: str,
@@ -868,11 +783,361 @@ class CCXTOHLCVDataProvider(DataProvider):
868
783
  Returns:
869
784
  str: The generated filename.
870
785
  """
871
- symbol = symbol.upper().replace('/', '_')
872
- start_date_str = start_date.strftime('%Y%m%d%H')
873
- end_date_str = end_date.strftime('%Y%m%d%H')
786
+ datetime_format = self.config[DATETIME_FORMAT_FILE_NAME]
787
+ symbol = symbol.upper().replace('/', '-')
788
+ start_date_str = start_date.strftime(datetime_format)
789
+ end_date_str = end_date.strftime(datetime_format)
874
790
  filename = (
875
- f"{symbol}_{market}_{time_frame}_{start_date_str}_"
791
+ f"OHLCV_{symbol}_{market.upper()}_{time_frame}_{start_date_str}_"
876
792
  f"{end_date_str}.csv"
877
793
  )
878
794
  return filename
795
+
796
+ def _get_data_from_storage(
797
+ self,
798
+ storage_path,
799
+ symbol: str,
800
+ market: str,
801
+ time_frame: TimeFrame,
802
+ start_date: datetime,
803
+ end_date: datetime,
804
+ ) -> Union[pl.DataFrame, None]:
805
+ """
806
+ Helper function to retrieve the data from the storage path if
807
+ it exists. If the data does not exist, it returns None.
808
+ """
809
+ data = None
810
+ if storage_path is None:
811
+ return None
812
+
813
+ # Loop through all files in the data storage path
814
+ if not os.path.isdir(storage_path):
815
+ logger.error(
816
+ f"Storage path {storage_path} does not exist or is not a "
817
+ "directory."
818
+ )
819
+ return None
820
+
821
+ for file_name in os.listdir(storage_path):
822
+ if file_name.startswith("OHLCV_") and file_name.endswith(".csv"):
823
+
824
+ try:
825
+ data_source_spec = self.\
826
+ _get_data_source_specification_from_file_name(
827
+ file_name
828
+ )
829
+
830
+ if data_source_spec is None:
831
+ continue
832
+
833
+ if data_source_spec.symbol.upper() == symbol.upper() and \
834
+ data_source_spec.market.upper() == market.upper() and \
835
+ data_source_spec.time_frame.equals(time_frame):
836
+
837
+ # Check if the data source specification matches
838
+ # the start and end date if its specified
839
+ if (data_source_spec.start_date is not None and
840
+ data_source_spec.end_date is not None and
841
+ (data_source_spec.start_date <= start_date
842
+ and data_source_spec.end_date >= end_date)):
843
+
844
+ # If the data source specification matches,
845
+ # read the file
846
+ file_path = os.path.join(storage_path, file_name)
847
+ self.data_file_path = file_path
848
+
849
+ # Read CSV as-is first
850
+ data = pl.read_csv(file_path, low_memory=True)
851
+
852
+ # Check what columns we have
853
+ if "Datetime" in data.columns:
854
+ # Try to parse the datetime column
855
+ try:
856
+ # Try the ISO format with timezone first
857
+ data = data.with_columns(
858
+ pl.col("Datetime").str.to_datetime(
859
+ format="%Y-%m-%dT%H:%M:%S%.f%z",
860
+ time_zone="UTC"
861
+ )
862
+ )
863
+ except Exception as e1:
864
+ try:
865
+ # Fallback: let Polars infer the format
866
+ data = data.with_columns(
867
+ pl.col("Datetime").str.to_datetime(
868
+ time_zone="UTC"
869
+ )
870
+ )
871
+ except Exception as e2:
872
+ logger.warning(
873
+ f"Could not parse Datetime "
874
+ f"column in {file_name}: "
875
+ f"Format error: {str(e1)}, "
876
+ f"Infer error: {str(e2)}"
877
+ )
878
+ continue
879
+ else:
880
+ logger.warning(
881
+ f"No 'Datetime' column "
882
+ f"found in {file_name}. "
883
+ f"Available columns: {data.columns}"
884
+ )
885
+ continue
886
+
887
+ # Filter by date range
888
+ data = data.filter(
889
+ (pl.col("Datetime") >= start_date) &
890
+ (pl.col("Datetime") <= end_date)
891
+ )
892
+ break
893
+
894
+ except Exception as e:
895
+ logger.warning(
896
+ f"Error reading data from {file_name}: {str(e)}"
897
+ )
898
+ continue
899
+
900
+ return data
901
+
902
+ def _get_data_source_specification_from_file_name(
903
+ self, file_name: str
904
+ ) -> Union[DataSource, None]:
905
+ """
906
+ Extracts the data source specification from the OHLCV data filename.
907
+ Given that the file name is in the format:
908
+
909
+ "OHLCV_<SYMBOL>_<MARKET>_<TIME_FRAME>_<START_DATE>_<END_DATE>.csv",
910
+ this function extracts all attributes and returns a DataSource object.
911
+ This object can then later be used to compare it to the datasource
912
+ object that is passed to the get_data method.
913
+
914
+ Args:
915
+ file_name (str): The file name from which to extract the DataSource
916
+
917
+ Returns:
918
+ DataSource: The extracted data source specification.
919
+ """
920
+
921
+ try:
922
+ parts = file_name.split('_')
923
+
924
+ if len(parts) < 3:
925
+ return None
926
+
927
+ data_type = parts[0].upper()
928
+ symbol = parts[1].upper().replace('-', '/')
929
+ market = parts[2].upper()
930
+ time_frame_str = parts[3]
931
+ start_date_str = parts[4]
932
+ end_date_str = parts[5].replace('.csv', '')
933
+ return DataSource(
934
+ data_type=DataType.from_string(data_type),
935
+ symbol=symbol,
936
+ market=market,
937
+ time_frame=TimeFrame.from_string(time_frame_str),
938
+ start_date=parser.parse(
939
+ start_date_str
940
+ ).replace(tzinfo=timezone.utc),
941
+ end_date=parser.parse(
942
+ end_date_str
943
+ ).replace(tzinfo=timezone.utc)
944
+ )
945
+ except ValueError:
946
+ logger.info(
947
+ f"Could not extract data source attributes from "
948
+ f"file name: {file_name}. "
949
+ f"Expected format 'OHLCV_<SYMBOL>_<MARKET>_<TIME_FRAME>_"
950
+ f"<START_DATE>_<END_DATE>.csv."
951
+ )
952
+ return None
953
+
954
+ def _precompute_sliding_windows(
955
+ self,
956
+ data,
957
+ window_size: int,
958
+ time_frame: TimeFrame,
959
+ start_date: datetime,
960
+ end_date: datetime
961
+ ) -> None:
962
+ """
963
+ Precompute all sliding windows for fast retrieval in backtest mode.
964
+
965
+ A sliding window is calculated as a subset of the data. It will
966
+ take for each timestamp in the data a window of size `window_size`
967
+ and stores it in a cache with the last timestamp of the window.
968
+
969
+ So if the window size is 200, the first window will be
970
+ the first 200 rows of the data, the second window will be
971
+ the rows 1 to 200, the third window will be the rows
972
+ 2 to 201, and so on until the last window which will be
973
+ the last 200 rows of the data.
974
+
975
+ Args:
976
+ data (pl.DataFrame): The data to precompute the sliding
977
+ windows for.
978
+ window_size (int): The size of the sliding window to precompute.
979
+ start_date (datetime, optional): The start date for the sliding
980
+ windows.
981
+ end_date (datetime, optional): The end date for the sliding
982
+ windows.
983
+
984
+ Returns:
985
+ None
986
+ """
987
+ self.window_cache = {}
988
+ timestamps = data["Datetime"].to_list()
989
+ # Only select the entries after the start date
990
+ timestamps = [
991
+ ts for ts in timestamps if start_date <= ts <= end_date
992
+ ]
993
+
994
+ # Create sliding windows of size <window_size> for each timestamp
995
+ # in the data with the given the time frame and window size
996
+ for timestamp in timestamps:
997
+ # Use timestamp as key
998
+ self.window_cache[timestamp] = data.filter(
999
+ (data["Datetime"] <= timestamp) &
1000
+ (data["Datetime"] >= timestamp - timedelta(
1001
+ minutes=time_frame.amount_of_minutes * window_size
1002
+ ))
1003
+ )
1004
+
1005
+ # Make sure the end datetime of the backtest is included in the
1006
+ # sliding windows cache
1007
+ if end_date not in self.window_cache:
1008
+ self.window_cache[end_date] = data[-window_size:]
1009
+
1010
+ def get_storage_directory(self) -> Union[str, None]:
1011
+ """
1012
+ Get the storage directory for the OHLCV data provider.
1013
+
1014
+ Returns:
1015
+ Union[str, None]: The storage directory path if set,
1016
+ otherwise None.
1017
+ """
1018
+
1019
+ if self.storage_directory is not None:
1020
+ return self.storage_directory
1021
+
1022
+ if self.config is not None:
1023
+ resource_directory = self.config.get(RESOURCE_DIRECTORY)
1024
+ data_directory_name = self.config.get(DATA_DIRECTORY)
1025
+ return os.path.join(resource_directory, data_directory_name)
1026
+
1027
+ return None
1028
+
1029
def copy(self, data_source) -> "CCXTOHLCVDataProvider":
    """
    Create a new CCXTOHLCVDataProvider configured from a data source.

    The data source was previously matched via 'has_data'; the framework
    registers a dedicated provider instance per data source, which this
    method produces.

    Args:
        data_source (DataSource): The data source specification that
            matches a data provider.

    Returns:
        DataProvider: A new instance of the data provider with the same
            configuration.

    Raises:
        OperationalException: If the data source is missing the
            `market`, `time_frame` or `symbol` attribute required by
            CCXT-based providers.
    """
    # All three attributes are mandatory for CCXT usage; validate each
    # one and raise with an attribute-specific message.
    for required_attribute in ("market", "time_frame", "symbol"):
        attribute_value = getattr(data_source, required_attribute)

        if attribute_value is None or attribute_value == "":
            raise OperationalException(
                f"DataSource has not `{required_attribute}` "
                "attribute specified, "
                f"please specify the {required_attribute} attribute in the "
                "data source specification before using the "
                "ccxt OHLCV data provider"
            )

    # Fall back to this provider's own storage directory when the data
    # source does not carry an explicit storage path.
    destination = data_source.storage_path

    if destination is None:
        destination = self.get_storage_directory()

    return CCXTOHLCVDataProvider(
        symbol=data_source.symbol,
        time_frame=data_source.time_frame,
        market=data_source.market,
        window_size=data_source.window_size,
        data_provider_identifier=data_source.data_provider_identifier,
        storage_directory=destination,
        config=self.config,
        pandas=data_source.pandas,
    )
1087
+
1088
def get_number_of_data_points(
    self,
    start_date: datetime,
    end_date: datetime
) -> int:
    """
    Count the data points available in the given date range.

    Args:
        start_date (datetime): Inclusive lower bound of the range.
        end_date (datetime): Inclusive upper bound of the range.

    Returns:
        int: The number of available data points between the given
            start and end dates.
    """
    # Count timestamps inside [start_date, end_date] without
    # materializing an intermediate list.
    return sum(
        1 for timestamp in self.data["Datetime"].to_list()
        if start_date <= timestamp <= end_date
    )
1111
+
1112
def get_missing_data_dates(
    self,
    start_date: datetime,
    end_date: datetime,
) -> List[datetime]:
    """
    Collect the dates with missing data inside the given range.

    Args:
        start_date (datetime): Inclusive lower bound of the range.
        end_date (datetime): Inclusive upper bound of the range.

    Returns:
        List[datetime]: A list of dates for which data is missing
            between the given start and end dates, in the order they
            appear in ``missing_data_point_dates``.
    """
    result = []

    # Keep only the known missing dates that fall inside the
    # requested window (bounds inclusive).
    for candidate in self.missing_data_point_dates:
        if start_date <= candidate <= end_date:
            result.append(candidate)

    return result
1134
+
1135
def get_data_source_file_path(self) -> Union[str, None]:
    """
    Return the local file path backing this data source, if any.

    Returns:
        Union[str, None]: The file path of the data source if stored
            locally, otherwise None.
    """
    file_path = self.data_file_path
    return file_path