investing-algorithm-framework 6.9.1__py3-none-any.whl → 7.19.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of investing-algorithm-framework might be problematic. Click here for more details.

Files changed (192)
  1. investing_algorithm_framework/__init__.py +147 -44
  2. investing_algorithm_framework/app/__init__.py +23 -6
  3. investing_algorithm_framework/app/algorithm/algorithm.py +5 -41
  4. investing_algorithm_framework/app/algorithm/algorithm_factory.py +17 -10
  5. investing_algorithm_framework/app/analysis/__init__.py +15 -0
  6. investing_algorithm_framework/app/analysis/backtest_data_ranges.py +121 -0
  7. investing_algorithm_framework/app/analysis/backtest_utils.py +107 -0
  8. investing_algorithm_framework/app/analysis/permutation.py +116 -0
  9. investing_algorithm_framework/app/analysis/ranking.py +297 -0
  10. investing_algorithm_framework/app/app.py +1322 -707
  11. investing_algorithm_framework/app/context.py +196 -88
  12. investing_algorithm_framework/app/eventloop.py +590 -0
  13. investing_algorithm_framework/app/reporting/__init__.py +16 -5
  14. investing_algorithm_framework/app/reporting/ascii.py +57 -202
  15. investing_algorithm_framework/app/reporting/backtest_report.py +284 -170
  16. investing_algorithm_framework/app/reporting/charts/__init__.py +10 -2
  17. investing_algorithm_framework/app/reporting/charts/entry_exist_signals.py +66 -0
  18. investing_algorithm_framework/app/reporting/charts/equity_curve.py +37 -0
  19. investing_algorithm_framework/app/reporting/charts/equity_curve_drawdown.py +11 -26
  20. investing_algorithm_framework/app/reporting/charts/line_chart.py +11 -0
  21. investing_algorithm_framework/app/reporting/charts/ohlcv_data_completeness.py +51 -0
  22. investing_algorithm_framework/app/reporting/charts/rolling_sharp_ratio.py +1 -1
  23. investing_algorithm_framework/app/reporting/generate.py +100 -114
  24. investing_algorithm_framework/app/reporting/tables/key_metrics_table.py +40 -32
  25. investing_algorithm_framework/app/reporting/tables/time_metrics_table.py +34 -27
  26. investing_algorithm_framework/app/reporting/tables/trade_metrics_table.py +23 -19
  27. investing_algorithm_framework/app/reporting/tables/trades_table.py +1 -1
  28. investing_algorithm_framework/app/reporting/tables/utils.py +1 -0
  29. investing_algorithm_framework/app/reporting/templates/report_template.html.j2 +10 -16
  30. investing_algorithm_framework/app/strategy.py +315 -175
  31. investing_algorithm_framework/app/task.py +5 -3
  32. investing_algorithm_framework/cli/cli.py +30 -12
  33. investing_algorithm_framework/cli/deploy_to_aws_lambda.py +131 -34
  34. investing_algorithm_framework/cli/initialize_app.py +20 -1
  35. investing_algorithm_framework/cli/templates/app_aws_lambda_function.py.template +18 -6
  36. investing_algorithm_framework/cli/templates/aws_lambda_dockerfile.template +22 -0
  37. investing_algorithm_framework/cli/templates/aws_lambda_dockerignore.template +92 -0
  38. investing_algorithm_framework/cli/templates/aws_lambda_requirements.txt.template +2 -2
  39. investing_algorithm_framework/cli/templates/azure_function_requirements.txt.template +1 -1
  40. investing_algorithm_framework/create_app.py +3 -5
  41. investing_algorithm_framework/dependency_container.py +25 -39
  42. investing_algorithm_framework/domain/__init__.py +45 -38
  43. investing_algorithm_framework/domain/backtesting/__init__.py +21 -0
  44. investing_algorithm_framework/domain/backtesting/backtest.py +503 -0
  45. investing_algorithm_framework/domain/backtesting/backtest_date_range.py +96 -0
  46. investing_algorithm_framework/domain/backtesting/backtest_evaluation_focuss.py +242 -0
  47. investing_algorithm_framework/domain/backtesting/backtest_metrics.py +459 -0
  48. investing_algorithm_framework/domain/backtesting/backtest_permutation_test.py +275 -0
  49. investing_algorithm_framework/domain/backtesting/backtest_run.py +605 -0
  50. investing_algorithm_framework/domain/backtesting/backtest_summary_metrics.py +162 -0
  51. investing_algorithm_framework/domain/backtesting/combine_backtests.py +280 -0
  52. investing_algorithm_framework/domain/config.py +27 -0
  53. investing_algorithm_framework/domain/constants.py +6 -34
  54. investing_algorithm_framework/domain/data_provider.py +200 -56
  55. investing_algorithm_framework/domain/exceptions.py +34 -1
  56. investing_algorithm_framework/domain/models/__init__.py +10 -19
  57. investing_algorithm_framework/domain/models/base_model.py +0 -6
  58. investing_algorithm_framework/domain/models/data/__init__.py +7 -0
  59. investing_algorithm_framework/domain/models/data/data_source.py +214 -0
  60. investing_algorithm_framework/domain/models/{market_data_type.py → data/data_type.py} +7 -7
  61. investing_algorithm_framework/domain/models/market/market_credential.py +6 -0
  62. investing_algorithm_framework/domain/models/order/order.py +34 -13
  63. investing_algorithm_framework/domain/models/order/order_status.py +1 -1
  64. investing_algorithm_framework/domain/models/order/order_type.py +1 -1
  65. investing_algorithm_framework/domain/models/portfolio/portfolio.py +14 -1
  66. investing_algorithm_framework/domain/models/portfolio/portfolio_configuration.py +5 -1
  67. investing_algorithm_framework/domain/models/portfolio/portfolio_snapshot.py +51 -11
  68. investing_algorithm_framework/domain/models/position/__init__.py +2 -1
  69. investing_algorithm_framework/domain/models/position/position.py +9 -0
  70. investing_algorithm_framework/domain/models/position/position_size.py +41 -0
  71. investing_algorithm_framework/domain/models/risk_rules/__init__.py +7 -0
  72. investing_algorithm_framework/domain/models/risk_rules/stop_loss_rule.py +51 -0
  73. investing_algorithm_framework/domain/models/risk_rules/take_profit_rule.py +55 -0
  74. investing_algorithm_framework/domain/models/snapshot_interval.py +0 -1
  75. investing_algorithm_framework/domain/models/strategy_profile.py +19 -151
  76. investing_algorithm_framework/domain/models/time_frame.py +7 -0
  77. investing_algorithm_framework/domain/models/time_interval.py +33 -0
  78. investing_algorithm_framework/domain/models/time_unit.py +63 -1
  79. investing_algorithm_framework/domain/models/trade/__init__.py +0 -2
  80. investing_algorithm_framework/domain/models/trade/trade.py +56 -32
  81. investing_algorithm_framework/domain/models/trade/trade_status.py +8 -2
  82. investing_algorithm_framework/domain/models/trade/trade_stop_loss.py +106 -41
  83. investing_algorithm_framework/domain/models/trade/trade_take_profit.py +161 -99
  84. investing_algorithm_framework/domain/order_executor.py +19 -0
  85. investing_algorithm_framework/domain/portfolio_provider.py +20 -1
  86. investing_algorithm_framework/domain/services/__init__.py +0 -13
  87. investing_algorithm_framework/domain/strategy.py +1 -29
  88. investing_algorithm_framework/domain/utils/__init__.py +5 -1
  89. investing_algorithm_framework/domain/utils/custom_tqdm.py +22 -0
  90. investing_algorithm_framework/domain/utils/jupyter_notebook_detection.py +19 -0
  91. investing_algorithm_framework/domain/utils/polars.py +17 -14
  92. investing_algorithm_framework/download_data.py +40 -10
  93. investing_algorithm_framework/infrastructure/__init__.py +13 -25
  94. investing_algorithm_framework/infrastructure/data_providers/__init__.py +7 -4
  95. investing_algorithm_framework/infrastructure/data_providers/ccxt.py +811 -546
  96. investing_algorithm_framework/infrastructure/data_providers/csv.py +433 -122
  97. investing_algorithm_framework/infrastructure/data_providers/pandas.py +599 -0
  98. investing_algorithm_framework/infrastructure/database/__init__.py +6 -2
  99. investing_algorithm_framework/infrastructure/database/sql_alchemy.py +81 -0
  100. investing_algorithm_framework/infrastructure/models/__init__.py +0 -13
  101. investing_algorithm_framework/infrastructure/models/order/order.py +9 -3
  102. investing_algorithm_framework/infrastructure/models/trades/trade_stop_loss.py +27 -8
  103. investing_algorithm_framework/infrastructure/models/trades/trade_take_profit.py +21 -7
  104. investing_algorithm_framework/infrastructure/order_executors/__init__.py +2 -0
  105. investing_algorithm_framework/infrastructure/order_executors/backtest_oder_executor.py +28 -0
  106. investing_algorithm_framework/infrastructure/repositories/repository.py +16 -2
  107. investing_algorithm_framework/infrastructure/repositories/trade_repository.py +2 -2
  108. investing_algorithm_framework/infrastructure/repositories/trade_stop_loss_repository.py +6 -0
  109. investing_algorithm_framework/infrastructure/repositories/trade_take_profit_repository.py +6 -0
  110. investing_algorithm_framework/infrastructure/services/__init__.py +0 -4
  111. investing_algorithm_framework/services/__init__.py +105 -8
  112. investing_algorithm_framework/services/backtesting/backtest_service.py +536 -476
  113. investing_algorithm_framework/services/configuration_service.py +14 -4
  114. investing_algorithm_framework/services/data_providers/__init__.py +5 -0
  115. investing_algorithm_framework/services/data_providers/data_provider_service.py +850 -0
  116. investing_algorithm_framework/{app/reporting → services}/metrics/__init__.py +48 -17
  117. investing_algorithm_framework/{app/reporting → services}/metrics/drawdown.py +10 -10
  118. investing_algorithm_framework/{app/reporting → services}/metrics/equity_curve.py +2 -2
  119. investing_algorithm_framework/{app/reporting → services}/metrics/exposure.py +60 -2
  120. investing_algorithm_framework/services/metrics/generate.py +358 -0
  121. investing_algorithm_framework/{app/reporting → services}/metrics/profit_factor.py +36 -0
  122. investing_algorithm_framework/{app/reporting → services}/metrics/recovery.py +2 -2
  123. investing_algorithm_framework/{app/reporting → services}/metrics/returns.py +146 -147
  124. investing_algorithm_framework/services/metrics/risk_free_rate.py +28 -0
  125. investing_algorithm_framework/{app/reporting/metrics/sharp_ratio.py → services/metrics/sharpe_ratio.py} +6 -10
  126. investing_algorithm_framework/{app/reporting → services}/metrics/sortino_ratio.py +3 -7
  127. investing_algorithm_framework/services/metrics/trades.py +500 -0
  128. investing_algorithm_framework/services/metrics/volatility.py +97 -0
  129. investing_algorithm_framework/{app/reporting → services}/metrics/win_rate.py +70 -3
  130. investing_algorithm_framework/services/order_service/order_backtest_service.py +21 -31
  131. investing_algorithm_framework/services/order_service/order_service.py +9 -71
  132. investing_algorithm_framework/services/portfolios/portfolio_provider_lookup.py +0 -2
  133. investing_algorithm_framework/services/portfolios/portfolio_service.py +3 -13
  134. investing_algorithm_framework/services/portfolios/portfolio_snapshot_service.py +62 -96
  135. investing_algorithm_framework/services/portfolios/portfolio_sync_service.py +0 -3
  136. investing_algorithm_framework/services/repository_service.py +5 -2
  137. investing_algorithm_framework/services/trade_order_evaluator/__init__.py +9 -0
  138. investing_algorithm_framework/services/trade_order_evaluator/backtest_trade_oder_evaluator.py +113 -0
  139. investing_algorithm_framework/services/trade_order_evaluator/default_trade_order_evaluator.py +51 -0
  140. investing_algorithm_framework/services/trade_order_evaluator/trade_order_evaluator.py +80 -0
  141. investing_algorithm_framework/services/trade_service/__init__.py +7 -1
  142. investing_algorithm_framework/services/trade_service/trade_service.py +51 -29
  143. investing_algorithm_framework/services/trade_service/trade_stop_loss_service.py +39 -0
  144. investing_algorithm_framework/services/trade_service/trade_take_profit_service.py +41 -0
  145. investing_algorithm_framework-7.19.15.dist-info/METADATA +537 -0
  146. {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/RECORD +159 -148
  147. investing_algorithm_framework/app/reporting/evaluation.py +0 -243
  148. investing_algorithm_framework/app/reporting/metrics/risk_free_rate.py +0 -8
  149. investing_algorithm_framework/app/reporting/metrics/volatility.py +0 -69
  150. investing_algorithm_framework/cli/templates/requirements_azure_function.txt.template +0 -3
  151. investing_algorithm_framework/domain/models/backtesting/__init__.py +0 -9
  152. investing_algorithm_framework/domain/models/backtesting/backtest_date_range.py +0 -47
  153. investing_algorithm_framework/domain/models/backtesting/backtest_position.py +0 -120
  154. investing_algorithm_framework/domain/models/backtesting/backtest_reports_evaluation.py +0 -0
  155. investing_algorithm_framework/domain/models/backtesting/backtest_results.py +0 -440
  156. investing_algorithm_framework/domain/models/data_source.py +0 -21
  157. investing_algorithm_framework/domain/models/date_range.py +0 -64
  158. investing_algorithm_framework/domain/models/trade/trade_risk_type.py +0 -34
  159. investing_algorithm_framework/domain/models/trading_data_types.py +0 -48
  160. investing_algorithm_framework/domain/models/trading_time_frame.py +0 -223
  161. investing_algorithm_framework/domain/services/market_data_sources.py +0 -543
  162. investing_algorithm_framework/domain/services/market_service.py +0 -153
  163. investing_algorithm_framework/domain/services/observable.py +0 -51
  164. investing_algorithm_framework/domain/services/observer.py +0 -19
  165. investing_algorithm_framework/infrastructure/models/market_data_sources/__init__.py +0 -16
  166. investing_algorithm_framework/infrastructure/models/market_data_sources/ccxt.py +0 -746
  167. investing_algorithm_framework/infrastructure/models/market_data_sources/csv.py +0 -270
  168. investing_algorithm_framework/infrastructure/models/market_data_sources/pandas.py +0 -312
  169. investing_algorithm_framework/infrastructure/services/market_service/__init__.py +0 -5
  170. investing_algorithm_framework/infrastructure/services/market_service/ccxt_market_service.py +0 -471
  171. investing_algorithm_framework/infrastructure/services/performance_service/__init__.py +0 -7
  172. investing_algorithm_framework/infrastructure/services/performance_service/backtest_performance_service.py +0 -2
  173. investing_algorithm_framework/infrastructure/services/performance_service/performance_service.py +0 -322
  174. investing_algorithm_framework/services/market_data_source_service/__init__.py +0 -10
  175. investing_algorithm_framework/services/market_data_source_service/backtest_market_data_source_service.py +0 -269
  176. investing_algorithm_framework/services/market_data_source_service/data_provider_service.py +0 -350
  177. investing_algorithm_framework/services/market_data_source_service/market_data_source_service.py +0 -377
  178. investing_algorithm_framework/services/strategy_orchestrator_service.py +0 -296
  179. investing_algorithm_framework-6.9.1.dist-info/METADATA +0 -440
  180. /investing_algorithm_framework/{app/reporting → services}/metrics/alpha.py +0 -0
  181. /investing_algorithm_framework/{app/reporting → services}/metrics/beta.py +0 -0
  182. /investing_algorithm_framework/{app/reporting → services}/metrics/cagr.py +0 -0
  183. /investing_algorithm_framework/{app/reporting → services}/metrics/calmar_ratio.py +0 -0
  184. /investing_algorithm_framework/{app/reporting → services}/metrics/mean_daily_return.py +0 -0
  185. /investing_algorithm_framework/{app/reporting → services}/metrics/price_efficiency.py +0 -0
  186. /investing_algorithm_framework/{app/reporting → services}/metrics/standard_deviation.py +0 -0
  187. /investing_algorithm_framework/{app/reporting → services}/metrics/treynor_ratio.py +0 -0
  188. /investing_algorithm_framework/{app/reporting → services}/metrics/ulcer.py +0 -0
  189. /investing_algorithm_framework/{app/reporting → services}/metrics/value_at_risk.py +0 -0
  190. {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/LICENSE +0 -0
  191. {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/WHEEL +0 -0
  192. {investing_algorithm_framework-6.9.1.dist-info → investing_algorithm_framework-7.19.15.dist-info}/entry_points.txt +0 -0
@@ -1,147 +1,232 @@
1
- import polars
2
- from datetime import datetime
1
+ from typing import List, Union
2
+ from datetime import datetime, timezone, timedelta
3
+
4
+ import polars as pl
5
+
3
6
  from investing_algorithm_framework.domain import DataProvider, \
4
- TradingDataType, OperationalException
7
+ OperationalException, DataSource, DataType, TimeFrame, \
8
+ convert_polars_to_pandas
5
9
 
6
10
 
7
11
  class CSVOHLCVDataProvider(DataProvider):
8
12
  """
9
- Implementation of Data Provider for OHLCV data.
13
+ Implementation of Data Provider for OHLCV data. OHLCV data
14
+ will be loaded from a CSV file. The CSV file should contain
15
+ the following columns: Datetime, Open, High, Low, Close, Volume.
16
+ The Datetime column should be in UTC timezone and in milliseconds.
17
+ The data will be loaded into a Polars DataFrame and will be kept in memory.
18
+
19
+ Attributes:
20
+ data_type (DataType): The type of data provided by this provider,
21
+ which is OHLCV.
22
+ data_provider_identifier (str): Identifier for the CSV OHLCV data
23
+ provider.
24
+ _start_date_data_source (datetime): The start date of the data
25
+ source, determined from the first row of the data.
26
+ _end_date_data_source (datetime): The end date of the data
27
+ source, determined from the last row of the data.
28
+ data (polars.DataFrame): The OHLCV data loaded from the CSV file.
10
29
  """
30
+ data_type = DataType.OHLCV
31
+ data_provider_identifier = "csv_ohlcv_data_provider"
32
+
11
33
  def __init__(
12
34
  self,
13
- file_path: str,
35
+ storage_path: str,
14
36
  symbol: str,
15
37
  time_frame: str,
16
- market: str = None,
17
- priority: int = 0,
38
+ market: str,
18
39
  window_size=None,
19
- storage_path=None,
40
+ data_provider_identifier: str = None,
41
+ pandas: bool = False,
20
42
  ):
21
43
  """
22
44
  Initialize the CSV Data Provider.
23
45
 
24
46
  Args:
25
- file_path (str): Path to the CSV file.
47
+ storage_path (str): Path to the CSV file.
48
+ symbol (str): The symbol for which the data is provided.
49
+ time_frame (str): The time frame for the data.
50
+ market (str, optional): The market for the data. Defaults to None.
51
+ window_size (int, optional): The window size for the data.
52
+ Defaults to None.
26
53
  """
27
-
54
+ if data_provider_identifier is None:
55
+ data_provider_identifier = self.data_provider_identifier
28
56
  super().__init__(
29
- data_type=TradingDataType.OHLCV.value,
30
57
  symbol=symbol,
31
58
  market=market,
32
- markets=[],
33
- priority=priority,
34
59
  time_frame=time_frame,
35
60
  window_size=window_size,
36
61
  storage_path=storage_path,
62
+ data_provider_identifier=data_provider_identifier,
63
+ data_type=DataType.OHLCV.value
37
64
  )
38
- self.file_path = file_path
39
65
  self._start_date_data_source = None
40
66
  self._end_date_data_source = None
41
- self.data = None
67
+ self._columns = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
68
+ self.window_cache = {}
69
+ self._load_data(self.storage_path)
70
+ self.pandas = pandas
71
+ self.number_of_missing_data_points = 0
72
+ self.missing_data_point_dates: List[datetime] = []
42
73
 
43
74
  def has_data(
44
75
  self,
45
- data_type: str = None,
46
- symbol: str = None,
47
- market: str = None,
48
- time_frame: str = None,
76
+ data_source: DataSource,
49
77
  start_date: datetime = None,
50
- end_date: datetime = None,
51
- window_size=None
78
+ end_date: datetime = None
52
79
  ) -> bool:
80
+ """
81
+ Implementation of the has_data method to check if
82
+ the data provider has data for the given data source.
83
+
84
+ Args:
85
+ data_source (DataSource): The data source to check.
86
+ start_date (datetime, optional): The start date for the data.
87
+ Defaults to None.
88
+ end_date (datetime, optional): The end date for the data.
89
+ Defaults to None.
90
+
91
+ Returns:
92
+ bool: True if the data provider has data for the given data source,
93
+ False otherwise.
94
+ """
95
+ if start_date is None and end_date is None:
96
+ return False
97
+
98
+ if DataType.OHLCV.equals(data_source.data_type) and \
99
+ data_source.symbol == self.symbol and \
100
+ data_source.time_frame.equals(self.time_frame) and \
101
+ data_source.market == self.market:
102
+
103
+ if end_date > self._end_date_data_source:
104
+ return False
105
+
106
+ if data_source.window_size is not None:
107
+ minutes = TimeFrame.from_value(
108
+ data_source.time_frame
109
+ ).amount_of_minutes * data_source.window_size
110
+ required_start_date = end_date - timedelta(
111
+ minutes=minutes
112
+ )
113
+
114
+ if required_start_date < self._start_date_data_source:
115
+ return False
116
+ else:
117
+ required_start_date = start_date
118
+ if required_start_date < self._start_date_data_source:
119
+ return False
53
120
 
54
- if symbol == self.symbol and market == self.market and \
55
- data_type == self.data_type and time_frame == self.time_frame:
56
121
  return True
57
122
 
58
123
  return False
59
124
 
60
125
  def get_data(
61
126
  self,
62
- data_type: str = None,
63
127
  date: datetime = None,
64
- symbol: str = None,
65
- market: str = None,
66
- time_frame: str = None,
67
128
  start_date: datetime = None,
68
129
  end_date: datetime = None,
69
- storage_path=None,
70
- window_size=None,
71
- pandas=False
130
+ save: bool = False,
72
131
  ):
132
+ """
133
+ Fetches OHLCV data for a given symbol and date range.
134
+ If no date range is provided, it returns the entire dataset.
73
135
 
74
- if self.data is None:
75
- self._load_data(self.file_path)
136
+ Args:
137
+ date (datetime, optional): A specific date to fetch data for.
138
+ Defaults to None.
139
+ start_date (datetime, optional): The start date for the data.
140
+ Defaults to None.
141
+ end_date (datetime, optional): The end date for the data.
142
+ Defaults to None.
143
+ save (bool, optional): Whether to save the data to a file.
144
+
145
+ Returns:
146
+ polars.DataFrame: A DataFrame containing the OHLCV data for the
147
+ specified symbol and date range.
148
+ """
149
+ windows_size = self.window_size
76
150
 
77
151
  if start_date is None and end_date is None:
78
- return self.data
79
-
80
- if end_date is not None and start_date is not None:
81
-
82
- if end_date < start_date:
83
- raise OperationalException(
84
- f"End date {end_date} is before the start date "
85
- f"{start_date}"
86
- )
87
-
88
- if start_date > self._end_date_data_source:
89
- return polars.DataFrame()
90
-
152
+ end_date = datetime.now(tz=timezone.utc)
153
+ time_frame = TimeFrame.from_value(self.time_frame)
154
+ start_date = end_date - timedelta(
155
+ minutes=time_frame.amount_of_minutes() * windows_size
156
+ )
157
+ elif start_date is None and end_date is not None:
158
+ start_date = end_date - timedelta(
159
+ minutes=TimeFrame.from_value(
160
+ self.time_frame
161
+ ).amount_of_minutes * windows_size
162
+ )
91
163
  df = self.data
92
164
  df = df.filter(
93
- (df['Datetime'] >= start_date)
94
- & (df['Datetime'] <= end_date)
165
+ (df['Datetime'] >= start_date) & (df['Datetime'] <= end_date)
95
166
  )
96
167
  return df
97
168
 
98
169
  if start_date is not None:
170
+ end_date = start_date + timedelta(
171
+ minutes=TimeFrame.from_value(self.time_frame)
172
+ .amount_of_minutes * windows_size
173
+ )
99
174
 
100
175
  if start_date < self._start_date_data_source:
101
- return polars.DataFrame()
176
+ return pl.DataFrame()
102
177
 
103
178
  if start_date > self._end_date_data_source:
104
- return polars.DataFrame()
179
+ return pl.DataFrame()
105
180
 
106
181
  df = self.data
107
182
  df = df.filter(
108
- (df['Datetime'] >= start_date)
183
+ (df['Datetime'] >= start_date) & (df['Datetime'] <= end_date)
109
184
  )
110
- df = df.head(self.window_size)
111
185
  return df
112
186
 
113
187
  if end_date is not None:
188
+ start_date = end_date - timedelta(
189
+ minutes=TimeFrame.from_value(
190
+ self.time_frame
191
+ ).amount_of_minutes * windows_size
192
+ )
114
193
 
115
194
  if end_date < self._start_date_data_source:
116
- return polars.DataFrame()
195
+ return pl.DataFrame()
117
196
 
118
197
  if end_date > self._end_date_data_source:
119
- return polars.DataFrame()
198
+ return pl.DataFrame()
120
199
 
121
200
  df = self.data
122
201
  df = df.filter(
123
- (df['Datetime'] <= end_date)
202
+ (df['Datetime'] >= start_date) & (df['Datetime'] <= end_date)
124
203
  )
125
- df = df.tail(self.window_size)
126
204
  return df
127
205
 
128
206
  return self.data
129
207
 
130
- def pre_pare_backtest_data(
208
+ def prepare_backtest_data(
131
209
  self,
132
210
  backtest_start_date,
133
- backtest_end_date,
134
- symbol: str = None,
135
- market: str = None,
136
- time_frame: str = None,
137
- window_size=None
211
+ backtest_end_date
138
212
  ) -> None:
213
+ """
214
+ Prepares backtest data for a given symbol and date range.
139
215
 
140
- if symbol is not None:
141
- return
142
-
143
- if self.data is None:
144
- self._load_data(self.file_path)
216
+ Args:
217
+ backtest_start_date (datetime): The start date for the
218
+ backtest data.
219
+ backtest_end_date (datetime): The end date for the
220
+ backtest data.
221
+
222
+ Raises:
223
+ OperationalException: If the backtest start date is before the
224
+ start date of the data source or if the backtest end date is
225
+ after the end date of the data source.
226
+
227
+ Returns:
228
+ None
229
+ """
145
230
 
146
231
  if backtest_start_date < self._start_date_data_source:
147
232
  raise OperationalException(
@@ -155,80 +240,173 @@ class CSVOHLCVDataProvider(DataProvider):
155
240
  f"end date {self._end_date_data_source}"
156
241
  )
157
242
 
243
+ # There must be at least backtest_start_date - window_size * time_frame
244
+ # data available to create a sliding window.
245
+ required_start_date = backtest_start_date - \
246
+ timedelta(
247
+ minutes=TimeFrame.from_value(self.time_frame)
248
+ .amount_of_minutes * self.window_size
249
+ )
250
+
251
+ # Create cache with sliding windows
252
+ self._precompute_sliding_windows(
253
+ window_size=self.window_size,
254
+ start_date=backtest_start_date,
255
+ end_date=backtest_end_date
256
+ )
257
+
258
+ if required_start_date < self._start_date_data_source:
259
+ self.number_of_missing_data_points = (
260
+ self._start_date_data_source - required_start_date
261
+ ).total_seconds() / (
262
+ TimeFrame.from_value(self.time_frame).amount_of_minutes * 60
263
+ )
264
+
265
+ n_min = TimeFrame.from_value(self.time_frame).amount_of_minutes
266
+
267
+ # Assume self.data is a Polars DataFrame with a "Datetime" column
268
+ expected_dates = pl.datetime_range(
269
+ start=required_start_date,
270
+ end=backtest_end_date,
271
+ interval=f"{n_min}m",
272
+ eager=True
273
+ ).to_list()
274
+
275
+ actual_dates = self.data["Datetime"].to_list()
276
+
277
+ # Find missing dates
278
+ self.missing_data_point_dates = sorted(
279
+ set(expected_dates) - set(actual_dates)
280
+ )
281
+
158
282
  def get_backtest_data(
159
283
  self,
160
- date: datetime = None,
161
- symbol: str = None,
162
- market: str = None,
163
- time_frame: str = None,
284
+ backtest_index_date: datetime,
164
285
  backtest_start_date: datetime = None,
165
286
  backtest_end_date: datetime = None,
166
- window_size=None,
167
- pandas=False
287
+ data_source: DataSource = None
168
288
  ) -> None:
289
+ """
290
+ Fetches backtest data for a given datasource
169
291
 
170
- if self.data is None:
171
- self._load_data(self.file_path)
292
+ Args:
293
+ backtest_index_date (datetime): The date for which to fetch
294
+ backtest data.
295
+ backtest_start_date (datetime): The start date for the
296
+ backtest data.
297
+ backtest_end_date (datetime): The end date for the
298
+ backtest data.
299
+ data_source (Optional[DataSource]): The data source specification
300
+ that matches a data provider.
301
+
302
+ Raises:
303
+ OperationalException: If the requested backtest date range
304
+ is outside the available data range.
305
+
306
+ Returns:
307
+ pl.DataFrame: The backtest data for the given datasource.
308
+ """
309
+ if backtest_start_date is not None and \
310
+ backtest_end_date is not None:
172
311
 
173
- if backtest_start_date is None and backtest_end_date is None:
174
- return self.data
312
+ if backtest_start_date < self._start_date_data_source:
175
313
 
176
- if backtest_start_date is not None and backtest_end_date is not None:
314
+ if data_source is not None:
315
+ raise OperationalException(
316
+ f"Request data date {backtest_end_date} "
317
+ f"is after the range of "
318
+ f"the available data "
319
+ f"{self._start_date_data_source} "
320
+ f"- {self._end_date_data_source}."
321
+ f" for data source {data_source.identifier}."
322
+ )
177
323
 
178
- if backtest_end_date < backtest_start_date:
179
324
  raise OperationalException(
180
- f"Backtest end date {backtest_end_date} is before the "
181
- f"start date {backtest_start_date}"
325
+ f"Request data date {backtest_start_date} "
326
+ f"is before the range of "
327
+ f"the available data "
328
+ f"{self._start_date_data_source} "
329
+ f"- {self._end_date_data_source}."
182
330
  )
183
331
 
184
- if backtest_start_date > self._end_date_data_source:
185
- return polars.DataFrame()
186
-
187
- df = self.data
188
- df = df.filter(
189
- (df['Datetime'] >= backtest_start_date)
190
- & (df['Datetime'] <= backtest_end_date)
191
- )
192
- return df
332
+ if backtest_end_date > self._end_date_data_source:
193
333
 
194
- if backtest_start_date is not None:
334
+ if data_source is not None:
335
+ raise OperationalException(
336
+ f"Request data date {backtest_end_date} "
337
+ f"is after the range of "
338
+ f"the available data "
339
+ f"{self._start_date_data_source} "
340
+ f"- {self._end_date_data_source}."
341
+ f" for data source {data_source.identifier}."
342
+ )
195
343
 
196
- if backtest_start_date < self._start_date_data_source:
197
- return polars.DataFrame()
198
-
199
- if backtest_start_date > self._end_date_data_source:
200
- return polars.DataFrame()
344
+ raise OperationalException(
345
+ f"Request data date {backtest_end_date} "
346
+ f"is after the range of "
347
+ f"the available data "
348
+ f"{self._start_date_data_source} "
349
+ f"- {self._end_date_data_source}."
350
+ )
201
351
 
202
- df = self.data
203
- df = df.filter(
204
- (df['Datetime'] >= backtest_start_date)
352
+ data = self.data.filter(
353
+ (pl.col("Datetime") >= backtest_start_date) &
354
+ (pl.col("Datetime") <= backtest_end_date)
205
355
  )
206
- df = df.head(self.window_size)
207
- return df
208
-
209
- if backtest_end_date is not None:
356
+ else:
357
+ try:
358
+ data = self.window_cache[backtest_index_date]
359
+ except KeyError:
360
+
361
+ try:
362
+ # Return the key in the cache that is closest to the
363
+ # backtest_index_date but not after it.
364
+ closest_key = min(
365
+ [k for k in self.window_cache.keys()
366
+ if k >= backtest_index_date]
367
+ )
368
+ data = self.window_cache[closest_key]
369
+ except ValueError:
370
+
371
+ if data_source is not None:
372
+ raise OperationalException(
373
+ "No data available for the "
374
+ f"date: {backtest_index_date} "
375
+ "within the prepared backtest data "
376
+ f"for data source {data_source.identifier}."
377
+ )
378
+
379
+ raise OperationalException(
380
+ "No data available for the "
381
+ f"date: {backtest_index_date} "
382
+ "within the prepared backtest data."
383
+ )
384
+
385
+ if self.pandas:
386
+ data = convert_polars_to_pandas(data)
387
+
388
+ return data
210
389
 
211
- if backtest_end_date < self._start_date_data_source:
212
- return polars.DataFrame()
390
+ def _load_data(self, storage_path):
391
+ """
392
+ Load OHLCV data from a CSV file into a Polars DataFrame.
393
+ The CSV file should contain the following columns:
213
394
 
214
- if backtest_end_date > self._end_date_data_source:
215
- return polars.DataFrame()
395
+ Datetime, Open, High, Low, Close, Volume.
216
396
 
217
- df = self.data
218
- df = df.filter(
219
- (df['Datetime'] <= backtest_end_date)
220
- )
221
- df = df.tail(self.window_size)
222
- return df
397
+ The Datetime column should be in UTC timezone and in milliseconds.
223
398
 
224
- return self.data
399
+ Args:
400
+ storage_path (str): The path to the CSV file containing OHLCV data.
225
401
 
226
- def _load_data(self, storage_path):
227
- self._columns = [
228
- "Datetime", "Open", "High", "Low", "Close", "Volume"
229
- ]
402
+ Raises:
403
+ OperationalException: If the CSV file does not contain all
404
+ required OHLCV columns.
230
405
 
231
- df = polars.read_csv(storage_path)
406
+ Returns:
407
+ None
408
+ """
409
+ df = pl.read_csv(storage_path)
232
410
 
233
411
  # Check if all column names are in the csv file
234
412
  if not all(column in df.columns for column in self._columns):
@@ -241,13 +419,13 @@ class CSVOHLCVDataProvider(DataProvider):
241
419
  f"Missing columns: {missing_columns}"
242
420
  )
243
421
 
244
- self.data = polars.read_csv(
422
+ self.data = pl.read_csv(
245
423
  storage_path,
246
- schema_overrides={"Datetime": polars.Datetime},
424
+ schema_overrides={"Datetime": pl.Datetime},
247
425
  low_memory=True
248
426
  ).with_columns(
249
- polars.col("Datetime").cast(
250
- polars.Datetime(time_unit="ms", time_zone="UTC")
427
+ pl.col("Datetime").cast(
428
+ pl.Datetime(time_unit="ms", time_zone="UTC")
251
429
  )
252
430
  )
253
431
 
@@ -255,3 +433,136 @@ class CSVOHLCVDataProvider(DataProvider):
255
433
  last_row = self.data.tail(1)
256
434
  self._start_date_data_source = first_row["Datetime"][0]
257
435
  self._end_date_data_source = last_row["Datetime"][0]
436
+
437
def _precompute_sliding_windows(
    self,
    window_size: int,
    start_date: datetime,
    end_date: datetime
) -> None:
    """
    Build the window cache used for fast lookups in backtest mode.

    Every timestamp of the loaded data that lies within
    [start_date, end_date] becomes a key of `self.window_cache`. The
    value stored under a key is the subset of `self.data` whose
    Datetime falls in the inclusive range
    [timestamp - lookback, timestamp], where lookback equals
    `window_size * self.time_frame.amount_of_minutes` minutes.

    The window is therefore selected by time span rather than by a
    fixed number of rows.

    NOTE(review): because both bounds are inclusive, gap-free data
    spaced exactly one time frame apart would presumably yield
    window_size + 1 rows per window; confirm this is intended.

    Args:
        window_size (int): The number of time frame steps each window
            looks back from its closing timestamp.
        start_date (datetime): The earliest timestamp for which a
            window is cached.
        end_date (datetime): The latest timestamp for which a
            window is cached.

    Returns:
        None
    """
    self.window_cache = {}
    datetime_column = self.data["Datetime"]

    # Each timestamp inside the requested range closes exactly one window
    window_end_points = [
        moment for moment in datetime_column.to_list()
        if start_date <= moment <= end_date
    ]

    for window_end in window_end_points:
        # Computed per window so that an empty range never touches
        # the time frame at all
        lookback = timedelta(
            minutes=self.time_frame.amount_of_minutes * window_size
        )
        window_start = window_end - lookback

        not_after_end = datetime_column <= window_end
        not_before_start = datetime_column >= window_start

        # The closing timestamp of the window is the cache key
        self.window_cache[window_end] = self.data.filter(
            not_after_end & not_before_start
        )
485
+
486
def copy(self, data_source: DataSource) -> "DataProvider":
    """
    Build a new CSVOHLCVDataProvider configured from a data source.

    The symbol, time frame, market, window size and pandas flag are
    taken from the data source. The data provider identifier is taken
    from this instance. The storage path comes from the data source,
    unless it is None, in which case the storage path of this
    instance is used.

    Args:
        data_source (DataSource): The data source that supplies the
            configuration of the new provider.

    Returns:
        DataProvider: A new CSVOHLCVDataProvider instance for the
            given data source.
    """
    resolved_path = data_source.storage_path

    # Fall back to the path of this provider when the data source
    # does not define one
    if resolved_path is None:
        resolved_path = self.storage_path

    provider_arguments = {
        "storage_path": resolved_path,
        "symbol": data_source.symbol,
        "time_frame": data_source.time_frame,
        "market": data_source.market,
        "window_size": data_source.window_size,
        "data_provider_identifier": self.data_provider_identifier,
        "pandas": data_source.pandas,
    }
    return CSVOHLCVDataProvider(**provider_arguments)
512
+
513
def get_number_of_data_points(
    self,
    start_date: datetime,
    end_date: datetime
) -> int:
    """
    Count the data points whose Datetime lies within a date range.

    Both boundaries are inclusive: a data point exactly on start_date
    or end_date is counted.

    Args:
        start_date (datetime): The lower bound of the range.
        end_date (datetime): The upper bound of the range.

    Returns:
        int: The number of entries in the Datetime column of the
            loaded data that fall between start_date and end_date.
    """
    return sum(
        1 for moment in self.data["Datetime"].to_list()
        if start_date <= moment <= end_date
    )
536
+
537
def get_missing_data_dates(
    self,
    start_date: datetime,
    end_date: datetime,
) -> List[datetime]:
    """
    Collect the known missing data point dates within a date range.

    The dates are read from `self.missing_data_point_dates` and kept
    in their original order. Both boundaries are inclusive.

    Args:
        start_date (datetime): The lower bound of the range.
        end_date (datetime): The upper bound of the range.

    Returns:
        List[datetime]: The missing data point dates that fall
            between start_date and end_date.
    """
    dates_in_range = []

    for moment in self.missing_data_point_dates:
        if start_date <= moment <= end_date:
            dates_in_range.append(moment)

    return dates_in_range
559
+
560
def get_data_source_file_path(self) -> Union[str, None]:
    """
    Return the storage path held by this data provider.

    Returns:
        Union[str, None]: The value of `self.storage_path`, which is
            the file path of the data source when it is stored
            locally, otherwise None.
    """
    file_path = self.storage_path
    return file_path