investing-algorithm-framework 1.5__py3-none-any.whl → 7.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. investing_algorithm_framework/__init__.py +192 -16
  2. investing_algorithm_framework/analysis/__init__.py +16 -0
  3. investing_algorithm_framework/analysis/backtest_data_ranges.py +202 -0
  4. investing_algorithm_framework/analysis/data.py +170 -0
  5. investing_algorithm_framework/analysis/markdown.py +91 -0
  6. investing_algorithm_framework/analysis/ranking.py +298 -0
  7. investing_algorithm_framework/app/__init__.py +29 -4
  8. investing_algorithm_framework/app/algorithm/__init__.py +7 -0
  9. investing_algorithm_framework/app/algorithm/algorithm.py +193 -0
  10. investing_algorithm_framework/app/algorithm/algorithm_factory.py +118 -0
  11. investing_algorithm_framework/app/app.py +2220 -379
  12. investing_algorithm_framework/app/app_hook.py +28 -0
  13. investing_algorithm_framework/app/context.py +1724 -0
  14. investing_algorithm_framework/app/eventloop.py +620 -0
  15. investing_algorithm_framework/app/reporting/__init__.py +27 -0
  16. investing_algorithm_framework/app/reporting/ascii.py +921 -0
  17. investing_algorithm_framework/app/reporting/backtest_report.py +349 -0
  18. investing_algorithm_framework/app/reporting/charts/__init__.py +19 -0
  19. investing_algorithm_framework/app/reporting/charts/entry_exist_signals.py +66 -0
  20. investing_algorithm_framework/app/reporting/charts/equity_curve.py +37 -0
  21. investing_algorithm_framework/app/reporting/charts/equity_curve_drawdown.py +74 -0
  22. investing_algorithm_framework/app/reporting/charts/line_chart.py +11 -0
  23. investing_algorithm_framework/app/reporting/charts/monthly_returns_heatmap.py +70 -0
  24. investing_algorithm_framework/app/reporting/charts/ohlcv_data_completeness.py +51 -0
  25. investing_algorithm_framework/app/reporting/charts/rolling_sharp_ratio.py +79 -0
  26. investing_algorithm_framework/app/reporting/charts/yearly_returns_barchart.py +55 -0
  27. investing_algorithm_framework/app/reporting/generate.py +185 -0
  28. investing_algorithm_framework/app/reporting/tables/__init__.py +11 -0
  29. investing_algorithm_framework/app/reporting/tables/key_metrics_table.py +217 -0
  30. investing_algorithm_framework/app/reporting/tables/time_metrics_table.py +80 -0
  31. investing_algorithm_framework/app/reporting/tables/trade_metrics_table.py +147 -0
  32. investing_algorithm_framework/app/reporting/tables/trades_table.py +75 -0
  33. investing_algorithm_framework/app/reporting/tables/utils.py +29 -0
  34. investing_algorithm_framework/app/reporting/templates/report_template.html.j2 +154 -0
  35. investing_algorithm_framework/app/stateless/action_handlers/__init__.py +6 -3
  36. investing_algorithm_framework/app/stateless/action_handlers/action_handler_strategy.py +1 -1
  37. investing_algorithm_framework/app/stateless/action_handlers/check_online_handler.py +2 -1
  38. investing_algorithm_framework/app/stateless/action_handlers/run_strategy_handler.py +14 -7
  39. investing_algorithm_framework/app/strategy.py +867 -60
  40. investing_algorithm_framework/app/task.py +5 -3
  41. investing_algorithm_framework/app/web/__init__.py +2 -1
  42. investing_algorithm_framework/app/web/controllers/__init__.py +2 -2
  43. investing_algorithm_framework/app/web/controllers/orders.py +3 -2
  44. investing_algorithm_framework/app/web/controllers/positions.py +2 -2
  45. investing_algorithm_framework/app/web/create_app.py +4 -2
  46. investing_algorithm_framework/app/web/schemas/position.py +1 -0
  47. investing_algorithm_framework/cli/__init__.py +0 -0
  48. investing_algorithm_framework/cli/cli.py +231 -0
  49. investing_algorithm_framework/cli/deploy_to_aws_lambda.py +501 -0
  50. investing_algorithm_framework/cli/deploy_to_azure_function.py +718 -0
  51. investing_algorithm_framework/cli/initialize_app.py +603 -0
  52. investing_algorithm_framework/cli/templates/.gitignore.template +178 -0
  53. investing_algorithm_framework/cli/templates/app.py.template +18 -0
  54. investing_algorithm_framework/cli/templates/app_aws_lambda_function.py.template +48 -0
  55. investing_algorithm_framework/cli/templates/app_azure_function.py.template +14 -0
  56. investing_algorithm_framework/cli/templates/app_web.py.template +18 -0
  57. investing_algorithm_framework/cli/templates/aws_lambda_dockerfile.template +22 -0
  58. investing_algorithm_framework/cli/templates/aws_lambda_dockerignore.template +92 -0
  59. investing_algorithm_framework/cli/templates/aws_lambda_readme.md.template +110 -0
  60. investing_algorithm_framework/cli/templates/aws_lambda_requirements.txt.template +2 -0
  61. investing_algorithm_framework/cli/templates/azure_function_function_app.py.template +65 -0
  62. investing_algorithm_framework/cli/templates/azure_function_host.json.template +15 -0
  63. investing_algorithm_framework/cli/templates/azure_function_local.settings.json.template +8 -0
  64. investing_algorithm_framework/cli/templates/azure_function_requirements.txt.template +3 -0
  65. investing_algorithm_framework/cli/templates/data_providers.py.template +17 -0
  66. investing_algorithm_framework/cli/templates/env.example.template +2 -0
  67. investing_algorithm_framework/cli/templates/env_azure_function.example.template +4 -0
  68. investing_algorithm_framework/cli/templates/market_data_providers.py.template +9 -0
  69. investing_algorithm_framework/cli/templates/readme.md.template +135 -0
  70. investing_algorithm_framework/cli/templates/requirements.txt.template +2 -0
  71. investing_algorithm_framework/cli/templates/run_backtest.py.template +20 -0
  72. investing_algorithm_framework/cli/templates/strategy.py.template +124 -0
  73. investing_algorithm_framework/cli/validate_backtest_checkpoints.py +197 -0
  74. investing_algorithm_framework/create_app.py +40 -7
  75. investing_algorithm_framework/dependency_container.py +100 -47
  76. investing_algorithm_framework/domain/__init__.py +97 -30
  77. investing_algorithm_framework/domain/algorithm_id.py +69 -0
  78. investing_algorithm_framework/domain/backtesting/__init__.py +25 -0
  79. investing_algorithm_framework/domain/backtesting/backtest.py +548 -0
  80. investing_algorithm_framework/domain/backtesting/backtest_date_range.py +113 -0
  81. investing_algorithm_framework/domain/backtesting/backtest_evaluation_focuss.py +241 -0
  82. investing_algorithm_framework/domain/backtesting/backtest_metrics.py +470 -0
  83. investing_algorithm_framework/domain/backtesting/backtest_permutation_test.py +275 -0
  84. investing_algorithm_framework/domain/backtesting/backtest_run.py +663 -0
  85. investing_algorithm_framework/domain/backtesting/backtest_summary_metrics.py +162 -0
  86. investing_algorithm_framework/domain/backtesting/backtest_utils.py +198 -0
  87. investing_algorithm_framework/domain/backtesting/combine_backtests.py +392 -0
  88. investing_algorithm_framework/domain/config.py +59 -136
  89. investing_algorithm_framework/domain/constants.py +18 -37
  90. investing_algorithm_framework/domain/data_provider.py +334 -0
  91. investing_algorithm_framework/domain/data_structures.py +42 -0
  92. investing_algorithm_framework/domain/exceptions.py +51 -1
  93. investing_algorithm_framework/domain/models/__init__.py +26 -19
  94. investing_algorithm_framework/domain/models/app_mode.py +34 -0
  95. investing_algorithm_framework/domain/models/data/__init__.py +7 -0
  96. investing_algorithm_framework/domain/models/data/data_source.py +222 -0
  97. investing_algorithm_framework/domain/models/data/data_type.py +46 -0
  98. investing_algorithm_framework/domain/models/event.py +35 -0
  99. investing_algorithm_framework/domain/models/market/__init__.py +5 -0
  100. investing_algorithm_framework/domain/models/market/market_credential.py +88 -0
  101. investing_algorithm_framework/domain/models/order/__init__.py +3 -4
  102. investing_algorithm_framework/domain/models/order/order.py +198 -65
  103. investing_algorithm_framework/domain/models/order/order_status.py +2 -2
  104. investing_algorithm_framework/domain/models/order/order_type.py +1 -3
  105. investing_algorithm_framework/domain/models/portfolio/__init__.py +6 -2
  106. investing_algorithm_framework/domain/models/portfolio/portfolio.py +98 -3
  107. investing_algorithm_framework/domain/models/portfolio/portfolio_configuration.py +37 -43
  108. investing_algorithm_framework/domain/models/portfolio/portfolio_snapshot.py +108 -11
  109. investing_algorithm_framework/domain/models/position/__init__.py +2 -1
  110. investing_algorithm_framework/domain/models/position/position.py +20 -0
  111. investing_algorithm_framework/domain/models/position/position_size.py +41 -0
  112. investing_algorithm_framework/domain/models/position/position_snapshot.py +0 -2
  113. investing_algorithm_framework/domain/models/risk_rules/__init__.py +7 -0
  114. investing_algorithm_framework/domain/models/risk_rules/stop_loss_rule.py +51 -0
  115. investing_algorithm_framework/domain/models/risk_rules/take_profit_rule.py +55 -0
  116. investing_algorithm_framework/domain/models/snapshot_interval.py +45 -0
  117. investing_algorithm_framework/domain/models/strategy_profile.py +19 -141
  118. investing_algorithm_framework/domain/models/time_frame.py +94 -98
  119. investing_algorithm_framework/domain/models/time_interval.py +33 -0
  120. investing_algorithm_framework/domain/models/time_unit.py +66 -2
  121. investing_algorithm_framework/domain/models/tracing/__init__.py +0 -0
  122. investing_algorithm_framework/domain/models/tracing/trace.py +23 -0
  123. investing_algorithm_framework/domain/models/trade/__init__.py +11 -0
  124. investing_algorithm_framework/domain/models/trade/trade.py +389 -0
  125. investing_algorithm_framework/domain/models/trade/trade_status.py +40 -0
  126. investing_algorithm_framework/domain/models/trade/trade_stop_loss.py +332 -0
  127. investing_algorithm_framework/domain/models/trade/trade_take_profit.py +365 -0
  128. investing_algorithm_framework/domain/order_executor.py +112 -0
  129. investing_algorithm_framework/domain/portfolio_provider.py +118 -0
  130. investing_algorithm_framework/domain/services/__init__.py +11 -0
  131. investing_algorithm_framework/domain/services/market_credential_service.py +37 -0
  132. investing_algorithm_framework/domain/services/portfolios/__init__.py +5 -0
  133. investing_algorithm_framework/domain/services/portfolios/portfolio_sync_service.py +9 -0
  134. investing_algorithm_framework/domain/services/rounding_service.py +27 -0
  135. investing_algorithm_framework/domain/services/state_handler.py +38 -0
  136. investing_algorithm_framework/domain/strategy.py +1 -29
  137. investing_algorithm_framework/domain/utils/__init__.py +15 -5
  138. investing_algorithm_framework/domain/utils/csv.py +22 -0
  139. investing_algorithm_framework/domain/utils/custom_tqdm.py +22 -0
  140. investing_algorithm_framework/domain/utils/dates.py +57 -0
  141. investing_algorithm_framework/domain/utils/jupyter_notebook_detection.py +19 -0
  142. investing_algorithm_framework/domain/utils/polars.py +53 -0
  143. investing_algorithm_framework/domain/utils/random.py +29 -0
  144. investing_algorithm_framework/download_data.py +244 -0
  145. investing_algorithm_framework/infrastructure/__init__.py +37 -11
  146. investing_algorithm_framework/infrastructure/data_providers/__init__.py +36 -0
  147. investing_algorithm_framework/infrastructure/data_providers/ccxt.py +1152 -0
  148. investing_algorithm_framework/infrastructure/data_providers/csv.py +568 -0
  149. investing_algorithm_framework/infrastructure/data_providers/pandas.py +599 -0
  150. investing_algorithm_framework/infrastructure/database/__init__.py +6 -2
  151. investing_algorithm_framework/infrastructure/database/sql_alchemy.py +86 -12
  152. investing_algorithm_framework/infrastructure/models/__init__.py +7 -3
  153. investing_algorithm_framework/infrastructure/models/order/__init__.py +2 -2
  154. investing_algorithm_framework/infrastructure/models/order/order.py +53 -53
  155. investing_algorithm_framework/infrastructure/models/order/order_metadata.py +44 -0
  156. investing_algorithm_framework/infrastructure/models/order_trade_association.py +10 -0
  157. investing_algorithm_framework/infrastructure/models/portfolio/__init__.py +1 -1
  158. investing_algorithm_framework/infrastructure/models/portfolio/portfolio_snapshot.py +8 -2
  159. investing_algorithm_framework/infrastructure/models/portfolio/{portfolio.py → sql_portfolio.py} +17 -6
  160. investing_algorithm_framework/infrastructure/models/position/position_snapshot.py +3 -1
  161. investing_algorithm_framework/infrastructure/models/trades/__init__.py +9 -0
  162. investing_algorithm_framework/infrastructure/models/trades/trade.py +130 -0
  163. investing_algorithm_framework/infrastructure/models/trades/trade_stop_loss.py +59 -0
  164. investing_algorithm_framework/infrastructure/models/trades/trade_take_profit.py +55 -0
  165. investing_algorithm_framework/infrastructure/order_executors/__init__.py +21 -0
  166. investing_algorithm_framework/infrastructure/order_executors/backtest_oder_executor.py +28 -0
  167. investing_algorithm_framework/infrastructure/order_executors/ccxt_order_executor.py +200 -0
  168. investing_algorithm_framework/infrastructure/portfolio_providers/__init__.py +19 -0
  169. investing_algorithm_framework/infrastructure/portfolio_providers/ccxt_portfolio_provider.py +199 -0
  170. investing_algorithm_framework/infrastructure/repositories/__init__.py +10 -4
  171. investing_algorithm_framework/infrastructure/repositories/order_metadata_repository.py +17 -0
  172. investing_algorithm_framework/infrastructure/repositories/order_repository.py +16 -5
  173. investing_algorithm_framework/infrastructure/repositories/portfolio_repository.py +2 -2
  174. investing_algorithm_framework/infrastructure/repositories/position_repository.py +11 -0
  175. investing_algorithm_framework/infrastructure/repositories/repository.py +84 -30
  176. investing_algorithm_framework/infrastructure/repositories/trade_repository.py +71 -0
  177. investing_algorithm_framework/infrastructure/repositories/trade_stop_loss_repository.py +29 -0
  178. investing_algorithm_framework/infrastructure/repositories/trade_take_profit_repository.py +29 -0
  179. investing_algorithm_framework/infrastructure/services/__init__.py +9 -4
  180. investing_algorithm_framework/infrastructure/services/aws/__init__.py +6 -0
  181. investing_algorithm_framework/infrastructure/services/aws/state_handler.py +193 -0
  182. investing_algorithm_framework/infrastructure/services/azure/__init__.py +5 -0
  183. investing_algorithm_framework/infrastructure/services/azure/state_handler.py +158 -0
  184. investing_algorithm_framework/infrastructure/services/backtesting/__init__.py +9 -0
  185. investing_algorithm_framework/infrastructure/services/backtesting/backtest_service.py +2596 -0
  186. investing_algorithm_framework/infrastructure/services/backtesting/event_backtest_service.py +285 -0
  187. investing_algorithm_framework/infrastructure/services/backtesting/vector_backtest_service.py +468 -0
  188. investing_algorithm_framework/services/__init__.py +123 -15
  189. investing_algorithm_framework/services/configuration_service.py +77 -11
  190. investing_algorithm_framework/services/data_providers/__init__.py +5 -0
  191. investing_algorithm_framework/services/data_providers/data_provider_service.py +1058 -0
  192. investing_algorithm_framework/services/market_credential_service.py +40 -0
  193. investing_algorithm_framework/services/metrics/__init__.py +119 -0
  194. investing_algorithm_framework/services/metrics/alpha.py +0 -0
  195. investing_algorithm_framework/services/metrics/beta.py +0 -0
  196. investing_algorithm_framework/services/metrics/cagr.py +60 -0
  197. investing_algorithm_framework/services/metrics/calmar_ratio.py +40 -0
  198. investing_algorithm_framework/services/metrics/drawdown.py +218 -0
  199. investing_algorithm_framework/services/metrics/equity_curve.py +24 -0
  200. investing_algorithm_framework/services/metrics/exposure.py +210 -0
  201. investing_algorithm_framework/services/metrics/generate.py +358 -0
  202. investing_algorithm_framework/services/metrics/mean_daily_return.py +84 -0
  203. investing_algorithm_framework/services/metrics/price_efficiency.py +57 -0
  204. investing_algorithm_framework/services/metrics/profit_factor.py +165 -0
  205. investing_algorithm_framework/services/metrics/recovery.py +113 -0
  206. investing_algorithm_framework/services/metrics/returns.py +452 -0
  207. investing_algorithm_framework/services/metrics/risk_free_rate.py +28 -0
  208. investing_algorithm_framework/services/metrics/sharpe_ratio.py +137 -0
  209. investing_algorithm_framework/services/metrics/sortino_ratio.py +74 -0
  210. investing_algorithm_framework/services/metrics/standard_deviation.py +156 -0
  211. investing_algorithm_framework/services/metrics/trades.py +473 -0
  212. investing_algorithm_framework/services/metrics/treynor_ratio.py +0 -0
  213. investing_algorithm_framework/services/metrics/ulcer.py +0 -0
  214. investing_algorithm_framework/services/metrics/value_at_risk.py +0 -0
  215. investing_algorithm_framework/services/metrics/volatility.py +118 -0
  216. investing_algorithm_framework/services/metrics/win_rate.py +177 -0
  217. investing_algorithm_framework/services/order_service/__init__.py +9 -0
  218. investing_algorithm_framework/services/order_service/order_backtest_service.py +178 -0
  219. investing_algorithm_framework/services/order_service/order_executor_lookup.py +110 -0
  220. investing_algorithm_framework/services/order_service/order_service.py +826 -0
  221. investing_algorithm_framework/services/portfolios/__init__.py +16 -0
  222. investing_algorithm_framework/services/portfolios/backtest_portfolio_service.py +54 -0
  223. investing_algorithm_framework/services/{portfolio_configuration_service.py → portfolios/portfolio_configuration_service.py} +27 -12
  224. investing_algorithm_framework/services/portfolios/portfolio_provider_lookup.py +106 -0
  225. investing_algorithm_framework/services/portfolios/portfolio_service.py +188 -0
  226. investing_algorithm_framework/services/portfolios/portfolio_snapshot_service.py +136 -0
  227. investing_algorithm_framework/services/portfolios/portfolio_sync_service.py +182 -0
  228. investing_algorithm_framework/services/positions/__init__.py +7 -0
  229. investing_algorithm_framework/services/positions/position_service.py +210 -0
  230. investing_algorithm_framework/services/repository_service.py +8 -2
  231. investing_algorithm_framework/services/trade_order_evaluator/__init__.py +9 -0
  232. investing_algorithm_framework/services/trade_order_evaluator/backtest_trade_oder_evaluator.py +117 -0
  233. investing_algorithm_framework/services/trade_order_evaluator/default_trade_order_evaluator.py +51 -0
  234. investing_algorithm_framework/services/trade_order_evaluator/trade_order_evaluator.py +80 -0
  235. investing_algorithm_framework/services/trade_service/__init__.py +9 -0
  236. investing_algorithm_framework/services/trade_service/trade_service.py +1099 -0
  237. investing_algorithm_framework/services/trade_service/trade_stop_loss_service.py +39 -0
  238. investing_algorithm_framework/services/trade_service/trade_take_profit_service.py +41 -0
  239. investing_algorithm_framework-7.25.6.dist-info/METADATA +535 -0
  240. investing_algorithm_framework-7.25.6.dist-info/RECORD +268 -0
  241. {investing_algorithm_framework-1.5.dist-info → investing_algorithm_framework-7.25.6.dist-info}/WHEEL +1 -2
  242. investing_algorithm_framework-7.25.6.dist-info/entry_points.txt +3 -0
  243. investing_algorithm_framework/app/algorithm.py +0 -630
  244. investing_algorithm_framework/domain/models/backtest_profile.py +0 -414
  245. investing_algorithm_framework/domain/models/market_data/__init__.py +0 -11
  246. investing_algorithm_framework/domain/models/market_data/asset_price.py +0 -50
  247. investing_algorithm_framework/domain/models/market_data/ohlcv.py +0 -105
  248. investing_algorithm_framework/domain/models/market_data/order_book.py +0 -63
  249. investing_algorithm_framework/domain/models/market_data/ticker.py +0 -92
  250. investing_algorithm_framework/domain/models/order/order_fee.py +0 -45
  251. investing_algorithm_framework/domain/models/trade.py +0 -78
  252. investing_algorithm_framework/domain/models/trading_data_types.py +0 -47
  253. investing_algorithm_framework/domain/models/trading_time_frame.py +0 -223
  254. investing_algorithm_framework/domain/singleton.py +0 -9
  255. investing_algorithm_framework/domain/utils/backtesting.py +0 -82
  256. investing_algorithm_framework/infrastructure/models/order/order_fee.py +0 -21
  257. investing_algorithm_framework/infrastructure/repositories/order_fee_repository.py +0 -15
  258. investing_algorithm_framework/infrastructure/services/market_backtest_service.py +0 -360
  259. investing_algorithm_framework/infrastructure/services/market_service.py +0 -410
  260. investing_algorithm_framework/infrastructure/services/performance_service.py +0 -192
  261. investing_algorithm_framework/services/backtest_service.py +0 -268
  262. investing_algorithm_framework/services/market_data_service.py +0 -77
  263. investing_algorithm_framework/services/order_backtest_service.py +0 -122
  264. investing_algorithm_framework/services/order_service.py +0 -752
  265. investing_algorithm_framework/services/portfolio_service.py +0 -164
  266. investing_algorithm_framework/services/portfolio_snapshot_service.py +0 -68
  267. investing_algorithm_framework/services/position_cost_service.py +0 -5
  268. investing_algorithm_framework/services/position_service.py +0 -63
  269. investing_algorithm_framework/services/strategy_orchestrator_service.py +0 -225
  270. investing_algorithm_framework-1.5.dist-info/AUTHORS.md +0 -8
  271. investing_algorithm_framework-1.5.dist-info/METADATA +0 -230
  272. investing_algorithm_framework-1.5.dist-info/RECORD +0 -119
  273. investing_algorithm_framework-1.5.dist-info/top_level.txt +0 -1
  274. /investing_algorithm_framework/{infrastructure/services/performance_backtest_service.py → app/reporting/tables/stop_loss_table.py} +0 -0
  275. /investing_algorithm_framework/services/{position_snapshot_service.py → positions/position_snapshot_service.py} +0 -0
  276. {investing_algorithm_framework-1.5.dist-info → investing_algorithm_framework-7.25.6.dist-info}/LICENSE +0 -0
investing_algorithm_framework/infrastructure/services/backtesting/backtest_service.py (new file)
@@ -0,0 +1,2596 @@
+ import gc
+ import json
+ import logging
+ import os
+ import numpy as np
+ import pandas as pd
+ import polars as pl
+ from collections import defaultdict
+ from datetime import datetime, timedelta, timezone
+ from pathlib import Path
+ from typing import Dict, List, Union, Optional, Callable
+
+ from investing_algorithm_framework.domain import BacktestRun, TimeUnit, \
+     OperationalException, BacktestDateRange, Backtest, combine_backtests, \
+     generate_backtest_summary_metrics, DataSource, \
+     PortfolioConfiguration, tqdm, SnapshotInterval, \
+     save_backtests_to_directory
+ from investing_algorithm_framework.services.data_providers import \
+     DataProviderService
+ from investing_algorithm_framework.services.metrics import \
+     create_backtest_metrics, get_risk_free_rate_us
+ from investing_algorithm_framework.services.portfolios import \
+     PortfolioConfigurationService
+ from .vector_backtest_service import VectorBacktestService
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def _print_progress(message: str, show_progress: bool = True):
+     """
+     Print progress message with forced flush.
+
+     This ensures output is immediately visible, especially important
+     in Jupyter notebooks and long-running processes.
+
+     Args:
+         message: The message to print.
+         show_progress: Whether to actually print the message.
+     """
+     if show_progress:
+         print(message, flush=True)
+
+
+ class BacktestService:
+     """
+     Service that facilitates backtests for algorithm objects.
+     """
+
+     def __init__(
+         self,
+         data_provider_service: DataProviderService,
+         order_service,
+         portfolio_service,
+         portfolio_snapshot_service,
+         position_repository,
+         trade_service,
+         configuration_service,
+         portfolio_configuration_service,
+     ):
+         super().__init__()
+         self._order_service = order_service
+         self._trade_service = trade_service
+         self._portfolio_service = portfolio_service
+         self._portfolio_snapshot_service = portfolio_snapshot_service
+         self._position_repository = position_repository
+         self._configuration_service = configuration_service
+         self._portfolio_configuration_service: PortfolioConfigurationService \
+             = portfolio_configuration_service
+         self._data_provider_service = data_provider_service
+
+     def _validate_algorithm_ids(
+         self,
+         algorithms: List = None,
+         strategies: List = None
+     ):
+         """
+         Validate that all algorithms or strategies have an algorithm id
+         and that the ids are unique.
+
+         Args:
+             algorithms (List[Algorithm], optional): The list of algorithms
+                 to validate.
+             strategies (List[TradingStrategy], optional): The list of
+                 strategies to validate.
+
+         Raises:
+             OperationalException: If any strategy does not have an
+                 algorithm id.
+         """
+         algorithm_ids = set()
+
+         if algorithms is not None:
+             for algorithm in algorithms:
+
+                 if not hasattr(algorithm, 'algorithm_id') or \
+                         algorithm.algorithm_id is None:
+                     raise OperationalException(
+                         "All algorithms must have an algorithm_id set "
+                         "before backtesting. Please set a unique "
+                         "algorithm_id for each algorithm."
+                     )
+                 if algorithm.algorithm_id in algorithm_ids:
+                     raise OperationalException(
+                         f"Duplicate algorithm_id found: "
+                         f"{algorithm.algorithm_id}. "
+                         "Please ensure all algorithms have unique "
+                         "algorithm_ids."
+                     )
+                 algorithm_ids.add(algorithm.algorithm_id)
+
+         else:
+
+             for strategy in strategies:
+                 if not hasattr(strategy, 'algorithm_id') or \
+                         strategy.algorithm_id is None:
+                     raise OperationalException(
+                         "All strategies must have an algorithm_id set "
+                         "before backtesting. Please set a unique "
+                         "algorithm_id for each strategy."
+                     )
+                 if strategy.algorithm_id in algorithm_ids:
+                     raise OperationalException(
+                         f"Duplicate algorithm_id found: "
+                         f"{strategy.algorithm_id}. "
+                         "Please ensure all strategies have unique "
+                         "algorithm_ids."
+                     )
+
+                 algorithm_ids.add(strategy.algorithm_id)
+
+     @staticmethod
+     def create_checkpoint(
+         backtests,
+         backtest_date_range,
+         storage_directory,
+         show_progress: bool = False,
+         mode: str = "append"
+     ):
+         """
+         Create or update a checkpoint file.
+
+         Args:
+             backtests: List of backtests to create checkpoints for.
+             backtest_date_range: The backtest date range to create
+                 checkpoints for.
+             storage_directory: Directory to store the checkpoints.
+             show_progress: Whether to print progress information.
+             mode: The mode to use when creating the checkpoint file.
+                 Can be "append" or "overwrite".
+
+         Returns:
+             None
+         """
+
+         if len(backtests) == 0:
+             if show_progress:
+                 print("No checkpoints to create")
+             return
+
+         checkpoint_file = os.path.join(
+             storage_directory, "checkpoints.json"
+         )
+         checkpoints = {}
+
+         if not os.path.exists(checkpoint_file):
+
+             if show_progress:
+                 print(
+                     "No existing checkpoint file found, "
+                     "creating new checkpoint file ..."
+                 )
+             checkpoints = {}
+         else:
+             # Load existing checkpoint file
+             with open(checkpoint_file, "r") as f:
+                 checkpoints = json.load(f)
+
+         backtest_range_key = (f"{backtest_date_range.start_date.isoformat()}_"
+                               f"{backtest_date_range.end_date.isoformat()}")
+         start_date = backtest_date_range.start_date.strftime("%Y-%m-%d")
+         end_date = backtest_date_range.end_date.strftime("%Y-%m-%d")
+         algorithm_ids = [bt.algorithm_id for bt in backtests]
+
+         if mode == "append" and backtest_range_key in checkpoints:
+             existing_ids = set(checkpoints[backtest_range_key])
+             new_ids = set(algorithm_ids)
+             combined_ids = list(existing_ids.union(new_ids))
+             checkpoints[backtest_range_key] = combined_ids
+         else:
+             checkpoints[backtest_range_key] = algorithm_ids
+
+         if show_progress:
+             print(
+                 "Updated checkpoints for backtest "
+                 f"range: {start_date} to {end_date}"
+             )
+             print(f"Saving {len(algorithm_ids)} checkpoints ...")
+
+         with open(checkpoint_file, "w") as f:
+             json.dump(checkpoints, f, indent=4)
+
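For reference, the checkpoint file written above is plain JSON that maps a "<start_isoformat>_<end_isoformat>" key to the algorithm ids already completed for that date range. A minimal sketch of inspecting it (the directory, dates, and ids are hypothetical, not taken from the package):

import json
import os

storage_directory = "./backtests"  # hypothetical storage directory
with open(os.path.join(storage_directory, "checkpoints.json")) as f:
    checkpoints = json.load(f)

# Expected shape, for example:
# {"2023-01-01T00:00:00_2023-06-30T00:00:00": ["algo_1", "algo_2"]}
key = "2023-01-01T00:00:00_2023-06-30T00:00:00"
print(checkpoints.get(key, []))  # ids checkpointed for that window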
+     @staticmethod
+     def get_checkpoints(
+         algorithm_ids: List[str],
+         backtest_date_range: BacktestDateRange,
+         storage_directory: str,
+         show_progress: bool = False
+     ) -> tuple[list[str], list[Backtest], list[str]]:
+         """
+         Get the checkpoints for the given backtest date range. If the
+         checkpoint file does not exist, no checkpointed ids or backtests
+         are returned and all given algorithm ids are reported as missing.
+
+         Args:
+             algorithm_ids: The list of algorithm IDs to get checkpoints for.
+             backtest_date_range: The backtest date range to get checkpoints
+                 for.
+             storage_directory: The directory where checkpoints are stored.
+             show_progress: Whether to print progress information.
+
+         Returns:
+             Tuple[List[str], List[Backtest], List[str]]: A tuple
+                 containing a list of checkpointed algorithm IDs,
+                 a list of backtests and a list of algorithm IDs that
+                 have no checkpoint yet.
+         """
+         checkpoint_file = os.path.join(
+             storage_directory, "checkpoints.json"
+         )
+
+         start_date = backtest_date_range.start_date.strftime("%Y-%m-%d")
+         end_date = backtest_date_range.end_date.strftime("%Y-%m-%d")
+         start_date_key = backtest_date_range.start_date.isoformat()
+         end_date_key = backtest_date_range.end_date.isoformat()
+
+         if show_progress:
+             print(
+                 "Loading checkpoints for backtest "
+                 f"range: {start_date} to {end_date}"
+             )
+
+         if not os.path.exists(checkpoint_file):
+             if show_progress:
+                 print(
+                     "Found 0 checkpoints for backtest "
+                     f"range {start_date} to {end_date}."
+                 )
+
+             return [], [], algorithm_ids
+
+         with open(checkpoint_file, "r") as f:
+             checkpoints = json.load(f)
+
+         backtest_range_key = f"{start_date_key}_{end_date_key}"
+         checkpointed = checkpoints.get(backtest_range_key, [])
+
+         # Determine which algorithms are missing by comparing which
+         # algorithm ids are present in the checkpoints and which are not
+         if len(checkpointed) != 0:
+             missing_checkpointed = set(algorithm_ids) - set(checkpointed)
+         else:
+             missing_checkpointed = algorithm_ids
+
+         if show_progress:
+             print(
+                 f"Found {len(checkpointed)} checkpoints "
+                 f"for backtest range {start_date} to {end_date}."
+             )
+
+         backtests = []
+
+         if len(checkpointed) != 0:
+             # Load checkpoints
+             if show_progress:
+                 for checkpoint in tqdm(
+                     checkpointed, colour="green", desc="Loading checkpoints"
+                 ):
+                     backtests.append(
+                         Backtest.open(
+                             os.path.join(storage_directory, checkpoint),
+                             backtest_date_ranges=[backtest_date_range]
+                         )
+                     )
+             else:
+                 for checkpoint in checkpointed:
+                     backtests.append(
+                         Backtest.open(
+                             os.path.join(storage_directory, checkpoint),
+                             backtest_date_ranges=[backtest_date_range]
+                         )
+                     )
+
+         return checkpointed, backtests, list(missing_checkpointed)
+
+     @staticmethod
+     def validate_strategy_for_vector_backtest(strategy):
+         """
+         Validate if the strategy is suitable for vectorized backtesting.
+
+         Args:
+             strategy: The strategy to validate.
+
+         Raises:
+             OperationalException: If the strategy does not have the required
+                 buy/sell signal functions.
+         """
+         if not hasattr(strategy, 'generate_buy_signals'):
+             raise OperationalException(
+                 "Strategy must define a vectorized buy signal function "
+                 "(generate_buy_signals)."
+             )
+         if not hasattr(strategy, 'generate_sell_signals'):
+             raise OperationalException(
+                 "Strategy must define a vectorized sell signal function "
+                 "(generate_sell_signals)."
+             )
+
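To make the contract above concrete: a strategy passes this validation as long as it exposes the two hooks named in the hasattr checks. A minimal sketch, where the pandas-based signal logic, the "close" column, and the exact method signatures are assumptions rather than a framework requirement:

import pandas as pd


class SmaCrossStrategy:

    algorithm_id = "sma_cross_20_50"  # unique id, required before backtesting

    def generate_buy_signals(self, data: pd.DataFrame) -> pd.Series:
        # Buy when the fast SMA crosses above the slow SMA
        fast = data["close"].rolling(20).mean()
        slow = data["close"].rolling(50).mean()
        return (fast > slow) & (fast.shift(1) <= slow.shift(1))

    def generate_sell_signals(self, data: pd.DataFrame) -> pd.Series:
        # Sell on the opposite cross
        fast = data["close"].rolling(20).mean()
        slow = data["close"].rolling(50).mean()
        return (fast < slow) & (fast.shift(1) >= slow.shift(1))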
+     def generate_schedule(
+         self,
+         strategies,
+         tasks,
+         start_date,
+         end_date
+     ) -> Dict[datetime, Dict[str, List[str]]]:
+         """
+         Generates a dict-based schedule: datetime => {strategy_ids, task_ids}
+         """
+         schedule = defaultdict(
+             lambda: {"strategy_ids": set(), "task_ids": set(tasks)}
+         )
+
+         for strategy in strategies:
+             strategy_id = strategy.strategy_profile.strategy_id
+             interval = strategy.strategy_profile.interval
+             time_unit = strategy.strategy_profile.time_unit
+
+             if time_unit == TimeUnit.SECOND:
+                 step = timedelta(seconds=interval)
+             elif time_unit == TimeUnit.MINUTE:
+                 step = timedelta(minutes=interval)
+             elif time_unit == TimeUnit.HOUR:
+                 step = timedelta(hours=interval)
+             elif time_unit == TimeUnit.DAY:
+                 step = timedelta(days=interval)
+             else:
+                 raise ValueError(f"Unsupported time unit: {time_unit}")
+
+             t = start_date
+             while t <= end_date:
+                 schedule[t]["strategy_ids"].add(strategy_id)
+                 t += step
+
+         return {
+             ts: {
+                 "strategy_ids": sorted(data["strategy_ids"]),
+                 "task_ids": sorted(data["task_ids"])
+             }
+             for ts, data in schedule.items()
+         }
+
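As a sketch of the structure generate_schedule returns: one strategy with interval=2 and time_unit=TimeUnit.HOUR plus a single registered task, scheduled from midnight to 06:00, would produce one entry per tick, each carrying every task id (the ids below are hypothetical):

from datetime import datetime

expected = {
    datetime(2023, 1, 1, 0, 0): {
        "strategy_ids": ["sma_cross_20_50"], "task_ids": ["sync_portfolio"]
    },
    datetime(2023, 1, 1, 2, 0): {
        "strategy_ids": ["sma_cross_20_50"], "task_ids": ["sync_portfolio"]
    },
    datetime(2023, 1, 1, 4, 0): {
        "strategy_ids": ["sma_cross_20_50"], "task_ids": ["sync_portfolio"]
    },
    datetime(2023, 1, 1, 6, 0): {  # end date is inclusive (t <= end_date)
        "strategy_ids": ["sma_cross_20_50"], "task_ids": ["sync_portfolio"]
    },
}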
+     def _get_initial_unallocated(self) -> float:
+         """
+         Get the initial unallocated amount for the backtest.
+
+         Returns:
+             float: The initial unallocated amount.
+         """
+         portfolios = self._portfolio_service.get_all()
+         initial_unallocated = 0.0
+
+         for portfolio in portfolios:
+             initial_unallocated += portfolio.initial_balance
+
+         return initial_unallocated
+
+     def create_backtest(
+         self,
+         algorithm,
+         number_of_runs,
+         backtest_date_range: BacktestDateRange,
+         risk_free_rate,
+         strategy_directory_path=None
+     ) -> Backtest:
+         """
+         Create a backtest for the given algorithm.
+
+         It will store all results and metrics in a Backtest object through
+         the BacktestResults and BacktestMetrics objects. Optionally,
+         it will also store the strategy related paths and backtest
+         data file paths.
+
+         Args:
+             algorithm: The algorithm to create the backtest report for
+             number_of_runs: The number of runs
+             backtest_date_range: The backtest date range of the backtest
+             risk_free_rate: The risk-free rate to use for the backtest metrics
+             strategy_directory_path (optional, str): The path to the
+                 strategy directory
+
+         Returns:
+             Backtest: The backtest containing the results and metrics.
+         """
+
+         # Get the first portfolio
+         portfolio = self._portfolio_service.get_all()[0]
+
+         run = BacktestRun(
+             backtest_start_date=backtest_date_range.start_date,
+             backtest_end_date=backtest_date_range.end_date,
+             backtest_date_range_name=backtest_date_range.name,
+             initial_unallocated=self._get_initial_unallocated(),
+             trading_symbol=portfolio.trading_symbol,
+             created_at=datetime.now(tz=timezone.utc),
+             portfolio_snapshots=self._portfolio_snapshot_service.get_all(
+                 {"portfolio_id": portfolio.id}
+             ),
+             number_of_runs=number_of_runs,
+             trades=self._trade_service.get_all(
+                 {"portfolio": portfolio.id}
+             ),
+             orders=self._order_service.get_all(
+                 {"portfolio": portfolio.id}
+             ),
+             positions=self._position_repository.get_all(
+                 {"portfolio": portfolio.id}
+             ),
+         )
+         backtest_metrics = create_backtest_metrics(
+             run, risk_free_rate=risk_free_rate
+         )
+         run.backtest_metrics = backtest_metrics
+         return Backtest(
+             algorithm_id=algorithm.id,
+             backtest_runs=[run],
+             backtest_summary=generate_backtest_summary_metrics(
+                 [backtest_metrics]
+             )
+         )
+
+     def backtest_exists(
+         self,
+         strategy,
+         backtest_date_range: BacktestDateRange,
+         storage_directory: str
+     ) -> bool:
+         """
+         Check if a backtest already exists for the given strategy
+         and backtest date range.
+
+         Args:
+             strategy: The strategy to check.
+             backtest_date_range: The backtest date range to check.
+             storage_directory: The directory where backtests are stored.
+
+         Returns:
+             bool: True if the backtest exists, False otherwise.
+         """
+         algorithm_id = strategy.algorithm_id
+         backtest_directory = os.path.join(storage_directory, algorithm_id)
+
+         if os.path.exists(backtest_directory):
+             backtest = Backtest.open(backtest_directory)
+             backtest_date_ranges = backtest.get_backtest_date_ranges()
+
+             for backtest_date_range_ref in backtest_date_ranges:
+
+                 if backtest_date_range_ref.start_date \
+                         == backtest_date_range.start_date and \
+                         backtest_date_range_ref.end_date \
+                         == backtest_date_range.end_date:
+                     return True
+
+         return False
+
+     def load_backtest_by_strategy_and_backtest_date_range(
+         self,
+         strategy,
+         backtest_date_range: BacktestDateRange,
+         storage_directory: str
+     ) -> Backtest:
+         """
+         Load a backtest for the given strategy and backtest date range.
+         If the backtest does not exist, an exception will be raised.
+         For the given backtest, only the run and metrics corresponding
+         to the backtest date range will be returned.
+
+         Args:
+             strategy: The strategy to load the backtest for.
+             backtest_date_range: The backtest date range to load.
+             storage_directory: The directory where backtests are stored.
+
+         Returns:
+             Backtest: instance of the loaded backtest with only
+                 the given run and metrics corresponding to the
+                 backtest date range.
+         """
+         algorithm_id = strategy.algorithm_id
+         backtest_directory = os.path.join(storage_directory, algorithm_id)
+
+         if os.path.exists(backtest_directory):
+             backtest = Backtest.open(backtest_directory)
+             run = backtest.get_backtest_run(backtest_date_range)
+             metadata = backtest.get_metadata()
+             return Backtest(backtest_runs=[run], metadata=metadata)
+         else:
+             raise OperationalException("Backtest does not exist.")
+
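Both lookups above key the on-disk layout by algorithm id: one sub-directory per algorithm under the storage directory, next to checkpoints.json. A hypothetical layout and load (the directory names are assumptions; Backtest.open is the loader used throughout this file):

import os

from investing_algorithm_framework.domain import Backtest

# backtests/
# ├── checkpoints.json
# ├── sma_cross_20_50/        one directory per algorithm id
# └── rsi_mean_reversion/
backtest = Backtest.open(os.path.join("./backtests", "sma_cross_20_50"))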
+     def initialize_data_sources_backtest(
+         self,
+         data_sources: List[DataSource],
+         backtest_date_range: BacktestDateRange,
+         show_progress: bool = True
+     ):
+         """
+         Function to initialize the data sources for the app in backtest mode.
+         This method should be called before running the algorithm in backtest
+         mode. It initializes all data sources so that they are
+         ready to be used.
+
+         Args:
+             data_sources (List[DataSource]): The data sources to initialize.
+             backtest_date_range (BacktestDateRange): The date range for the
+                 backtest. This should be an instance of BacktestDateRange.
+             show_progress (bool): Whether to show a progress bar when
+                 preparing the backtest data for each data provider.
+
+         Returns:
+             None
+         """
+         logger.info("Initializing data sources for backtest")
+
+         if data_sources is None or len(data_sources) == 0:
+             return
+
+         # Initialize all data sources
+         self._data_provider_service.index_backtest_data_providers(
+             data_sources, backtest_date_range, show_progress=show_progress
+         )
+
+         description = "Preparing backtest data for all data sources"
+         data_providers = self._data_provider_service\
+             .data_provider_index.get_all()
+
+         if show_progress:
+             data_providers = tqdm(
+                 data_providers, desc=description, colour="green"
+             )
+
+         # Prepare the backtest data for each data provider
+         for _, data_provider in data_providers:
+             data_provider.prepare_backtest_data(
+                 backtest_start_date=backtest_date_range.start_date,
+                 backtest_end_date=backtest_date_range.end_date
+             )
+
+     def run_vector_backtests(
+         self,
+         strategies: List,
+         portfolio_configuration: PortfolioConfiguration,
+         backtest_date_range: BacktestDateRange = None,
+         backtest_date_ranges: List[BacktestDateRange] = None,
+         snapshot_interval: SnapshotInterval = SnapshotInterval.DAILY,
+         risk_free_rate: Optional[float] = None,
+         skip_data_sources_initialization: bool = False,
+         show_progress: bool = True,
+         continue_on_error: bool = False,
+         window_filter_function: Optional[
+             Callable[[List[Backtest], BacktestDateRange], List[Backtest]]
+         ] = None,
+         final_filter_function: Optional[
+             Callable[[List[Backtest]], List[Backtest]]
+         ] = None,
+         backtest_storage_directory: Optional[Union[str, Path]] = None,
+         use_checkpoints: bool = True,
+         batch_size: int = 50,
+         checkpoint_batch_size: int = 25,
+         n_workers: Optional[int] = None,
+         dynamic_position_sizing: bool = False,
+     ):
+         """
+         OPTIMIZED version: Run vectorized backtests with optional
+         checkpointing, batching, and reduced I/O.
+
+         Optimizations:
+         - Checkpoint cache loaded once at start (reduces file I/O by 80-90%)
+         - Batch processing of strategies (reduces memory usage by 60-70%)
+         - Batch saving of backtests (reduces disk writes by 70-80%)
+         - Batch checkpoint updates (reduces checkpoint file writes)
+         - More aggressive memory cleanup
+         - Optional parallel processing (2-8x speedup on multi-core CPUs)
+
+         For 10,000 backtests:
+         - Sequential: 40-60% faster than original
+         - Parallel (8 cores): 3-5x faster than original
+
+         Args:
+             strategies: List of strategies to backtest.
+             portfolio_configuration: Portfolio configuration with
+                 initial balance, market, and trading symbol.
+             backtest_date_range: Single backtest date range to use
+                 for all strategies.
+             backtest_date_ranges: List of backtest date ranges to use
+                 for all strategies.
+             snapshot_interval: Interval for portfolio snapshots.
+             risk_free_rate: Risk-free rate for backtest metrics.
+             skip_data_sources_initialization: Whether to skip data
+                 source initialization.
+             show_progress: Whether to show progress bars.
+             continue_on_error: Whether to continue on errors.
+             window_filter_function: Filter function applied after each
+                 date range.
+             final_filter_function: Filter function applied at the end.
+             backtest_storage_directory: Directory to store backtests.
+             use_checkpoints: Whether to use checkpointing to resume
+                 interrupted backtests. If True, completed backtests will be
+                 saved to disk and skipped on subsequent runs. If False, all
+                 backtests will run every time (default: True).
+             batch_size: Number of strategies to process in
+                 each batch (default: 50).
+             checkpoint_batch_size: Number of backtests before batch
+                 save/checkpoint (default: 25).
+             n_workers: Number of parallel workers (default: None = sequential,
+                 -1 = use all CPU cores, N = use N workers).
+                 Recommended: os.cpu_count() - 1 to leave one core free.
+             dynamic_position_sizing: If True, position sizes are recalculated
+                 at each trade based on current portfolio value (similar to
+                 event-based backtesting). If False (default), position sizes
+                 are calculated once at the start based on initial portfolio
+                 value.
+
+         Returns:
+             List[Backtest]: List of backtest results.
+         """
+
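Before the implementation continues below, a hedged usage sketch of this entry point. The strategy list and the pre-built portfolio_configuration and backtest_service instances are assumptions, and the BacktestDateRange keyword names are likewise assumed; only the run_vector_backtests keyword names come from the signature above:

from datetime import datetime, timezone

from investing_algorithm_framework.domain import BacktestDateRange


def keep_top_half(backtests, date_range):
    # A window filter is any callable taking (backtests, date_range) and
    # returning the subset to keep; the ranking criterion is up to you.
    return backtests[: max(1, len(backtests) // 2)]


results = backtest_service.run_vector_backtests(
    strategies=strategies,                    # hypothetical strategy list
    portfolio_configuration=portfolio_configuration,
    backtest_date_ranges=[
        BacktestDateRange(
            start_date=datetime(2023, 1, 1, tzinfo=timezone.utc),
            end_date=datetime(2023, 6, 30, tzinfo=timezone.utc),
        ),
    ],
    backtest_storage_directory="./backtests",
    use_checkpoints=True,    # resume if a previous run was interrupted
    batch_size=50,
    n_workers=-1,            # use all CPU cores
    window_filter_function=keep_top_half,
)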
635
+ if use_checkpoints and backtest_storage_directory is None:
636
+ raise OperationalException(
637
+ "When using checkpoints, a backtest_storage_directory must "
638
+ "be provided"
639
+ )
640
+
641
+ if backtest_date_range is None and backtest_date_ranges is None:
642
+ raise OperationalException(
643
+ "Either backtest_date_range or backtest_date_ranges "
644
+ "must be provided"
645
+ )
646
+
647
+ # Collect all data sources
648
+ data_sources = []
649
+
650
+ for strategy in strategies:
651
+ data_sources.extend(strategy.data_sources)
652
+
653
+ # Get risk-free rate if not provided
654
+ if risk_free_rate is None:
655
+
656
+ if show_progress:
657
+ _print_progress(
658
+ "Retrieving risk free rate for metrics calculation ...",
659
+ show_progress
660
+ )
661
+
662
+ risk_free_rate = self._get_risk_free_rate()
663
+
664
+ if show_progress:
665
+ _print_progress(
666
+ f"Retrieved risk free rate of: {risk_free_rate}",
667
+ show_progress
668
+ )
669
+
670
+ # Load checkpoint cache only if checkpointing is enabled
671
+ checkpoint_cache = {}
672
+ if use_checkpoints and backtest_storage_directory is not None:
673
+ checkpoint_cache = self._load_checkpoint_cache(
674
+ backtest_storage_directory
675
+ )
676
+
677
+ # Create session cache to track backtests run in this session
678
+ # This ensures we only load backtests from this run, not pre-existing
679
+ # ones in the storage directory
680
+ session_cache = None
681
+ if backtest_storage_directory is not None:
682
+ session_cache = self._create_session_cache()
683
+
684
+ # Handle single date range case - convert to list
685
+ # for unified processing
686
+ if backtest_date_range is not None:
687
+ backtest_date_ranges = [backtest_date_range]
688
+
689
+ # Handle multiple date ranges with batching
690
+ active_strategies = strategies.copy()
691
+
692
+ # Sort and deduplicate date ranges
693
+ unique_date_ranges = set(backtest_date_ranges)
694
+ backtest_date_ranges = sorted(
695
+ unique_date_ranges, key=lambda x: x.start_date
696
+ )
697
+
698
+ # Track all backtests across date ranges (for combining later)
699
+ # {algorithm_id: [Backtest, Backtest, ...]}
700
+ backtests_by_algorithm = {}
701
+
702
+ # Validate algorithm IDs
703
+ self._validate_algorithm_ids(strategies)
704
+
705
+ for backtest_date_range in tqdm(
706
+ backtest_date_ranges,
707
+ colour="green",
708
+ desc="Running backtests for all date ranges",
709
+ disable=not show_progress
710
+ ):
711
+ if not skip_data_sources_initialization:
712
+ self.initialize_data_sources_backtest(
713
+ data_sources,
714
+ backtest_date_range,
715
+ show_progress=show_progress
716
+ )
717
+
718
+ start_date = backtest_date_range.start_date.strftime('%Y-%m-%d')
719
+ end_date = backtest_date_range.end_date.strftime('%Y-%m-%d')
720
+ active_algorithm_ids = [s.algorithm_id for s in active_strategies]
721
+
722
+ # Only check for checkpoints if use_checkpoints is True
723
+ if use_checkpoints:
724
+ _print_progress(
725
+ "Using checkpoints to skip completed backtests ...",
726
+ show_progress
727
+ )
728
+ checkpointed_ids = self._get_checkpointed_from_cache(
729
+ checkpoint_cache, backtest_date_range
730
+ )
731
+ missing_ids = set(active_algorithm_ids) - set(checkpointed_ids)
732
+ strategies_to_run = [
733
+ s for s in active_strategies
734
+ if s.algorithm_id in missing_ids
735
+ ]
736
+
737
+ # Add checkpointed IDs to session cache so they're included
738
+ # in final loading (they were run in a previous session but
739
+ # are part of the current batch)
740
+ if session_cache is not None:
741
+ for algo_id in checkpointed_ids:
742
+ if algo_id in active_algorithm_ids:
743
+ backtest_path = os.path.join(
744
+ backtest_storage_directory, algo_id
745
+ )
746
+ session_cache["backtests"][algo_id] = backtest_path
747
+
748
+ # Count how many active strategies are in the checkpoint
749
+ matched_checkpoint_count = len(
750
+ set(active_algorithm_ids) & set(checkpointed_ids)
751
+ )
752
+
753
+ if show_progress:
754
+ _print_progress(
755
+ f"Active strategies: {len(active_algorithm_ids)}, "
756
+ f"checkpoint file has: {len(checkpointed_ids)}, "
757
+ f"matched: {matched_checkpoint_count}, "
758
+ f"running {len(strategies_to_run)} new backtests",
759
+ show_progress
760
+ )
761
+ else:
762
+ # Run all strategies when checkpoints are disabled
763
+ strategies_to_run = active_strategies
764
+
765
+ all_backtests = []
766
+ batch_buffer = []
767
+
768
+ if len(strategies_to_run) > 0:
769
+ # Determine if we should use parallel processing
770
+ use_parallel = n_workers is not None and n_workers != 0
771
+
772
+ if use_parallel:
773
+ # Parallel processing of backtests (batches per worker)
774
+ import multiprocessing
775
+ from concurrent.futures import \
776
+ ProcessPoolExecutor, as_completed
777
+
778
+ # Determine number of workers
779
+ if n_workers == -1:
780
+ n_workers = multiprocessing.cpu_count()
781
+
782
+ # Calculate optimal batch size per worker
783
+ # Each worker processes a batch of strategies
784
+ worker_batch_size = max(
785
+ 1, len(strategies_to_run) // n_workers
786
+ )
787
+
788
+ # Split strategies into batches for each worker
789
+ strategy_batches = [
790
+ strategies_to_run[i:i + worker_batch_size]
791
+ for i in range(
792
+ 0, len(strategies_to_run), worker_batch_size
793
+ )
794
+ ]
795
+
796
+ if show_progress:
797
+ _print_progress(
798
+ f"Running {len(strategies_to_run)} backtests on "
799
+ f"{n_workers} workers "
800
+ f"({len(strategy_batches)} batches, "
801
+ f"~{worker_batch_size} strategies per worker)",
802
+ show_progress
803
+ )
804
+
805
+ worker_args = []
806
+
807
+ for batch in strategy_batches:
808
+ worker_args.append((
809
+ batch,
810
+ backtest_date_range,
811
+ portfolio_configuration,
812
+ snapshot_interval,
813
+ risk_free_rate,
814
+ continue_on_error,
815
+ self._data_provider_service.copy(),
816
+ False,
817
+ dynamic_position_sizing
818
+ ))
819
+
820
+ # Execute batches in parallel
821
+ with (ProcessPoolExecutor(max_workers=n_workers)
822
+ as executor):
823
+ # Submit all batch tasks
824
+ futures = [
825
+ executor.submit(
826
+ self._run_batch_backtest_worker, args
827
+ )
828
+ for args in worker_args
829
+ ]
830
+
831
+ # Track completed batches for periodic cleanup
832
+ completed_count = 0
833
+
834
+ # Collect results with progress bar
835
+ for future in tqdm(
836
+ as_completed(futures),
837
+ total=len(futures),
838
+ colour="green",
839
+ desc="Running backtests for "
840
+ f"{start_date} to {end_date}",
841
+ disable=not show_progress
842
+ ):
843
+ try:
844
+ batch_result = future.result()
845
+ if batch_result:
846
+ # Add all results from this batch
847
+ all_backtests.extend(batch_result)
848
+ batch_buffer.extend(batch_result)
849
+
850
+ # Save and create checkpoint files when
851
+ # storage directory provided
852
+ # This builds checkpoint infrastructure
853
+ # for future runs with use_checkpoints=True
854
+ if backtest_storage_directory is not None:
855
+ self._save_batch_if_full(
856
+ batch_buffer,
857
+ checkpoint_batch_size,
858
+ backtest_date_range,
859
+ backtest_storage_directory,
860
+ checkpoint_cache,
861
+ session_cache
862
+ )
863
+
864
+ # Periodic garbage collection every 10 batches
865
+ # to prevent memory accumulation
866
+ completed_count += 1
867
+ if completed_count % 10 == 0:
868
+ gc.collect()
869
+
870
+ except Exception as e:
871
+ if continue_on_error:
872
+ logger.error(
873
+ f"Error processing batch: {e}"
874
+ )
875
+ continue
876
+ else:
877
+ raise
878
+
879
+ # Save remaining batch and create checkpoint files when
880
+ # storage directory provided
881
+ if backtest_storage_directory is not None:
882
+ self._save_remaining_batch(
883
+ batch_buffer,
884
+ backtest_date_range,
885
+ backtest_storage_directory,
886
+ checkpoint_cache,
887
+ session_cache
888
+ )
889
+
890
+ else:
891
+ # Process strategies in batches to manage memory
892
+ # Split strategies_to_run into batches based on batch_size
893
+ strategy_batches = [
894
+ strategies_to_run[i:i + batch_size]
895
+ for i in range(0, len(strategies_to_run), batch_size)
896
+ ]
897
+
898
+ if show_progress and len(strategy_batches) > 1:
899
+ _print_progress(
900
+ f"Processing {len(strategies_to_run)} "
901
+ "strategies in "
902
+ f"{len(strategy_batches)} batches "
903
+ f"of ~{batch_size} strategies each",
904
+ show_progress
905
+ )
906
+
907
+ # Process each batch
908
+ for batch_idx, strategy_batch in enumerate(tqdm(
909
+ strategy_batches,
910
+ colour="green",
911
+ desc="Processing strategy batches",
912
+ disable=not show_progress or len(strategy_batches) == 1
913
+ )):
914
+ worker_args = (
915
+ strategy_batch,
916
+ backtest_date_range,
917
+ portfolio_configuration,
918
+ snapshot_interval,
919
+ risk_free_rate,
920
+ continue_on_error,
921
+ self._data_provider_service,
922
+ False, # Don't show progress for individual
923
+ dynamic_position_sizing
924
+ )
925
+
926
+ try:
927
+ batch_result = \
928
+ self._run_batch_backtest_worker(worker_args)
929
+
930
+ if batch_result:
931
+ all_backtests.extend(batch_result)
+ batch_buffer.extend(batch_result)
+
+ # Save and create checkpoint files when a
+ # storage directory is provided
+ # This builds checkpoint infrastructure for
+ # future runs with use_checkpoints=True
+ if backtest_storage_directory is not None:
+ self._save_batch_if_full(
+ batch_buffer,
+ checkpoint_batch_size,
+ backtest_date_range,
+ backtest_storage_directory,
+ checkpoint_cache,
+ session_cache
+ )
+
+ # Periodic garbage collection every 5 batches
+ # to prevent memory accumulation
+ if (batch_idx + 1) % 5 == 0:
+ gc.collect()
+
+ except Exception as e:
+ if continue_on_error:
+ logger.error(
+ f"Error processing "
+ f"batch {batch_idx + 1}: {e}"
+ )
+ else:
+ raise
+
+ # Save the remaining batch and create checkpoint files
+ # when a storage directory is provided
+ if backtest_storage_directory is not None:
+ self._save_remaining_batch(
+ batch_buffer,
+ backtest_date_range,
+ backtest_storage_directory,
+ checkpoint_cache,
+ session_cache
+ )
+
+ # Store backtests in memory when no storage directory is provided
+ # This must happen regardless of whether strategies_to_run
+ # was empty or not
+ if backtest_storage_directory is None:
+ for backtest in all_backtests:
+ if backtest.algorithm_id not in backtests_by_algorithm:
+ backtests_by_algorithm[backtest.algorithm_id] = []
+ backtests_by_algorithm[backtest.algorithm_id]\
+ .append(backtest)
+
+ # Load checkpointed backtests that were SKIPPED (not run in this
+ # iteration) if needed for filtering. Only load backtests that
+ # were checkpointed from a previous session, not ones that were
+ # just run and checkpointed in this session.
+ if use_checkpoints and (window_filter_function is not None
+ or final_filter_function is not None):
+ # Get IDs of strategies that were actually run in this
+ # iteration
+ run_algorithm_ids = set(s.algorithm_id
+ for s in strategies_to_run)
+ # Only load backtests that were SKIPPED
+ # (checkpointed, not run)
+ skipped_algorithm_ids = [
+ algo_id for algo_id in active_algorithm_ids
+ if algo_id not in run_algorithm_ids
+ ]
+
+ if len(skipped_algorithm_ids) > 0:
+ checkpointed_backtests = self._load_backtests_from_cache(
+ checkpoint_cache,
+ backtest_date_range,
+ backtest_storage_directory,
+ skipped_algorithm_ids
+ )
+ all_backtests.extend(checkpointed_backtests)
+
+ # Apply window filter function
+ if window_filter_function is not None:
+ if show_progress:
+ _print_progress(
+ "Applying window filter function ...",
+ show_progress
+ )
+ filtered_backtests = window_filter_function(
+ all_backtests, backtest_date_range
+ )
+ active_strategies = [
+ s for s in active_strategies
+ if s.algorithm_id in [b.algorithm_id
+ for b in filtered_backtests]
+ ]
+
+ # Update tracking based on whether we're using
+ # storage or memory
+ if backtest_storage_directory is None:
+ # Update backtests_by_algorithm after filtering
+ # Remove algorithms that were filtered out
+ filtered_algorithm_ids = set(b.algorithm_id
+ for b in filtered_backtests)
+ algorithms_to_remove = [
+ alg_id for alg_id in backtests_by_algorithm.keys()
+ if alg_id not in filtered_algorithm_ids
+ ]
+ for alg_id in algorithms_to_remove:
+ del backtests_by_algorithm[alg_id]
+ else:
+ # When using storage, mark filtered-out backtests
+ # with a metadata flag instead of deleting them
+ filtered_algorithm_ids = set(
+ b.algorithm_id for b in filtered_backtests)
+ algorithms_to_mark = [
+ alg_id for alg_id in active_algorithm_ids
+ if alg_id not in filtered_algorithm_ids
+ ]
+
+ # Update session cache to only include filtered backtests
+ if session_cache is not None:
+ session_cache["backtests"] = {
+ k: v for k, v in session_cache["backtests"].items()
+ if k in filtered_algorithm_ids
+ }
+
+ # Clear filtered_out flag for backtests that passed
+ # the filter (they may have been filtered out before)
+ for alg_id in filtered_algorithm_ids:
+ backtest_dir = os.path.join(
+ backtest_storage_directory, alg_id
+ )
+ if os.path.exists(backtest_dir):
+ try:
+ backtest = Backtest.open(backtest_dir)
+ if backtest.metadata is not None and \
+ backtest.metadata.get(
+ 'filtered_out', False
+ ):
+ # Clear the filtered_out flag
+ backtest.metadata['filtered_out'] = False
+ if 'filtered_out_at_date_range' in \
+ backtest.metadata:
+ del backtest.metadata[
+ 'filtered_out_at_date_range'
+ ]
+ backtest.save(backtest_dir)
+ except Exception as e:
+ logger.warning(
+ f"Could not clear filtered_out flag "
+ f"for backtest {alg_id}: {e}"
+ )
+
+ # Mark filtered-out backtests with a metadata flag
+ # This preserves them in storage for future runs
+ for alg_id in algorithms_to_mark:
+ backtest_dir = os.path.join(
+ backtest_storage_directory, alg_id
+ )
+ if os.path.exists(backtest_dir):
+ try:
+ # Load the backtest
+ backtest = Backtest.open(backtest_dir)
+ start_date = backtest_date_range.start_date
+ end_date = backtest_date_range.end_date
+ date_key = (
+ f"{start_date.isoformat()}_"
+ f"{end_date.isoformat()}"
+ )
+ # Mark as filtered out in metadata
+ if backtest.metadata is None:
+ backtest.metadata = {}
+ backtest.metadata['filtered_out'] = True
+ backtest.metadata[
+ 'filtered_out_at_date_range'
+ ] = (
+ backtest_date_range.name
+ if backtest_date_range.name
+ else date_key
+ )
+
+ # Save the updated backtest
+ backtest.save(backtest_dir)
+
+ except Exception as e:
+ logger.warning(
+ f"Could not mark backtest {alg_id} "
+ f"as filtered: {e}"
+ )
+
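+ # After this pass, a filtered-out backtest carries metadata along the
+ # lines of the sketch below (editor's illustration; the date-range name
+ # is used when set, otherwise the "<start>_<end>" ISO key):
+ #
+ #     {"filtered_out": True,
+ #      "filtered_out_at_date_range": "2023-01-01T00:00:00_2023-06-01T00:00:00"}
+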
+ # Clear memory
+ del all_backtests
+ del batch_buffer
+ gc.collect()
+
+ # Combine backtests with the same algorithm_id across date ranges
+ if show_progress:
+ _print_progress(
+ "Combining backtests across date ranges ...",
+ show_progress
+ )
+
+ # After window filtering, active_strategies contains only algorithms
+ # that passed all window filters. Use these for final processing.
+ active_algorithm_ids_final = set(
+ s.algorithm_id for s in active_strategies
+ )
+
+ loaded_from_storage = False
+ if backtest_storage_directory is not None:
+ # Save session cache to disk before final loading
+ if session_cache is not None:
+ self._save_session_cache(
+ session_cache, backtest_storage_directory
+ )
+
+ # Load ONLY from session cache - this ensures we only get
+ # backtests from this run, not pre-existing ones in the directory
+ all_backtests = self._load_backtests_from_session(
+ session_cache,
+ active_algorithm_ids_final,
+ show_progress=show_progress
+ )
+
+ if show_progress and session_cache is not None:
+ total_in_session = len(session_cache.get("backtests", {}))
+ loaded_count = len(all_backtests)
+ if total_in_session > loaded_count:
+ _print_progress(
+ f"Loaded {loaded_count} backtests from session "
+ f"({total_in_session - loaded_count} filtered out)",
+ show_progress
+ )
+
+ loaded_from_storage = True
+ else:
+ # Combine from memory
+ combined_backtests = []
+ for algorithm_id, backtests_list in backtests_by_algorithm.items():
+ if len(backtests_list) == 1:
+ combined_backtests.append(backtests_list[0])
+ else:
+ # Combine multiple backtests for the same algorithm
+ from investing_algorithm_framework.domain import (
+ combine_backtests)
+ combined = combine_backtests(backtests_list)
+ combined_backtests.append(combined)
+
+ all_backtests = combined_backtests
+
+ # Generate summary metrics
+ for backtest in tqdm(
+ all_backtests,
+ colour="green",
+ desc="Generating backtest summary metrics",
+ disable=not show_progress
+ ):
+ backtest.backtest_summary = generate_backtest_summary_metrics(
+ backtest.get_all_backtest_metrics()
+ )
+
+ # Apply final filter function
+ if final_filter_function is not None:
+ if show_progress:
+ _print_progress(
+ "Applying final filter function ...",
+ show_progress
+ )
+ all_backtests = final_filter_function(all_backtests)
+
+ # Only save if we didn't load from storage (avoid duplicate saves)
+ # When loaded from storage, backtests are already properly
+ # saved during execution
+ if backtest_storage_directory is not None and not loaded_from_storage:
+ # Save final combined backtests
+ save_backtests_to_directory(
+ backtests=all_backtests,
+ directory_path=backtest_storage_directory,
+ show_progress=show_progress
+ )
+
+ # Cleanup session file at the end
+ if backtest_storage_directory is not None:
+ session_file = os.path.join(
+ backtest_storage_directory, "backtest_session.json"
+ )
+ if os.path.exists(session_file):
+ os.remove(session_file)
+
+ return all_backtests
+
+ def _load_checkpoint_cache(self, storage_directory: str) -> Dict:
+ """Load checkpoint file into memory cache once."""
+ checkpoint_file = os.path.join(storage_directory, "checkpoints.json")
+ if os.path.exists(checkpoint_file):
+ with open(checkpoint_file, "r") as f:
+ return json.load(f)
+ return {}
+
+ def _get_checkpointed_from_cache(
+ self,
+ cache: Dict,
+ date_range: BacktestDateRange
+ ) -> List[str]:
+ """Get checkpointed algorithm IDs from cache."""
+ key = (f"{date_range.start_date.isoformat()}_"
+ f"{date_range.end_date.isoformat()}")
+ return cache.get(key, [])
+
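+ # Editor's sketch of the checkpoint lookup: entries are keyed by
+ # "<start>_<end>" ISO timestamps, so a resumed run can skip the
+ # algorithm ids already completed for a window (ids are hypothetical):
+ #
+ #     cache = self._load_checkpoint_cache("backtests")
+ #     # cache == {"2023-01-01T00:00:00_2023-06-01T00:00:00": ["algo_1"]}
+ #     done = self._get_checkpointed_from_cache(cache, date_range)
+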
+ def _batch_save_and_checkpoint(
+ self,
+ backtests: List[Backtest],
+ date_range: BacktestDateRange,
+ storage_directory: str,
+ checkpoint_cache: Dict,
+ show_progress: bool = False,
+ session_cache: Dict = None
+ ):
+ """Save a batch of backtests and update checkpoint cache."""
+ if len(backtests) == 0:
+ return
+
+ # Save backtests to disk
+ save_backtests_to_directory(
+ backtests=backtests,
+ directory_path=storage_directory,
+ show_progress=show_progress
+ )
+
+ # Update checkpoint cache
+ key = (f"{date_range.start_date.isoformat()}_"
+ f"{date_range.end_date.isoformat()}")
+ if key not in checkpoint_cache:
+ checkpoint_cache[key] = []
+
+ for backtest in backtests:
+ if backtest.algorithm_id not in checkpoint_cache[key]:
+ checkpoint_cache[key].append(backtest.algorithm_id)
+
+ # Write checkpoint file with forced flush to disk
+ checkpoint_file = os.path.join(storage_directory, "checkpoints.json")
+ with open(checkpoint_file, "w") as f:
+ json.dump(checkpoint_cache, f, indent=4)
+ f.flush()
+ os.fsync(f.fileno())  # Force write to disk
+
+ # Update session cache if provided
+ if session_cache is not None:
+ self._update_session_cache(
+ backtests, storage_directory, session_cache
+ )
+
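+ # The resulting checkpoints.json layout, sketched for illustration
+ # (the algorithm ids are hypothetical):
+ #
+ #     {
+ #         "2023-01-01T00:00:00_2023-06-01T00:00:00": [
+ #             "momentum_v1",
+ #             "mean_reversion_v2"
+ #         ]
+ #     }
+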
+ def _create_session_cache(self) -> Dict:
+ """
+ Create a new session cache to track backtests run in this session.
+
+ Returns:
+ Dict: Empty session cache structure
+ """
+ return {
+ "session_id": datetime.now(timezone.utc).isoformat(),
+ "backtests": {} # algorithm_id -> backtest_path
+ }
+
+ def _update_session_cache(
+ self,
+ backtests: List[Backtest],
+ storage_directory: str,
+ session_cache: Dict
+ ):
+ """
+ Update session cache with newly saved backtests.
+
+ Args:
+ backtests: List of backtests that were saved
+ storage_directory: Directory where backtests are stored
+ session_cache: Session cache to update
+ """
+ for backtest in backtests:
+ algorithm_id = backtest.algorithm_id
+ backtest_path = os.path.join(storage_directory, algorithm_id)
+ session_cache["backtests"][algorithm_id] = backtest_path
+
+ def _save_session_cache(
+ self,
+ session_cache: Dict,
+ storage_directory: str
+ ):
+ """
+ Save session cache to disk.
+
+ Args:
+ session_cache: Session cache to save
+ storage_directory: Directory to save the session file
+ """
+ session_file = os.path.join(
+ storage_directory, "backtest_session.json"
+ )
+ with open(session_file, "w") as f:
+ json.dump(session_cache, f, indent=4)
+ f.flush()
+ os.fsync(f.fileno())
+
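+ # For reference, the backtest_session.json written here would look
+ # roughly like the sketch below (paths and ids are illustrative):
+ #
+ #     {
+ #         "session_id": "2024-01-01T12:00:00+00:00",
+ #         "backtests": {"momentum_v1": "backtests/momentum_v1"}
+ #     }
+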
+ def _load_backtests_from_session(
+ self,
+ session_cache: Dict,
+ active_algorithm_ids: set = None,
+ show_progress: bool = False
+ ) -> List[Backtest]:
+ """
+ Load backtests from the current session cache.
+
+ This method efficiently loads only the backtests that were run
+ in the current session, avoiding loading pre-existing backtests
+ from the storage directory.
+
+ Args:
+ session_cache: Session cache containing backtest paths
+ active_algorithm_ids: Optional set of algorithm IDs to filter by
+ (e.g., those that passed window filters)
+ show_progress: Whether to show progress bar
+
+ Returns:
+ List[Backtest]: List of backtests from the current session
+ """
+ backtests = []
+ backtest_paths = session_cache.get("backtests", {})
+
+ # Filter by active_algorithm_ids if provided
+ if active_algorithm_ids is not None:
+ paths_to_load = {
+ alg_id: path for alg_id, path in backtest_paths.items()
+ if alg_id in active_algorithm_ids
+ }
+ else:
+ paths_to_load = backtest_paths
+
+ items = list(paths_to_load.items())
+
+ for algorithm_id, backtest_path in tqdm(
+ items,
+ colour="green",
+ desc="Loading session backtests",
+ disable=not show_progress
+ ):
+ try:
+ if os.path.exists(backtest_path):
+ backtest = Backtest.open(backtest_path)
+ backtests.append(backtest)
+ else:
+ logger.warning(
+ f"Backtest path does not exist: {backtest_path}"
+ )
+ except Exception as e:
+ logger.warning(
+ f"Could not load backtest {algorithm_id} "
+ f"from {backtest_path}: {e}"
+ )
+
+ return backtests
+
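+ # Usage sketch (editor's illustration): reload only this run's results,
+ # optionally narrowed to the ids that survived window filtering:
+ #
+ #     survivors = {"momentum_v1"}  # hypothetical ids
+ #     backtests = self._load_backtests_from_session(
+ #         session_cache, active_algorithm_ids=survivors
+ #     )
+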
+ def _load_backtests_from_cache(
+ self,
+ checkpoint_cache: Dict,
+ date_range: BacktestDateRange,
+ storage_directory: str,
+ algorithm_ids: List[str]
+ ) -> List[Backtest]:
+ """Load specific backtests from disk based on checkpoint cache."""
+ checkpointed_ids = self._get_checkpointed_from_cache(
+ checkpoint_cache, date_range
+ )
+ backtests = []
+
+ for algo_id in checkpointed_ids:
+ if algo_id in algorithm_ids:
+ try:
+ backtest_dir = os.path.join(storage_directory, algo_id)
+ if os.path.exists(backtest_dir):
+ backtest = Backtest.open(
+ backtest_dir,
+ backtest_date_ranges=[date_range]
+ )
+ backtests.append(backtest)
+ except Exception as e:
+ logger.warning(
+ f"Could not load backtest for {algo_id}: {e}"
+ )
+
+ return backtests
+
+ def _process_strategy_batch(
+ self,
+ strategies: List,
+ backtest_date_range: BacktestDateRange,
+ initial_amount: float,
+ snapshot_interval: SnapshotInterval,
+ risk_free_rate: float,
+ market: Optional[str],
+ trading_symbol: Optional[str],
+ continue_on_error: bool,
+ show_progress: bool = False
+ ) -> List[Backtest]:
+ """
+ Process a batch of strategies sequentially.
+
+ Args:
+ strategies: List of strategies to process
+ backtest_date_range: Date range for backtesting
+ initial_amount: Initial portfolio amount
+ snapshot_interval: Interval for portfolio snapshots
+ risk_free_rate: Risk-free rate for metrics
+ market: Optional market filter
+ trading_symbol: Optional trading symbol
+ continue_on_error: Whether to continue on errors
+ show_progress: Whether to show individual progress
+
+ Returns:
+ List of completed backtests
+ """
+ backtests = []
+
+ for strategy in strategies:
+ try:
+ backtest = self.run_vector_backtest(
+ backtest_date_range=backtest_date_range,
+ initial_amount=initial_amount,
+ strategy=strategy,
+ snapshot_interval=snapshot_interval,
+ risk_free_rate=risk_free_rate,
+ skip_data_sources_initialization=True,
+ market=market,
+ trading_symbol=trading_symbol,
+ continue_on_error=continue_on_error,
+ backtest_storage_directory=None,
+ show_progress=show_progress,
+ )
+ backtests.append(backtest)
+
+ except Exception as e:
+ if continue_on_error:
+ logger.error(
+ f"Error in backtest for {strategy.algorithm_id}: {e}"
+ )
+ continue
+ else:
+ raise
+
+ return backtests
+
+ def _save_batch_if_full(
+ self,
+ batch_buffer: List[Backtest],
+ checkpoint_batch_size: int,
+ backtest_date_range: BacktestDateRange,
+ backtest_storage_directory: str,
+ checkpoint_cache: Dict,
+ session_cache: Dict = None
+ ) -> bool:
+ """
+ Save batch if buffer is full and clear memory.
+
+ Args:
+ batch_buffer: List of backtests to potentially save.
+ checkpoint_batch_size: Threshold for saving.
+ backtest_date_range: The backtest date range.
+ backtest_storage_directory: Directory to save to.
+ checkpoint_cache: Checkpoint cache to update.
+ session_cache: Session cache to track backtests from this run.
+
+ Returns:
+ True if batch was saved, False otherwise
+ """
+ if len(batch_buffer) >= checkpoint_batch_size:
+ self._batch_save_and_checkpoint(
+ batch_buffer,
+ backtest_date_range,
+ backtest_storage_directory,
+ checkpoint_cache,
+ show_progress=False,
+ session_cache=session_cache
+ )
+ batch_buffer.clear()
+ gc.collect()
+ return True
+ return False
+
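+ # Flush behavior, sketched: with checkpoint_batch_size=25 the buffer is
+ # saved and cleared once 25 results accumulate; smaller remainders are
+ # handled by _save_remaining_batch at the end of the window:
+ #
+ #     saved = self._save_batch_if_full(
+ #         batch_buffer, 25, date_range, "backtests", checkpoint_cache
+ #     )
+ #     # saved is True only when the threshold was reached
+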
+ def _save_remaining_batch(
+ self,
+ batch_buffer: List[Backtest],
+ backtest_date_range: BacktestDateRange,
+ backtest_storage_directory: str,
+ checkpoint_cache: Dict,
+ session_cache: Dict = None
+ ):
+ """
+ Save any remaining backtests in the buffer.
+
+ Args:
+ batch_buffer: List of backtests to save.
+ backtest_date_range: The backtest date range.
+ backtest_storage_directory: Directory to save to.
+ checkpoint_cache: Checkpoint cache to update.
+ session_cache: Session cache to track backtests from this run.
+ """
+ if len(batch_buffer) > 0:
+ self._batch_save_and_checkpoint(
+ batch_buffer,
+ backtest_date_range,
+ backtest_storage_directory,
+ checkpoint_cache,
+ show_progress=False,
+ session_cache=session_cache
+ )
+ batch_buffer.clear()
+ gc.collect()
+
+ @staticmethod
+ def _run_batch_backtest_worker(args):
+ """
+ Static worker function for parallel BATCH backtest execution.
+
+ Each worker processes a batch of strategies, reusing the same
+ data providers and initialization. This is MUCH more efficient
+ than spawning a worker for each individual backtest because:
+ - Dramatically reduces process creation overhead
+ - Shares data provider initialization across the batch
+ - Better memory efficiency and cache locality
+ - Optimal for 1,000+ backtests
+
+ Args:
+ args: Tuple containing (
+ strategy_batch,
+ backtest_date_range,
+ portfolio_configuration,
+ snapshot_interval,
+ risk_free_rate,
+ continue_on_error,
+ data_provider_service,
+ show_progress,
+ dynamic_position_sizing
+ )
+
+ Returns:
+ List[Backtest]: List of completed backtest results
+ """
+ (
+ strategy_batch,
+ backtest_date_range,
+ portfolio_configuration,
+ snapshot_interval,
+ risk_free_rate,
+ continue_on_error,
+ data_provider_service,
+ show_progress,
+ dynamic_position_sizing
+ ) = args
+
+ vector_backtest_service = VectorBacktestService(
+ data_provider_service=data_provider_service
+ )
+
+ batch_results = []
+ start_date = backtest_date_range.start_date.strftime('%Y-%m-%d')
+ end_date = backtest_date_range.end_date.strftime('%Y-%m-%d')
+ if show_progress:
+ strategy_batch = tqdm(
+ strategy_batch,
+ colour="green",
+ desc=f"Running backtests for {start_date} to {end_date}",
+ disable=not show_progress
+ )
+
+ for strategy in strategy_batch:
+ try:
+ backtest_run = vector_backtest_service.run(
+ strategy=strategy,
+ backtest_date_range=backtest_date_range,
+ portfolio_configuration=portfolio_configuration,
+ risk_free_rate=risk_free_rate,
+ dynamic_position_sizing=dynamic_position_sizing,
+ )
+ backtest = Backtest(
+ algorithm_id=strategy.algorithm_id,
+ backtest_runs=[backtest_run],
+ metadata=strategy.metadata if hasattr(
+ strategy, 'metadata') else None,
+ risk_free_rate=risk_free_rate
+ )
+ batch_results.append(backtest)
+
+ except Exception as e:
+ if continue_on_error:
+ logger.error(
+ "Worker error for strategy "
+ f"{strategy.algorithm_id}: {e}"
+ )
+ continue
+ else:
+ raise
+
+ return batch_results
+
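+ # Dispatch sketch (editor's illustration; assumes a standard
+ # multiprocessing Pool, picklable arguments, and "BacktestService" as a
+ # stand-in for this class's actual name):
+ #
+ #     from multiprocessing import Pool
+ #
+ #     args_list = [
+ #         (batch, date_range, portfolio_config, snapshot_interval,
+ #          risk_free_rate, True, data_provider_service, False, False)
+ #         for batch in strategy_batches
+ #     ]
+ #     with Pool(n_workers) as pool:
+ #         for batch_result in pool.map(
+ #             BacktestService._run_batch_backtest_worker, args_list
+ #         ):
+ #             all_backtests.extend(batch_result)
+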
+ def run_vector_backtest(
+ self,
+ strategy,
+ backtest_date_range: BacktestDateRange = None,
+ backtest_date_ranges: List[BacktestDateRange] = None,
+ portfolio_configuration: PortfolioConfiguration = None,
+ snapshot_interval: SnapshotInterval = SnapshotInterval.DAILY,
+ metadata: Optional[Dict[str, str]] = None,
+ risk_free_rate: Optional[float] = None,
+ skip_data_sources_initialization: bool = False,
+ initial_amount: float = None,
+ market: str = None,
+ trading_symbol: str = None,
+ continue_on_error: bool = False,
+ backtest_storage_directory: Optional[Union[str, Path]] = None,
+ use_checkpoints: bool = True,
+ show_progress: bool = False,
+ n_workers: Optional[int] = None,
+ batch_size: int = 50,
+ checkpoint_batch_size: int = 25,
+ dynamic_position_sizing: bool = False,
+ ) -> Backtest:
+ """
+ Run an optimized vectorized backtest for a single strategy.
+
+ This method leverages the optimized run_vector_backtests
+ implementation, providing the same performance benefits (batching,
+ checkpointing, parallel processing) for single-strategy backtests.
+
+ Args:
+ strategy: The strategy object to backtest.
+ backtest_date_range: Single backtest date range to use.
+ backtest_date_ranges: List of backtest date ranges to use.
+ The strategy will be backtested across all date ranges and
+ results will be combined.
+ portfolio_configuration: Portfolio configuration to use. If not
+ provided, will be created from initial_amount, market, and
+ trading_symbol parameters.
+ snapshot_interval: The snapshot interval to use for the backtest.
+ metadata: Metadata to attach to the backtest report.
+ risk_free_rate: The risk-free rate to use for the backtest.
+ If not provided, will be fetched automatically.
+ skip_data_sources_initialization: Whether to skip data source
+ initialization.
+ initial_amount: Initial amount to start the backtest with.
+ Only used if portfolio_configuration is not provided.
+ market: Market to use for the backtest. Only used if
+ portfolio_configuration is not provided.
+ trading_symbol: Trading symbol to use. Only used if
+ portfolio_configuration is not provided.
+ continue_on_error: Whether to continue if an error occurs.
+ backtest_storage_directory: Directory to save the backtest to.
+ use_checkpoints: Whether to use checkpointing to resume interrupted
+ backtests. If True, completed backtests will be saved to disk
+ and skipped on subsequent runs. If False, the backtest will
+ run every time (default: True).
+ show_progress: Whether to show progress bars.
+ n_workers: Number of parallel workers (None = sequential).
+ batch_size: Number of strategies to process in each batch.
+ checkpoint_batch_size: Number of backtests before batch save.
+ dynamic_position_sizing: If True, position sizes are recalculated
+ at each trade based on current portfolio value. If False
+ (default), position sizes are calculated once at the start.
+
+ Returns:
+ Backtest: Instance of Backtest for the single strategy.
+ """
+ # Create portfolio configuration if not provided
+ if portfolio_configuration is None:
+
+ if initial_amount is None:
+ # Try to get from existing portfolio configurations
+ portfolio_configurations = \
+ self._portfolio_configuration_service.get_all()
+
+ if portfolio_configurations \
+ and len(portfolio_configurations) > 0:
+ portfolio_configuration = portfolio_configurations[0]
+ else:
+ raise OperationalException(
+ "No portfolio configuration provided and no "
+ "initial_amount specified. "
+ "Please provide either a portfolio_configuration "
+ "or initial_amount, "
+ "market, and trading_symbol parameters."
+ )
+ else:
+ portfolio_configuration = PortfolioConfiguration(
+ identifier="backtest_portfolio",
+ market=market or "BACKTEST",
+ trading_symbol=trading_symbol or "USDT",
+ initial_balance=initial_amount
+ )
+
+ # Use the optimized run_vector_backtests method
+ backtests = self.run_vector_backtests(
+ strategies=[strategy],
+ portfolio_configuration=portfolio_configuration,
+ backtest_date_range=backtest_date_range,
+ backtest_date_ranges=backtest_date_ranges,
+ snapshot_interval=snapshot_interval,
+ risk_free_rate=risk_free_rate,
+ skip_data_sources_initialization=skip_data_sources_initialization,
+ show_progress=show_progress,
+ continue_on_error=continue_on_error,
+ backtest_storage_directory=backtest_storage_directory,
+ use_checkpoints=use_checkpoints,
+ batch_size=batch_size,
+ checkpoint_batch_size=checkpoint_batch_size,
+ n_workers=n_workers,
+ dynamic_position_sizing=dynamic_position_sizing,
+ )
+
+ # Extract the single backtest result
+ if backtests and len(backtests) > 0:
+ backtest = backtests[0]
+
+ # Add metadata if provided
+ if metadata is not None:
+ backtest.metadata = metadata
+ elif backtest.metadata is None:
+ if (hasattr(strategy, 'metadata')
+ and strategy.metadata is not None):
+ backtest.metadata = strategy.metadata
+ else:
+ backtest.metadata = {}
+
+ return backtest
+ else:
+ # Return empty backtest if no results
+ return Backtest(
+ algorithm_id=strategy.algorithm_id,
+ backtest_runs=[],
+ risk_free_rate=risk_free_rate or 0.0,
+ metadata=metadata or {}
+ )
+
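+ # End-to-end usage sketch (editor's illustration; the strategy object,
+ # market, and symbol are assumptions, and the BacktestDateRange keyword
+ # names are inferred from the attributes used above):
+ #
+ #     date_range = BacktestDateRange(
+ #         start_date=datetime(2023, 1, 1),
+ #         end_date=datetime(2023, 6, 1),
+ #     )
+ #     backtest = self.run_vector_backtest(
+ #         strategy=my_strategy,
+ #         backtest_date_range=date_range,
+ #         initial_amount=10_000,
+ #         market="BINANCE",
+ #         trading_symbol="EUR",
+ #         show_progress=True,
+ #     )
+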
+ def _get_risk_free_rate(self) -> float:
+ """
+ Get the risk-free rate used for backtest metrics.
+
+ Returns:
+ float: The risk-free rate.
+ """
+ risk_free_rate = get_risk_free_rate_us()
+
+ if risk_free_rate is None:
+ raise OperationalException(
+ "Could not retrieve risk free rate. "
+ "Please provide a risk-free rate as an argument when "
+ "running your backtest or make sure you have an internet "
+ "connection."
+ )
+
+ return risk_free_rate
+
+ def run_backtests(
+ self,
+ algorithms: List,
+ context,
+ trade_stop_loss_service,
+ trade_take_profit_service,
+ backtest_date_range: BacktestDateRange = None,
+ backtest_date_ranges: List[BacktestDateRange] = None,
+ risk_free_rate: Optional[float] = None,
+ skip_data_sources_initialization: bool = False,
+ show_progress: bool = True,
+ continue_on_error: bool = False,
+ window_filter_function: Optional[
+ Callable[[List[Backtest], BacktestDateRange], List[Backtest]]
+ ] = None,
+ final_filter_function: Optional[
+ Callable[[List[Backtest]], List[Backtest]]
+ ] = None,
+ backtest_storage_directory: Optional[Union[str, Path]] = None,
+ use_checkpoints: bool = False,
+ batch_size: int = 50,
+ checkpoint_batch_size: int = 25,
+ ) -> List[Backtest]:
+ """
+ Run event-driven backtests for multiple algorithms with optional
+ checkpointing, batching, and storage.
+
+ This method mirrors run_vector_backtests but for event-driven
+ backtesting where strategies' `on_run` methods are called at
+ each scheduled time step.
+
+ Args:
+ algorithms: List of algorithms to backtest.
+ context: The app context for the event loop service.
+ trade_stop_loss_service: Service for handling stop loss orders.
+ trade_take_profit_service: Service for handling take profit orders.
+ backtest_date_range: Single backtest date range to use.
+ backtest_date_ranges: List of backtest date ranges to use.
+ risk_free_rate: Risk-free rate for backtest metrics.
+ skip_data_sources_initialization: Whether to skip data
+ source initialization.
+ show_progress: Whether to show progress bars.
+ continue_on_error: Whether to continue on errors.
+ window_filter_function: Filter function applied after each
+ date range.
+ final_filter_function: Filter function applied at the end.
+ backtest_storage_directory: Directory to store backtests.
+ use_checkpoints: Whether to use checkpointing to resume
+ interrupted backtests.
+ batch_size: Number of algorithms to process in each batch.
+ checkpoint_batch_size: Number of backtests before batch
+ save/checkpoint.
+
+ Returns:
+ List[Backtest]: List of backtest results.
+ """
+ from .event_backtest_service import EventBacktestService
+ from investing_algorithm_framework.app.eventloop import \
+ EventLoopService
+ from investing_algorithm_framework.services import \
+ BacktestTradeOrderEvaluator
+
+ if use_checkpoints and backtest_storage_directory is None:
+ raise OperationalException(
+ "When using checkpoints, a backtest_storage_directory must "
+ "be provided"
+ )
+
+ if backtest_date_range is None and backtest_date_ranges is None:
+ raise OperationalException(
+ "Either backtest_date_range or backtest_date_ranges "
+ "must be provided"
+ )
+
+ # Collect all data sources from all algorithms
+ data_sources = []
+ for algorithm in algorithms:
+ if hasattr(algorithm, 'data_sources') and algorithm.data_sources:
+ data_sources.extend(algorithm.data_sources)
+
+ # Get risk-free rate if not provided
+ if risk_free_rate is None:
+ if show_progress:
+ _print_progress(
+ "Retrieving risk free rate for metrics calculation ...",
+ show_progress
+ )
+ risk_free_rate = self._get_risk_free_rate()
+ if show_progress:
+ _print_progress(
+ f"Retrieved risk free rate of: {risk_free_rate}",
+ show_progress
+ )
+
+ # Load checkpoint cache only if checkpointing is enabled
+ checkpoint_cache = {}
+ if use_checkpoints and backtest_storage_directory is not None:
+ checkpoint_cache = self._load_checkpoint_cache(
+ backtest_storage_directory
+ )
+
+ # Create session cache to track backtests run in this session
+ session_cache = None
+ if backtest_storage_directory is not None:
+ session_cache = self._create_session_cache()
+
+ # Handle single date range case - convert to list
+ if backtest_date_range is not None:
+ backtest_date_ranges = [backtest_date_range]
+
+ # Sort and deduplicate date ranges
+ unique_date_ranges = set(backtest_date_ranges)
+ backtest_date_ranges = sorted(
+ unique_date_ranges, key=lambda x: x.start_date
+ )
+
+ # Track all backtests across date ranges
+ # Use id(algorithm) as key to handle multiple algorithms
+ # with the same algorithm_id (each algorithm object is unique)
+ backtests_by_algorithm = {}
+ algorithm_id_map = {} # Maps id(alg) -> algorithm_id for final output
+ active_algorithms = algorithms.copy()
+
+ # Build algorithm_id_map for tracking
+ for alg in algorithms:
+ alg_id = alg.algorithm_id if (
+ hasattr(alg, 'algorithm_id')
+ ) else alg.id
+ algorithm_id_map[id(alg)] = alg_id
+
+ # Determine if this is a simple single backtest case
+ is_single_backtest = (
+ len(algorithms) == 1 and len(backtest_date_ranges) == 1
+ )
+
+ for backtest_date_range in tqdm(
+ backtest_date_ranges,
+ colour="green",
+ desc="Running event backtests for all date ranges",
+ disable=not show_progress or is_single_backtest
+ ):
+ if not skip_data_sources_initialization:
+ self.initialize_data_sources_backtest(
+ data_sources,
+ backtest_date_range,
+ show_progress=show_progress
+ )
+
+ active_algorithm_ids = []
+ for alg in active_algorithms:
+ alg_id = alg.algorithm_id if hasattr(
+ alg, 'algorithm_id'
+ ) else alg.id
+ active_algorithm_ids.append(alg_id)
+
+ # Only check for checkpoints if use_checkpoints is True
+ if use_checkpoints:
+ _print_progress(
+ "Using checkpoints to "
+ "skip completed backtests ...",
+ show_progress
+ )
+ checkpointed_ids = self._get_checkpointed_from_cache(
+ checkpoint_cache, backtest_date_range
+ )
+ missing_ids = set(active_algorithm_ids) - set(checkpointed_ids)
+ algorithms_to_run = [
+ alg for alg in active_algorithms
+ if (alg.algorithm_id if hasattr(
+ alg, 'algorithm_id'
+ ) else alg.id) in missing_ids
+ ]
+
+ # Add checkpointed IDs to session cache
+ if session_cache is not None:
+ for algo_id in checkpointed_ids:
+ if algo_id in active_algorithm_ids:
+ backtest_path = os.path.join(
+ backtest_storage_directory, algo_id
+ )
+ session_cache["backtests"][algo_id] = backtest_path
+
+ # Count how many active algorithms are in the checkpoint
+ matched_checkpoint_count = len(
+ set(active_algorithm_ids) & set(checkpointed_ids)
+ )
+
+ if show_progress:
+ _print_progress(
+ f"Active algorithms: {len(active_algorithm_ids)}, "
+ f"checkpoint file has: {len(checkpointed_ids)}, "
+ f"matched: {matched_checkpoint_count}, "
+ f"running {len(algorithms_to_run)} new backtests",
+ show_progress
+ )
+ else:
+ algorithms_to_run = active_algorithms
+
+ all_backtests = []
+ batch_buffer = []
+
+ if len(algorithms_to_run) > 0:
+ # Process algorithms in batches
+ algorithm_batches = [
+ algorithms_to_run[i:i + batch_size]
+ for i in range(0, len(algorithms_to_run), batch_size)
+ ]
+
+ if show_progress and len(algorithm_batches) > 1:
+ _print_progress(
+ f"Processing {len(algorithms_to_run)} "
+ f"algorithms in "
+ f"{len(algorithm_batches)} batches "
+ f"of ~{batch_size} each",
+ show_progress
+ )
+
+ for batch_idx, algorithm_batch in enumerate(tqdm(
+ algorithm_batches,
+ colour="green",
+ desc="Processing algorithm batches",
+ disable=not show_progress or len(algorithm_batches) == 1
+ )):
+ for algorithm in algorithm_batch:
+ algorithm_id = (
+ algorithm.algorithm_id
+ if hasattr(algorithm, 'algorithm_id')
+ else algorithm.id
+ )
+
+ try:
+ # Create event backtest service
+ event_backtest_service = EventBacktestService(
+ data_provider_service=(
+ self._data_provider_service
+ ),
+ order_service=self._order_service,
+ portfolio_service=self._portfolio_service,
+ portfolio_snapshot_service=(
+ self._portfolio_snapshot_service
+ ),
+ position_repository=self._position_repository,
+ trade_service=self._trade_service,
+ configuration_service=(
+ self._configuration_service
+ ),
+ portfolio_configuration_service=(
+ self._portfolio_configuration_service
+ ),
+ )
+
+ # Create event loop service
+ event_loop_service = EventLoopService(
+ configuration_service=(
+ self._configuration_service
+ ),
+ portfolio_snapshot_service=(
+ self._portfolio_snapshot_service
+ ),
+ context=context,
+ order_service=self._order_service,
+ portfolio_service=self._portfolio_service,
+ data_provider_service=(
+ self._data_provider_service
+ ),
+ trade_service=self._trade_service,
+ )
+
+ # Create trade order evaluator
+ trade_order_evaluator = (
+ BacktestTradeOrderEvaluator(
+ trade_service=self._trade_service,
+ order_service=self._order_service,
+ trade_stop_loss_service=(
+ trade_stop_loss_service
+ ),
+ trade_take_profit_service=(
+ trade_take_profit_service
+ ),
+ configuration_service=(
+ self._configuration_service
+ )
+ )
+ )
+
+ # Generate schedule
+ schedule = (
+ event_backtest_service.generate_schedule(
+ algorithm.strategies,
+ algorithm.tasks,
+ backtest_date_range.start_date,
+ backtest_date_range.end_date
+ )
+ )
+
+ # Initialize and run
+ event_loop_service.initialize(
+ algorithm=algorithm,
+ trade_order_evaluator=trade_order_evaluator
+ )
+ # Show progress for single backtest,
+ # hide for batches
+ event_loop_service.start(
+ schedule=schedule,
+ show_progress=(
+ show_progress and is_single_backtest
+ )
+ )
+
+ # Create backtest
+ backtest = (
+ event_backtest_service.create_backtest(
+ algorithm=algorithm,
+ backtest_date_range=backtest_date_range,
+ number_of_runs=(
+ event_loop_service.total_number_of_runs
+ ),
+ risk_free_rate=risk_free_rate,
+ )
+ )
+
+ # Add metadata
+ if (hasattr(algorithm, 'metadata')
+ and algorithm.metadata):
+ backtest.metadata = algorithm.metadata
+ else:
+ backtest.metadata = {}
+
+ # Store with algorithm object id for tracking
+ backtest._algorithm_obj_id = id(algorithm)
+ all_backtests.append(backtest)
+ batch_buffer.append(backtest)
+
+ # Save batch if full
+ if backtest_storage_directory is not None:
+ self._save_batch_if_full(
+ batch_buffer,
+ checkpoint_batch_size,
+ backtest_date_range,
+ backtest_storage_directory,
+ checkpoint_cache,
+ session_cache
+ )
+
+ except Exception as e:
+ if continue_on_error:
+ logger.error(
+ f"Error in backtest for "
+ f"{algorithm_id}: {e}"
+ )
+ continue
+ else:
+ raise
+
+ # Periodic garbage collection
+ if (batch_idx + 1) % 5 == 0:
+ gc.collect()
+
+ # Save remaining batch
+ if backtest_storage_directory is not None:
+ self._save_remaining_batch(
+ batch_buffer,
+ backtest_date_range,
+ backtest_storage_directory,
+ checkpoint_cache,
+ session_cache
+ )
+
+ # Store backtests in memory when no storage directory is provided
+ if backtest_storage_directory is None:
+ for backtest in all_backtests:
+ # Use algorithm object id if available,
+ # otherwise algorithm_id
+ key = (getattr(backtest, '_algorithm_obj_id', None)
+ or backtest.algorithm_id)
+ if key not in backtests_by_algorithm:
+ backtests_by_algorithm[key] = []
+ backtests_by_algorithm[key].append(backtest)
+
+ # Load checkpointed backtests that were SKIPPED (not run in this
+ # iteration) if needed for filtering. Only load backtests that
+ # were checkpointed from a previous session, not ones that were
+ # just run and checkpointed in this session.
+ if use_checkpoints and (window_filter_function is not None
+ or final_filter_function is not None):
+ # Get IDs of algorithms that were actually run in this
+ # iteration
+ run_algorithm_ids = set(
+ (alg.algorithm_id if hasattr(alg, 'algorithm_id')
+ else alg.id)
+ for alg in algorithms_to_run
+ )
+ # Only load backtests that were SKIPPED
+ # (checkpointed, not run)
+ skipped_algorithm_ids = [
+ algo_id for algo_id in active_algorithm_ids
+ if algo_id not in run_algorithm_ids
+ ]
+
+ if len(skipped_algorithm_ids) > 0:
+ checkpointed_backtests = self._load_backtests_from_cache(
+ checkpoint_cache,
+ backtest_date_range,
+ backtest_storage_directory,
+ skipped_algorithm_ids
+ )
+ all_backtests.extend(checkpointed_backtests)
+
+ # Apply window filter function
+ if window_filter_function is not None:
+ if show_progress:
+ _print_progress(
+ "Applying window filter function ...",
+ show_progress
+ )
+ filtered_backtests = window_filter_function(
+ all_backtests, backtest_date_range
+ )
+ filtered_ids = set(b.algorithm_id for b in filtered_backtests)
+ active_algorithms = [
+ alg for alg in active_algorithms
+ if (alg.algorithm_id if hasattr(alg, 'algorithm_id')
+ else alg.id) in filtered_ids
+ ]
+
+ # Update tracking
+ if backtest_storage_directory is None:
+ algorithms_to_remove = [
+ alg_id for alg_id in backtests_by_algorithm.keys()
+ if alg_id not in filtered_ids
+ ]
+ for alg_id in algorithms_to_remove:
+ del backtests_by_algorithm[alg_id]
+ else:
+ # When using storage, update filtered_out metadata
+ algorithms_to_mark = [
+ alg_id for alg_id in active_algorithm_ids
+ if alg_id not in filtered_ids
+ ]
+
+ # Update session cache to only include filtered backtests
+ if session_cache is not None:
+ session_cache["backtests"] = {
+ k: v for k, v in session_cache["backtests"].items()
+ if k in filtered_ids
+ }
+
+ # Clear filtered_out flag for backtests that passed
+ # the filter (they may have been filtered out before)
+ for alg_id in filtered_ids:
+ backtest_dir = os.path.join(
+ backtest_storage_directory, alg_id
+ )
+ if os.path.exists(backtest_dir):
+ try:
+ backtest = Backtest.open(backtest_dir)
+ if backtest.metadata is not None and \
+ backtest.metadata.get(
+ 'filtered_out', False
+ ):
+ backtest.metadata['filtered_out'] = False
+ if 'filtered_out_at_date_range' in \
+ backtest.metadata:
+ del backtest.metadata[
+ 'filtered_out_at_date_range'
+ ]
+ backtest.save(backtest_dir)
+ except Exception as e:
+ logger.warning(
+ f"Could not clear filtered_out flag "
+ f"for backtest {alg_id}: {e}"
+ )
+
+ # Mark filtered-out backtests with a metadata flag
+ for alg_id in algorithms_to_mark:
+ backtest_dir = os.path.join(
+ backtest_storage_directory, alg_id
+ )
+ if os.path.exists(backtest_dir):
+ try:
+ backtest = Backtest.open(backtest_dir)
+ start_date = backtest_date_range.start_date
+ end_date = backtest_date_range.end_date
+ date_key = (
+ f"{start_date.isoformat()}_"
+ f"{end_date.isoformat()}"
+ )
+ if backtest.metadata is None:
+ backtest.metadata = {}
+ backtest.metadata['filtered_out'] = True
+ backtest.metadata[
+ 'filtered_out_at_date_range'
+ ] = (
+ backtest_date_range.name
+ if backtest_date_range.name
+ else date_key
+ )
+ backtest.save(backtest_dir)
+ except Exception as e:
+ logger.warning(
+ f"Could not mark backtest {alg_id} "
+ f"as filtered: {e}"
+ )
+
+ # Clear memory
+ del all_backtests
+ del batch_buffer
+ gc.collect()
+
+ # Combine backtests
+ if show_progress:
+ _print_progress(
+ "Combining backtests across date ranges ...",
+ show_progress
+ )
+
+ active_algorithm_ids_final = set()
+ for alg in active_algorithms:
+ alg_id = alg.algorithm_id if hasattr(alg, 'algorithm_id') \
+ else alg.id
+ active_algorithm_ids_final.add(alg_id)
+
+ loaded_from_storage = False
+ if backtest_storage_directory is not None:
+ # Save session cache to disk before final loading
+ if session_cache is not None:
+ self._save_session_cache(
+ session_cache, backtest_storage_directory
+ )
+
+ # Load ONLY from session cache - this ensures we only get
+ # backtests from this run, not pre-existing ones
+ all_backtests = self._load_backtests_from_session(
+ session_cache,
+ active_algorithm_ids_final,
+ show_progress=show_progress
+ )
+ loaded_from_storage = True
+ else:
+ combined_backtests = []
+ for algorithm_id, backtests_list in backtests_by_algorithm.items():
+ if len(backtests_list) == 1:
+ combined_backtests.append(backtests_list[0])
+ else:
+ combined = combine_backtests(backtests_list)
+ combined_backtests.append(combined)
+ all_backtests = combined_backtests
+
+ # Generate summary metrics
+ for backtest in tqdm(
+ all_backtests,
+ colour="green",
+ desc="Generating backtest summary metrics",
+ disable=not show_progress
+ ):
+ backtest.backtest_summary = generate_backtest_summary_metrics(
+ backtest.get_all_backtest_metrics()
+ )
+
+ # Apply final filter function
+ if final_filter_function is not None:
+ if show_progress:
+ _print_progress(
+ "Applying final filter function ...",
+ show_progress
+ )
+ all_backtests = final_filter_function(all_backtests)
+
+ # Save if not loaded from storage
+ if (backtest_storage_directory is not None
+ and not loaded_from_storage):
+ save_backtests_to_directory(
+ backtests=all_backtests,
+ directory_path=backtest_storage_directory,
+ show_progress=show_progress
+ )
+
+ # Cleanup session file at the end
+ if backtest_storage_directory is not None:
+ session_file = os.path.join(
+ backtest_storage_directory, "backtest_session.json"
+ )
+ if os.path.exists(session_file):
+ os.remove(session_file)
+
+ return all_backtests
+
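+ # Walk-forward style usage sketch (editor's illustration; the services,
+ # algorithms, and date ranges are assumed to exist in the caller's scope):
+ #
+ #     def window_filter(backtests, date_range):
+ #         # a real filter would rank on metrics, e.g. via
+ #         # backtest.get_all_backtest_metrics(); this keeps everything
+ #         return backtests
+ #
+ #     results = self.run_backtests(
+ #         algorithms=[algo_a, algo_b],
+ #         context=context,
+ #         trade_stop_loss_service=stop_loss_service,
+ #         trade_take_profit_service=take_profit_service,
+ #         backtest_date_ranges=[q1_range, q2_range],
+ #         window_filter_function=window_filter,
+ #         backtest_storage_directory="backtests",
+ #         use_checkpoints=True,
+ #     )
+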
+ def run_backtest(
+ self,
+ algorithm,
+ backtest_date_range: BacktestDateRange,
+ context,
+ trade_stop_loss_service,
+ trade_take_profit_service,
+ backtest_date_ranges: List[BacktestDateRange] = None,
+ risk_free_rate: Optional[float] = None,
+ metadata: Optional[Dict[str, str]] = None,
+ skip_data_sources_initialization: bool = False,
+ backtest_storage_directory: Optional[Union[str, Path]] = None,
+ use_checkpoints: bool = False,
+ show_progress: bool = True,
+ initial_amount: float = None,
+ market: str = None,
+ trading_symbol: str = None,
+ ) -> tuple:
+ """
+ Run an event-driven backtest for a single algorithm.
+
+ This method leverages the run_backtests implementation,
+ providing the same features (checkpointing, storage) for
+ single algorithm backtests.
+
+ Args:
+ algorithm: The algorithm to backtest.
+ backtest_date_range: Single backtest date range to use.
+ context: The app context for the event loop service.
+ trade_stop_loss_service: Service for handling stop loss orders.
+ trade_take_profit_service: Service for handling take profit orders.
+ backtest_date_ranges: List of backtest date ranges to use.
+ If provided, the algorithm will be backtested across all
+ date ranges and results will be combined.
+ risk_free_rate: The risk-free rate for calculating metrics.
+ metadata: Metadata to attach to the backtest report.
+ skip_data_sources_initialization: Whether to skip data source
+ initialization.
+ backtest_storage_directory: Directory to save the backtest to.
+ use_checkpoints: Whether to use checkpointing.
+ show_progress: Whether to show progress bars.
+ initial_amount: Initial amount (for compatibility, not used here
+ as algorithm already has portfolio config).
+ market: Market (for compatibility).
+ trading_symbol: Trading symbol (for compatibility).
+
+ Returns:
+ Tuple[Backtest, Dict]: A tuple containing:
+ - Backtest: Instance of Backtest containing the results.
+ - Dict: Empty dict (for compatibility with event loop history).
+ """
+ # Use run_backtests with single algorithm
+ backtests = self.run_backtests(
+ algorithms=[algorithm],
+ context=context,
+ trade_stop_loss_service=trade_stop_loss_service,
+ trade_take_profit_service=trade_take_profit_service,
+ backtest_date_range=backtest_date_range,
+ backtest_date_ranges=backtest_date_ranges,
+ risk_free_rate=risk_free_rate,
+ skip_data_sources_initialization=skip_data_sources_initialization,
+ show_progress=show_progress,
+ continue_on_error=False,
+ backtest_storage_directory=backtest_storage_directory,
+ use_checkpoints=use_checkpoints,
+ )
+
+ # Extract the single backtest result
+ if backtests and len(backtests) > 0:
+ backtest = backtests[0]
+
+ # Add metadata if provided
+ if metadata is not None:
+ backtest.metadata = metadata
+ elif backtest.metadata is None:
+ if hasattr(algorithm, 'metadata') and algorithm.metadata:
+ backtest.metadata = algorithm.metadata
+ else:
+ backtest.metadata = {}
+
+ return backtest, {}
+ else:
+ # Return empty backtest if no results
+ algorithm_id = (
+ algorithm.algorithm_id
+ if hasattr(algorithm, 'algorithm_id')
+ else algorithm.id
+ )
+ return Backtest(
+ algorithm_id=algorithm_id,
+ backtest_runs=[],
+ risk_free_rate=risk_free_rate or 0.0,
+ metadata=metadata or {}
+ ), {}
+
+ def create_ohlcv_permutation(
+ self,
+ data: Union[pd.DataFrame, pl.DataFrame],
+ start_index: int = 0,
+ seed: int | None = None,
+ ) -> Union[pd.DataFrame, pl.DataFrame]:
+ """
+ Create a permuted OHLCV dataset by shuffling relative price moves.
+
+ Args:
+ data: A single OHLCV DataFrame (pandas or polars)
+ with columns ['Open', 'High', 'Low', 'Close', 'Volume'].
+ For pandas: Datetime can be either
+ index or a 'Datetime' column. For polars: Datetime
+ must be a 'Datetime' column.
+ start_index: Index at which the permutation should begin
+ (bars before remain unchanged).
+ seed: Random seed for reproducibility.
+
+ Returns:
+ DataFrame of the same type (pandas or polars) with
+ permuted OHLCV values, preserving the datetime
+ structure (index vs column) of the input.
+ """
+
+ if start_index < 0:
+ raise OperationalException("start_index must be >= 0")
+
+ if seed is None:
+ seed = np.random.randint(0, 1_000_000)
+
+ np.random.seed(seed)
+ is_polars = isinstance(data, pl.DataFrame)
+
+ # Normalize input to pandas
+ if is_polars:
+ has_datetime_col = "Datetime" in data.columns
+ ohlcv_pd = data.to_pandas().copy()
+ if has_datetime_col:
+ time_index = pd.to_datetime(ohlcv_pd["Datetime"])
+ else:
+ time_index = np.arange(len(ohlcv_pd))
+ else:
+ has_datetime_col = "Datetime" in data.columns
+ if isinstance(data.index, pd.DatetimeIndex):
+ time_index = data.index
+ elif has_datetime_col:
+ time_index = pd.to_datetime(data["Datetime"])
+ else:
+ time_index = np.arange(len(data))
+ ohlcv_pd = data.copy()
+
+ # Prepare data
+ n_bars = len(ohlcv_pd)
+ perm_index = start_index + 1
+ perm_n = n_bars - perm_index
+
+ # Ensure all OHLC price values are positive before taking log
+ # Replace non-positive values with NaN and forward fill
+ ohlcv_cols = ["Open", "High", "Low", "Close"]
+ for col in ohlcv_cols:
+ ohlcv_pd.loc[ohlcv_pd[col] <= 0, col] = np.nan
+
+ # Forward fill NaN values to maintain data continuity
+ ohlcv_pd[ohlcv_cols] = ohlcv_pd[ohlcv_cols].ffill()
+
+ # If there are still NaN values at the start, backward fill
+ ohlcv_pd[ohlcv_cols] = ohlcv_pd[ohlcv_cols].bfill()
+
+ # If all values are still invalid, raise an error
+ if ohlcv_pd[ohlcv_cols].isna().any().any():
+ raise ValueError(
+ "OHLCV data contains invalid (zero or negative) values "
+ "that cannot be processed"
+ )
+
+ log_bars = np.log(ohlcv_pd[ohlcv_cols])
+
+ # Start bar
+ start_bar = log_bars.iloc[start_index].to_numpy()
+
+ # Relative series
+ rel_open = (log_bars["Open"] - log_bars["Close"].shift()).to_numpy()
+ rel_high = (log_bars["High"] - log_bars["Open"]).to_numpy()
+ rel_low = (log_bars["Low"] - log_bars["Open"]).to_numpy()
+ rel_close = (log_bars["Close"] - log_bars["Open"]).to_numpy()
+
+ # Shuffle independently
+ idx = np.arange(perm_n)
+ rel_high = rel_high[perm_index:][np.random.permutation(idx)]
+ rel_low = rel_low[perm_index:][np.random.permutation(idx)]
+ rel_close = rel_close[perm_index:][np.random.permutation(idx)]
+ rel_open = rel_open[perm_index:][np.random.permutation(idx)]
+
+ # Build permuted OHLC
+ perm_bars = np.zeros((n_bars, 4))
+ perm_bars[:start_index] = log_bars.iloc[:start_index].to_numpy()
+ perm_bars[start_index] = start_bar
+
+ for i in range(perm_index, n_bars):
+ k = i - perm_index
+ perm_bars[i, 0] = perm_bars[i - 1, 3] + rel_open[k] # Open
+ perm_bars[i, 1] = perm_bars[i, 0] + rel_high[k] # High
+ perm_bars[i, 2] = perm_bars[i, 0] + rel_low[k] # Low
+ perm_bars[i, 3] = perm_bars[i, 0] + rel_close[k] # Close
+
+ perm_bars = np.exp(perm_bars)
+
+ # Rebuild OHLCV
+ perm_df = pd.DataFrame(
+ perm_bars,
+ columns=["Open", "High", "Low", "Close"],
+ )
+ perm_df["Volume"] = ohlcv_pd["Volume"].values
+
+ # Restore datetime structure
+ if is_polars:
+ if has_datetime_col:
+ perm_df.insert(0, "Datetime", time_index)
+ return pl.from_pandas(perm_df)
+ else:
+ if isinstance(data.index, pd.DatetimeIndex):
+ perm_df.index = time_index
+ perm_df.index.name = data.index.name or "Datetime"
+ elif has_datetime_col:
+ perm_df.insert(0, "Datetime", time_index)
+ return perm_df
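+
+ # Why this works, in brief: the per-bar log moves (open gap from the
+ # prior close, and high/low/close offsets from the open) are shuffled
+ # independently after start_index, then recomposed cumulatively, so each
+ # move's marginal distribution is preserved while its ordering is
+ # destroyed. Usage sketch (editor's illustration; `ohlcv_df` is assumed
+ # to follow the column layout described in the docstring):
+ #
+ #     permuted = self.create_ohlcv_permutation(ohlcv_df, seed=42)
+ #     # backtesting the same strategy on `permuted` gives a baseline for
+ #     # how much of the original performance survives on order-destroyed
+ #     # data (a simple permutation test)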