dijkies 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dijkies/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from dijkies.executors import ExchangeAssetClient, Order, State
2
+ from dijkies.strategy import Strategy
3
+
4
+ __all__ = ["Strategy", "State", "Order", "ExchangeAssetClient"]
dijkies/backtest.py ADDED
@@ -0,0 +1,98 @@
1
+ from datetime import datetime, timedelta
2
+ from typing import Optional
3
+
4
+ import pandas as pd
5
+ from pandas.core.frame import DataFrame as PandasDataFrame
6
+
7
+ from dijkies.evaluate import EvaluationFramework
8
+ from dijkies.executors import BacktestExchangeAssetClient
9
+ from dijkies.performance import PerformanceInformationRow
10
+ from dijkies.exceptions import (
11
+ DataTimeWindowShorterThanSuggestedAnalysisWindowError,
12
+ InvalidExchangeAssetClientError,
13
+ InvalidTypeForTimeColumnError,
14
+ MissingOHLCVColumnsError,
15
+ TimeColumnNotDefinedError,
16
+ )
17
+ from dijkies.strategy import Strategy
18
+
19
+
20
class Backtester:
    """Replay a strategy over historical candles and optionally evaluate the result."""

    def __init__(
        self,
        evaluation: Optional[EvaluationFramework] = None,
    ):
        # Optional framework that ``run`` hands the simulation results to.
        self.evaluation = evaluation

    @staticmethod
    def get_analysis_df(
        data: PandasDataFrame, current_time: datetime, look_back_in_min: int
    ) -> PandasDataFrame:
        """Return the rows of ``data`` that fall inside the look-back window.

        The window covers ``look_back_in_min`` minutes ending at
        ``current_time``; both boundaries are inclusive.
        """
        start_analysis_df = current_time - timedelta(minutes=look_back_in_min)
        in_window = (data.time >= start_analysis_df) & (data.time <= current_time)
        return data.loc[in_window]

    def simulate(
        self,
        data: PandasDataFrame,
        strategy: Strategy,
    ) -> PandasDataFrame:
        """Run the backtest and return one performance row per simulated candle.

        ``data`` must have the following properties:

        * a ``time`` column with a datetime64 dtype;
        * ``open``, ``high``, ``low``, ``close`` and ``volume`` columns;
        * at least one row, and a time span that is not shorter than
          ``strategy.analysis_dataframe_size_in_minutes``.

        Candles are replayed in the order in which they appear in ``data``.
        ``strategy.executor`` must be a ``BacktestExchangeAssetClient``.

        Raises:
            TimeColumnNotDefinedError: ``data`` has no ``time`` column.
            InvalidTypeForTimeColumnError: ``time`` is not a datetime column.
            DataTimeWindowShorterThanSuggestedAnalysisWindowError: ``data`` is
                empty or spans less time than the strategy's analysis window.
            MissingOHLCVColumnsError: an OHLCV column is missing.
            InvalidExchangeAssetClientError: the strategy's executor is not a
                ``BacktestExchangeAssetClient``.
        """
        # validate args
        if "time" not in data.columns:
            raise TimeColumnNotDefinedError()

        if not pd.api.types.is_datetime64_any_dtype(data.time):
            raise InvalidTypeForTimeColumnError()

        lookback_in_min = strategy.analysis_dataframe_size_in_minutes

        # An empty frame has no time span at all; report it with the same
        # domain error instead of failing later on ``iloc[0]``.
        if data.empty:
            raise DataTimeWindowShorterThanSuggestedAnalysisWindowError()

        first_time = data.time.min()
        timespan_data_in_min = (data.time.max() - first_time).total_seconds() / 60

        if lookback_in_min > timespan_data_in_min:
            raise DataTimeWindowShorterThanSuggestedAnalysisWindowError()

        if not {"open", "high", "low", "close", "volume"}.issubset(data.columns):
            raise MissingOHLCVColumnsError()

        if not isinstance(strategy.executor, BacktestExchangeAssetClient):
            raise InvalidExchangeAssetClientError()

        # Use the earliest timestamp (the same value the validation above is
        # based on) rather than the first row, so the simulation frame cannot
        # end up empty when the rows are not sorted by time.
        start_time = first_time + timedelta(minutes=lookback_in_min)
        simulation_df: PandasDataFrame = data.loc[data.time >= start_time]
        start_candle = simulation_df.iloc[0]
        start_value_in_quote = strategy.state.total_value_in_quote(start_candle.open)
        result = []

        for _, candle in simulation_df.iterrows():
            analysis_df = self.get_analysis_df(data, candle.time, lookback_in_min)
            strategy.executor.update_current_candle(candle)

            strategy.run(analysis_df)

            result.append(
                PerformanceInformationRow.from_objects(
                    candle, start_candle, strategy.state, start_value_in_quote
                )
            )

        return pd.DataFrame([r.dict() for r in result])

    def run(
        self,
        candle_df: PandasDataFrame,
        strategy: Strategy,
    ) -> PandasDataFrame:
        """Simulate ``strategy`` on ``candle_df`` and return the results.

        When an ``EvaluationFramework`` was supplied at construction, the
        results are also passed to its ``evaluate`` method.
        """
        results = self.simulate(candle_df, strategy)
        if isinstance(self.evaluation, EvaluationFramework):
            self.evaluation.evaluate(results)

        return results
dijkies/credentials.py ADDED
@@ -0,0 +1,6 @@
1
+ from pydantic import BaseModel
2
+
3
+
4
class Credentials(BaseModel):
    """Pair of an API key and its matching API secret key."""

    # Public part of the key pair.
    api_key: str
    # Secret part of the key pair.
    api_secret_key: str
@@ -0,0 +1,15 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ import pandas as pd
4
+ from pandas.core.frame import DataFrame as PandasDataFrame
5
+
6
+
7
class DataPipeline(ABC):
    """Interface for a pipeline whose ``run`` returns a pandas DataFrame."""

    @abstractmethod
    def run(self) -> PandasDataFrame:
        """Produce the data and return it as a DataFrame."""


class NoDataPipeline(DataPipeline):
    """Pipeline that needs no input data and always yields an empty DataFrame.

    It derives from ``DataPipeline`` (not directly from ``ABC``) so that it
    is recognised wherever a ``DataPipeline`` is expected.
    """

    def run(self) -> PandasDataFrame:
        """Return a new, empty DataFrame."""
        return pd.DataFrame({})
dijkies/deployment.py ADDED
@@ -0,0 +1,127 @@
1
+ import os
2
+ import json
3
+ import pickle
4
+
5
+ from abc import ABC, abstractmethod
6
+
7
+ from dijkies.strategy import Strategy
8
+ from dijkies.executors import State
9
+ from dijkies.credentials import Credentials
10
+ from dijkies.executors import BitvavoExchangeAssetClient
11
+ from dijkies.logger import get_logger
12
+
13
+
14
class StrategyRepository(ABC):
    """Interface for storing and reading strategies and states by identifier."""

    @abstractmethod
    def store(self, strategy: Strategy, id: str) -> None:
        """Persist ``strategy`` under ``id``."""

    @abstractmethod
    def store_state(self, state: State, id: str) -> None:
        """Persist ``state`` under ``id``."""

    @abstractmethod
    def read(self, id: str) -> Strategy:
        """Return the strategy stored under ``id``."""

    @abstractmethod
    def read_state(self, id: str) -> State:
        """Return the state stored under ``id``."""


class LocalStrategyRepository(StrategyRepository):
    """Repository that keeps strategies and states as files in one directory.

    Strategies are pickled to ``<root_directory>/<id>.pkl``; states are written
    as JSON to ``<root_directory>/state_<id>.json``.
    """

    def __init__(self, root_directory: str) -> None:
        self.root_directory = root_directory

    def _strategy_path(self, id: str) -> str:
        """Return the pickle file path used for strategy ``id``."""
        return os.path.join(self.root_directory, id + ".pkl")

    def _state_path(self, id: str) -> str:
        """Return the JSON file path used for the state of ``id``."""
        # The suffix is concatenated onto the file name. Passing ".json" as a
        # separate path component would yield "<root>/state_<id>/.json".
        return os.path.join(self.root_directory, "state_" + id + ".json")

    def store(self, strategy: Strategy, id: str) -> None:
        """Pickle ``strategy`` to its file, overwriting any previous version."""
        with open(self._strategy_path(id), "wb") as file:
            pickle.dump(strategy, file)

    def store_state(self, state: State, id: str) -> None:
        """Write ``state`` as JSON to its file, overwriting any previous version."""
        # NOTE(review): json.dump only accepts JSON-native values; confirm that
        # State is serialisable as passed in.
        with open(self._state_path(id), "w", encoding="utf-8") as file:
            json.dump(state, file)

    def read(self, id: str) -> Strategy:
        """Load and return the pickled strategy stored under ``id``."""
        # SECURITY: unpickling can execute code contained in the file, so the
        # root directory must only hold files written by a trusted party.
        with open(self._strategy_path(id), "rb") as file:
            strategy = pickle.load(file)
        return strategy

    def read_state(self, id: str) -> State:
        """Load and return the JSON content stored as the state of ``id``."""
        # NOTE(review): this returns whatever json.load produces, not a State
        # instance - confirm what callers expect.
        with open(self._state_path(id), "r", encoding="utf-8") as file:
            state = json.load(file)
        return state
57
+
58
+
59
class CredentialsRepository(ABC):
    """Interface for reading and writing API credentials by identifier."""

    @abstractmethod
    def get_api_key(self, id: str) -> str:
        """Return the API key belonging to ``id``."""

    @abstractmethod
    def store_api_key(self, id: str, api_key: str) -> None:
        """Save ``api_key`` for ``id``."""

    @abstractmethod
    def get_api_secret_key(self, id: str) -> str:
        """Return the API secret key belonging to ``id``."""

    @abstractmethod
    def store_api_secret_key(self, id: str, api_secret_key: str) -> None:
        """Save ``api_secret_key`` for ``id``."""

    def get_credentials(self, id: str) -> Credentials:
        """Bundle the key and the secret key of ``id`` into a ``Credentials``."""
        key = self.get_api_key(id)
        secret_key = self.get_api_secret_key(id)
        return Credentials(api_key=key, api_secret_key=secret_key)


class LocalCredentialRepository(CredentialsRepository):
    """Credential repository backed by the process environment.

    Values are looked up in the variables ``<id>_api_key`` and
    ``<id>_api_secret_key``. A variable that is not set yields ``None``.
    The ``store_*`` methods do nothing.
    """

    def get_api_key(self, id: str) -> str:
        """Return the value of the ``<id>_api_key`` environment variable."""
        variable = f"{id}_api_key"
        return os.getenv(variable)

    def store_api_key(self, id: str, api_key: str) -> None:
        """Do nothing; this repository does not write credentials."""
        return None

    def get_api_secret_key(self, id: str) -> str:
        """Return the value of the ``<id>_api_secret_key`` environment variable."""
        variable = f"{id}_api_secret_key"
        return os.getenv(variable)

    def store_api_secret_key(self, id: str, api_secret_key: str) -> None:
        """Do nothing; this repository does not write credentials."""
        return None
95
+
96
+
97
class Bot:
    """Loads a stored strategy, runs it once on fresh data and stores it again."""

    def __init__(
        self,
        strategy_repository: StrategyRepository,
        credential_repository: CredentialsRepository,
    ) -> None:
        self.strategy_repository = strategy_repository
        self.credential_repository = credential_repository

    def set_executor(self, strategy: Strategy, id: str = "") -> None:
        """Attach an exchange client to ``strategy`` using the credentials of ``id``.

        Only the ``"bitvavo"`` exchange is handled; for any other value of
        ``strategy.exchange`` the strategy's executor is left as it is.

        Raises:
            ValueError: ``id`` is empty. Without an explicit ``id`` the
                credential lookup would receive the builtin ``id`` function,
                which can never match stored credentials.
        """
        if not id:
            raise ValueError("an id is required to look up the credentials")

        api_key = self.credential_repository.get_api_key(id)
        api_secret_key = self.credential_repository.get_api_secret_key(id)

        if strategy.exchange == "bitvavo":
            strategy.executor = BitvavoExchangeAssetClient(
                strategy.state,
                api_key,
                api_secret_key,
                1,
                get_logger(),
            )

    def run(self, id: str) -> None:
        """Run the strategy stored under ``id`` once and persist it afterwards."""
        strategy = self.strategy_repository.read(id)
        self.set_executor(strategy, id)

        data_pipeline = strategy.get_data_pipeline()
        data = data_pipeline.run()
        strategy.run(data)

        # ``store`` needs the identifier to know where to write the strategy.
        self.strategy_repository.store(strategy, id)
dijkies/evaluate.py ADDED
@@ -0,0 +1,235 @@
1
+ import logging
2
+ import os
3
+ import tempfile
4
+ from abc import ABC, abstractmethod
5
+ from datetime import datetime, timezone
6
+ from typing import Optional, Union
7
+
8
+ import matplotlib.pyplot as plt
9
+ import mlflow
10
+ import pandas as pd
11
+ from pandas.core.frame import DataFrame as PandasDataFrame
12
+
13
+ from dijkies.performance import Metric
14
+
15
+
16
class EvaluationFramework(ABC):
    """Interface for anything that can evaluate a DataFrame of performance results."""

    @abstractmethod
    def evaluate(self, performance_results: PandasDataFrame) -> None:
        """Evaluate ``performance_results``; implementations return nothing."""
20
+
21
+
22
class MLFlowEvaluator(ABC):
    """Interface for a single evaluation step over performance results."""

    @abstractmethod
    def evaluate(self, performance_results: PandasDataFrame) -> None:
        """Evaluate ``performance_results``; implementations return nothing."""
26
+
27
+
28
class MLFlowEvaluationFramework(EvaluationFramework):
    """Evaluation framework that records one MLflow run per evaluation.

    Each call to ``evaluate`` starts a run in ``experiment_name``, logs the
    strategy parameters and the results as a CSV artifact, and then lets every
    configured evaluator process the results.
    """

    def __init__(
        self,
        evaluators: list[MLFlowEvaluator],
        experiment_name: str,
        logger: logging.Logger,
        strategy_parameters: Optional[dict[str, Union[int, str, float, bool]]],
        log_dataset: bool = False,
    ) -> None:
        self.evaluators = evaluators
        self.logger = logger
        self.experiment_name = experiment_name
        self.log_dataset = log_dataset
        self.strategy_parameters = strategy_parameters

    def evaluate(self, performance_results: PandasDataFrame) -> None:
        """Log ``performance_results`` to a new MLflow run and run all evaluators.

        To browse the results, start a tracking server, e.g.
        ``poetry run mlflow server --host 127.0.0.1 --port 8080``.
        """
        mlflow.set_experiment(self.experiment_name)

        run_name = "run__" + datetime.now(tz=timezone.utc).strftime("%Y_%m_%d_%H_%M%Z")

        with mlflow.start_run(run_name=run_name) as run:
            run_id = run.info.run_id
            self.logger.info("Run created: " + run_id)

            if self.log_dataset:
                dataset = mlflow.data.from_pandas(
                    performance_results, source="local", name="training_data"
                )
                mlflow.log_input(dataset, context="training")

            # ``strategy_parameters`` is optional; only log it when provided.
            if self.strategy_parameters is not None:
                mlflow.log_params(self.strategy_parameters)

            with tempfile.TemporaryDirectory() as tmpdir:
                file_path = os.path.join(tmpdir, "data.csv")
                performance_results.to_csv(file_path, index=False)
                mlflow.log_artifact(file_path)

            # NOTE(review): the evaluators are assumed to run inside the active
            # run so their metrics and figures attach to it - confirm against
            # the original indentation.
            for evaluator in self.evaluators:
                evaluator.evaluate(performance_results)
67
+
68
+
69
class MLFlowOverallEvaluator(MLFlowEvaluator):
    """Evaluator that logs metrics over the full result set plus three plots."""

    def __init__(self, metrics: list[Metric], logger: logging.Logger) -> None:
        self.metrics = metrics
        self.logger = logger

    def log_metrics(self, performance_results: PandasDataFrame) -> None:
        """Log each metric for the strategy value series and for the hodl series."""
        value_columns = (
            ("strategy_", "total_value_strategy"),
            ("hodl_", "total_value_hodl"),
        )
        for metric in self.metrics:
            for prefix, column in value_columns:
                metric_key = prefix + metric.metric_name
                series = getattr(performance_results, column)
                mlflow.log_metric(metric_key, round(metric.calculate(series), 2))

    @staticmethod
    def plot_fee(performance_results: PandasDataFrame) -> None:
        """Plot the total fee paid over time and log it as ``total_fee_paid.png``."""
        plt.figure(figsize=(8, 5))
        plt.plot(
            performance_results.candle_time,
            performance_results.total_fee_paid,
            color="blue",
            label="fee",
        )
        plt.xlabel("Time")
        plt.xticks(rotation=45)
        plt.ylabel("fee paid in €")
        plt.title("total transaction fee paid to Exchange")
        plt.grid(True)
        plt.legend()

        # Hand the current figure to MLflow, then close it to release memory.
        current_figure = plt.gcf()
        mlflow.log_figure(current_figure, "total_fee_paid.png")
        plt.close()

    @staticmethod
    def plot_balance_fractions(performance_results: PandasDataFrame) -> None:
        """Plot the quote and base shares of total value as ``balance_fractions.png``."""
        total_value = performance_results.total_value_strategy
        quote_share = performance_results.balance_total_quote / total_value
        base_share = (
            performance_results.balance_total_base
            / total_value
            * performance_results.candle_close
        )

        plt.figure(figsize=(8, 5))
        timeline = performance_results.candle_time
        plt.plot(
            timeline, quote_share, color="blue", label="percentage value in quote"
        )
        plt.plot(timeline, base_share, color="red", label="percentage value in base")
        plt.xlabel("Time")
        plt.xticks(rotation=45)
        plt.ylabel("fraction")
        plt.title("fraction of total value in quote")
        plt.grid(True)
        plt.legend()

        # Hand the current figure to MLflow, then close it to release memory.
        current_figure = plt.gcf()
        mlflow.log_figure(current_figure, "balance_fractions.png")
        plt.close()

    @staticmethod
    def plot_strategy_vs_hodl(performance_results: PandasDataFrame) -> None:
        """Plot strategy value against hodl value and log it as ``overal_result.png``."""
        plt.figure(figsize=(8, 5))
        plt.plot(
            performance_results.candle_time,
            performance_results.total_value_strategy,
            color="blue",
            label="strategy",
        )
        plt.plot(
            performance_results.candle_time,
            performance_results.total_value_hodl,
            color="red",
            label="hodl",
        )
        plt.xlabel("Time")
        plt.xticks(rotation=45)
        plt.ylabel("Value")
        plt.title("strategy vs. hodl")
        plt.grid(True)
        plt.legend()

        # Hand the current figure to MLflow, then close it to release memory.
        current_figure = plt.gcf()
        mlflow.log_figure(current_figure, "overal_result.png")
        plt.close()

    def plot_results(self, performance_results: PandasDataFrame) -> None:
        """Create the three plots: strategy vs. hodl, fee, balance fractions."""
        plotters = (
            self.plot_strategy_vs_hodl,
            self.plot_fee,
            self.plot_balance_fractions,
        )
        for plotter in plotters:
            plotter(performance_results)

    def evaluate(self, performance_results: PandasDataFrame) -> None:
        """Log the metrics first, then the plots."""
        self.log_metrics(performance_results)
        self.plot_results(performance_results)
178
+
179
+
180
class MLFlowSliceEvaluator(MLFlowEvaluator):
    """Evaluator that computes metrics over sliding windows of the results.

    Every window yields one row of metric values; the distribution of each
    metric over all windows is logged to MLflow as mean, standard deviation
    and a histogram.
    """

    def __init__(
        self, window_size_in_min: int, metrics: list[Metric], logger: logging.Logger
    ) -> None:
        self.window_size_in_min = window_size_in_min
        self.metrics = metrics
        self.logger = logger

    def results_window_slicer(self, results: PandasDataFrame) -> PandasDataFrame:
        """Return one row of metric values per sliding window over ``results``.

        The candle interval is taken from the first two rows. Each window
        covers ``window_size_in_min`` of candles and the window start advances
        one row at a time. Columns are named ``strategy_<metric_name>`` and
        ``hodl_<metric_name>``. An empty DataFrame is returned when ``results``
        has fewer than two rows, because no interval can be derived then.
        """
        if len(results) < 2:
            self.logger.warning(
                "at least two result rows are needed to derive the candle interval"
            )
            return pd.DataFrame()

        candle_interval_in_minutes = (
            results.iloc[1].candle_time - results.iloc[0].candle_time
        ).total_seconds() / 60
        # Whole number of candle steps per window.
        steps_per_window = int(self.window_size_in_min / candle_interval_in_minutes)

        evaluation = []

        # NOTE(review): this range stops one start position before the last
        # window that would still fit; kept as-is, confirm whether intended.
        for start in range(len(results) - (steps_per_window + 1)):
            # Positional slice so the window does not depend on the index
            # labels; it holds ``steps_per_window + 1`` rows, the same rows an
            # inclusive label slice selects on a default integer index.
            sub_result = results.iloc[start : start + steps_per_window + 1]

            row = {}
            for metric in self.metrics:
                row["strategy_" + metric.metric_name] = metric.calculate(
                    sub_result.total_value_strategy
                )
                row["hodl_" + metric.metric_name] = metric.calculate(
                    sub_result.total_value_hodl
                )
            evaluation.append(row)

        return pd.DataFrame(evaluation)

    def plot_results(self, slicer_results: PandasDataFrame) -> None:
        """Log a histogram of every column of ``slicer_results`` as ``<column>.png``."""
        for col in slicer_results.columns:
            self.logger.info(f"create plot {col}")
            plt.figure(figsize=(8, 5))
            plt.hist(slicer_results[col])
            plt.title(f"Column: {col}")
            plt.xlabel(col)
            plt.grid(True)

            # Log figure directly to MLflow
            mlflow.log_figure(plt.gcf(), f"{col}.png")

            plt.close()  # free memory

    def log_metrics(self, slicer_results: PandasDataFrame) -> None:
        """Log mean and standard deviation of every column, rounded to 3 decimals."""
        for col in slicer_results.columns:
            self.logger.info(f"compute metrics for {col}")
            mlflow.log_metric(f"{col}_mean", round(slicer_results[col].mean(), 3))
            mlflow.log_metric(f"{col}_std", round(slicer_results[col].std(), 3))

    def evaluate(self, performance_results: PandasDataFrame) -> None:
        """Slice the results into windows, then log their metrics and plots."""
        slicer_results = self.results_window_slicer(performance_results)
        self.log_metrics(slicer_results)
        self.plot_results(slicer_results)
dijkies/exceptions.py ADDED
@@ -0,0 +1,60 @@
1
class TimeColumnNotDefinedError(Exception):
    """Raised when the provided data has no ``time`` column."""

    def __init__(self):
        message = "the provided data should have a 'time' column"
        super().__init__(message)
4
+
5
+
6
class InvalidTypeForTimeColumnError(Exception):
    """Raised when the ``time`` column does not have the expected dtype."""

    def __init__(self):
        message = "'time' column has not the right dtype"
        super().__init__(message)
9
+
10
+
11
class DataTimeWindowShorterThanSuggestedAnalysisWindowError(Exception):
    """Raised when the data spans less time than the analysis window."""

    def __init__(self):
        message = (
            "the timespan of provided data is shorter than the analysis window, "
            "so no backtest can be executed."
        )
        super().__init__(message)
16
+
17
+
18
class MissingOHLCVColumnsError(Exception):
    """Raised when the provided data lacks one of the OHLCV columns."""

    def __init__(self):
        message = "one of the OHLCV columns is missing in the provided data"
        super().__init__(message)
21
+
22
+
23
class DataTimeSpanDifferentFromAlgorithmSetting(Exception):
    """Raised when the time span of the data does not match the algorithm setting."""

    def __init__(self):
        # The message describes this error itself; it previously repeated the
        # text of the missing-OHLCV-columns error.
        super().__init__(
            "the time span of the provided data differs from the algorithm setting"
        )
26
+
27
+
28
class InvalidExchangeAssetClientError(Exception):
    """Raised when a backtest is started with a non-backtest exchange client."""

    def __init__(self):
        # Name the class that is actually required, BacktestExchangeAssetClient.
        super().__init__("use BacktestExchangeAssetClient! glad that I saved you :)")
31
+
32
+
33
class NoOrderFoundError(Exception):
    """Raised when no order exists for the given ``order_id``."""

    def __init__(self, order_id):
        message = f"order with order_id {order_id} not found."
        super().__init__(message)
36
+
37
+
38
class MultipleOrdersFoundError(Exception):
    """Raised when more than one order matches the given ``order_id``."""

    def __init__(self, order_id):
        message = f"multiple orders found with order_id {order_id}."
        super().__init__(message)
41
+
42
+
43
class PlaceOrderError(Exception):
    """Raised when creating an order fails; wraps the underlying message."""

    def __init__(self, message: str):
        text = f"an error occured during order creation: {message}"
        super().__init__(text)
46
+
47
+
48
class GetOrderInfoError(Exception):
    """Raised when retrieving order info fails; wraps the underlying message."""

    def __init__(self, message: str):
        text = f"an error occured during order info retrieval: {message}"
        super().__init__(text)
51
+
52
+
53
class CancelOrderError(Exception):
    """Raised when cancelling an order fails; wraps the underlying message."""

    def __init__(self, message: str):
        text = f"an error occured during cancelling order: {message}"
        super().__init__(text)
56
+
57
+
58
class MethodNotDefinedError(Exception):
    """Raised when a method that has no implementation is called."""

    def __init__(self):
        message = "method not implemented..."
        super().__init__(message)