signalflow-trading 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signalflow/__init__.py +21 -0
- signalflow/analytics/__init__.py +0 -0
- signalflow/core/__init__.py +46 -0
- signalflow/core/base_mixin.py +232 -0
- signalflow/core/containers/__init__.py +21 -0
- signalflow/core/containers/order.py +216 -0
- signalflow/core/containers/portfolio.py +211 -0
- signalflow/core/containers/position.py +296 -0
- signalflow/core/containers/raw_data.py +167 -0
- signalflow/core/containers/raw_data_view.py +169 -0
- signalflow/core/containers/signals.py +198 -0
- signalflow/core/containers/strategy_state.py +147 -0
- signalflow/core/containers/trade.py +112 -0
- signalflow/core/decorators.py +103 -0
- signalflow/core/enums.py +270 -0
- signalflow/core/registry.py +322 -0
- signalflow/core/rolling_aggregator.py +362 -0
- signalflow/core/signal_transforms/__init__.py +5 -0
- signalflow/core/signal_transforms/base_signal_transform.py +186 -0
- signalflow/data/__init__.py +11 -0
- signalflow/data/raw_data_factory.py +225 -0
- signalflow/data/raw_store/__init__.py +7 -0
- signalflow/data/raw_store/base.py +271 -0
- signalflow/data/raw_store/duckdb_stores.py +696 -0
- signalflow/data/source/__init__.py +10 -0
- signalflow/data/source/base.py +300 -0
- signalflow/data/source/binance.py +442 -0
- signalflow/data/strategy_store/__init__.py +8 -0
- signalflow/data/strategy_store/base.py +278 -0
- signalflow/data/strategy_store/duckdb.py +409 -0
- signalflow/data/strategy_store/schema.py +36 -0
- signalflow/detector/__init__.py +7 -0
- signalflow/detector/adapter/__init__.py +5 -0
- signalflow/detector/adapter/pandas_detector.py +46 -0
- signalflow/detector/base.py +390 -0
- signalflow/detector/sma_cross.py +105 -0
- signalflow/feature/__init__.py +16 -0
- signalflow/feature/adapter/__init__.py +5 -0
- signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
- signalflow/feature/base.py +330 -0
- signalflow/feature/feature_set.py +286 -0
- signalflow/feature/oscillator/__init__.py +5 -0
- signalflow/feature/oscillator/rsi_extractor.py +42 -0
- signalflow/feature/pandasta/__init__.py +10 -0
- signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
- signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
- signalflow/feature/smoother/__init__.py +5 -0
- signalflow/feature/smoother/sma_extractor.py +46 -0
- signalflow/strategy/__init__.py +9 -0
- signalflow/strategy/broker/__init__.py +15 -0
- signalflow/strategy/broker/backtest.py +172 -0
- signalflow/strategy/broker/base.py +186 -0
- signalflow/strategy/broker/executor/__init__.py +9 -0
- signalflow/strategy/broker/executor/base.py +35 -0
- signalflow/strategy/broker/executor/binance_spot.py +12 -0
- signalflow/strategy/broker/executor/virtual_spot.py +81 -0
- signalflow/strategy/broker/realtime_spot.py +12 -0
- signalflow/strategy/component/__init__.py +9 -0
- signalflow/strategy/component/base.py +65 -0
- signalflow/strategy/component/entry/__init__.py +7 -0
- signalflow/strategy/component/entry/fixed_size.py +57 -0
- signalflow/strategy/component/entry/signal.py +127 -0
- signalflow/strategy/component/exit/__init__.py +5 -0
- signalflow/strategy/component/exit/time_based.py +47 -0
- signalflow/strategy/component/exit/tp_sl.py +80 -0
- signalflow/strategy/component/metric/__init__.py +8 -0
- signalflow/strategy/component/metric/main_metrics.py +181 -0
- signalflow/strategy/runner/__init__.py +8 -0
- signalflow/strategy/runner/backtest_runner.py +208 -0
- signalflow/strategy/runner/base.py +19 -0
- signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
- signalflow/strategy/runner/realtime_runner.py +0 -0
- signalflow/target/__init__.py +14 -0
- signalflow/target/adapter/__init__.py +5 -0
- signalflow/target/adapter/pandas_labeler.py +45 -0
- signalflow/target/base.py +409 -0
- signalflow/target/fixed_horizon_labeler.py +93 -0
- signalflow/target/static_triple_barrier.py +162 -0
- signalflow/target/triple_barrier.py +188 -0
- signalflow/utils/__init__.py +7 -0
- signalflow/utils/import_utils.py +11 -0
- signalflow/utils/tune_utils.py +19 -0
- signalflow/validator/__init__.py +6 -0
- signalflow/validator/base.py +139 -0
- signalflow/validator/sklearn_validator.py +527 -0
- signalflow_trading-0.2.1.dist-info/METADATA +149 -0
- signalflow_trading-0.2.1.dist-info/RECORD +90 -0
- signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
- signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
- signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,527 @@
|
|
|
1
|
+
# IMPORTANT
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import pickle
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import polars as pl
|
|
10
|
+
|
|
11
|
+
from signalflow.core import sf_component, Signals
|
|
12
|
+
from signalflow.utils import import_model_class, build_optuna_params
|
|
13
|
+
from signalflow.validator.base import SignalValidator
|
|
14
|
+
|
|
15
|
+
SKLEARN_MODELS: dict[str, dict[str, Any]] = {
|
|
16
|
+
"lightgbm": {
|
|
17
|
+
"class": "lightgbm.LGBMClassifier",
|
|
18
|
+
"default_params": {
|
|
19
|
+
"n_estimators": 100,
|
|
20
|
+
"max_depth": 6,
|
|
21
|
+
"learning_rate": 0.1,
|
|
22
|
+
"num_leaves": 31,
|
|
23
|
+
"min_child_samples": 20,
|
|
24
|
+
"subsample": 0.8,
|
|
25
|
+
"colsample_bytree": 0.8,
|
|
26
|
+
"random_state": 42,
|
|
27
|
+
"n_jobs": -1,
|
|
28
|
+
"verbosity": -1,
|
|
29
|
+
},
|
|
30
|
+
"tune_space": {
|
|
31
|
+
"n_estimators": ("int", 50, 500),
|
|
32
|
+
"max_depth": ("int", 3, 12),
|
|
33
|
+
"learning_rate": ("log_float", 0.01, 0.3),
|
|
34
|
+
"num_leaves": ("int", 15, 127),
|
|
35
|
+
"min_child_samples": ("int", 5, 100),
|
|
36
|
+
"subsample": ("float", 0.6, 1.0),
|
|
37
|
+
"colsample_bytree": ("float", 0.6, 1.0),
|
|
38
|
+
},
|
|
39
|
+
},
|
|
40
|
+
"xgboost": {
|
|
41
|
+
"class": "xgboost.XGBClassifier",
|
|
42
|
+
"default_params": {
|
|
43
|
+
"n_estimators": 100,
|
|
44
|
+
"max_depth": 6,
|
|
45
|
+
"learning_rate": 0.1,
|
|
46
|
+
"subsample": 0.8,
|
|
47
|
+
"colsample_bytree": 0.8,
|
|
48
|
+
"random_state": 42,
|
|
49
|
+
"n_jobs": -1,
|
|
50
|
+
"verbosity": 0,
|
|
51
|
+
"use_label_encoder": False,
|
|
52
|
+
"eval_metric": "logloss",
|
|
53
|
+
},
|
|
54
|
+
"tune_space": {
|
|
55
|
+
"n_estimators": ("int", 50, 500),
|
|
56
|
+
"max_depth": ("int", 3, 12),
|
|
57
|
+
"learning_rate": ("log_float", 0.01, 0.3),
|
|
58
|
+
"subsample": ("float", 0.6, 1.0),
|
|
59
|
+
"colsample_bytree": ("float", 0.6, 1.0),
|
|
60
|
+
"min_child_weight": ("int", 1, 10),
|
|
61
|
+
"gamma": ("float", 0, 0.5),
|
|
62
|
+
},
|
|
63
|
+
},
|
|
64
|
+
"random_forest": {
|
|
65
|
+
"class": "sklearn.ensemble.RandomForestClassifier",
|
|
66
|
+
"default_params": {
|
|
67
|
+
"n_estimators": 100,
|
|
68
|
+
"max_depth": 10,
|
|
69
|
+
"min_samples_split": 5,
|
|
70
|
+
"min_samples_leaf": 2,
|
|
71
|
+
"max_features": "sqrt",
|
|
72
|
+
"random_state": 42,
|
|
73
|
+
"n_jobs": -1,
|
|
74
|
+
},
|
|
75
|
+
"tune_space": {
|
|
76
|
+
"n_estimators": ("int", 50, 300),
|
|
77
|
+
"max_depth": ("int", 5, 30),
|
|
78
|
+
"min_samples_split": ("int", 2, 20),
|
|
79
|
+
"min_samples_leaf": ("int", 1, 10),
|
|
80
|
+
},
|
|
81
|
+
},
|
|
82
|
+
"logistic_regression": {
|
|
83
|
+
"class": "sklearn.linear_model.LogisticRegression",
|
|
84
|
+
"default_params": {
|
|
85
|
+
"C": 1.0,
|
|
86
|
+
"max_iter": 1000,
|
|
87
|
+
"random_state": 42,
|
|
88
|
+
"n_jobs": -1,
|
|
89
|
+
},
|
|
90
|
+
"tune_space": {
|
|
91
|
+
"C": ("log_float", 1e-4, 100),
|
|
92
|
+
"penalty": ("categorical", ["l1", "l2"]),
|
|
93
|
+
"solver": ("categorical", ["saga"]),
|
|
94
|
+
},
|
|
95
|
+
},
|
|
96
|
+
"svm": {
|
|
97
|
+
"class": "sklearn.svm.SVC",
|
|
98
|
+
"default_params": {
|
|
99
|
+
"C": 1.0,
|
|
100
|
+
"kernel": "rbf",
|
|
101
|
+
"probability": True,
|
|
102
|
+
"random_state": 42,
|
|
103
|
+
},
|
|
104
|
+
"tune_space": {
|
|
105
|
+
"C": ("log_float", 1e-3, 100),
|
|
106
|
+
"kernel": ("categorical", ["rbf", "linear", "poly"]),
|
|
107
|
+
"gamma": ("categorical", ["scale", "auto"]),
|
|
108
|
+
},
|
|
109
|
+
},
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
AUTO_SELECT_MODELS = ["lightgbm", "xgboost", "random_forest", "logistic_regression"]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@dataclass
|
|
116
|
+
@sf_component(name="sklearn")
|
|
117
|
+
class SklearnSignalValidator(SignalValidator):
|
|
118
|
+
"""Sklearn-based signal validator.
|
|
119
|
+
|
|
120
|
+
Supports:
|
|
121
|
+
- Multiple sklearn-compatible models (LightGBM, XGBoost, RF, etc.)
|
|
122
|
+
- Automatic model selection via cross-validation
|
|
123
|
+
- Hyperparameter tuning with Optuna
|
|
124
|
+
- Early stopping for boosting models
|
|
125
|
+
|
|
126
|
+
Note: Filter data to active signals (not NONE) BEFORE calling fit().
|
|
127
|
+
This gives you full control over data preparation.
|
|
128
|
+
|
|
129
|
+
Example:
|
|
130
|
+
>>> # Prepare data - filter to active signals
|
|
131
|
+
>>> df = df.filter(pl.col("signal_type") != "none")
|
|
132
|
+
>>>
|
|
133
|
+
>>> validator = SklearnSignalValidator(model_type="lightgbm")
|
|
134
|
+
>>> validator.fit(
|
|
135
|
+
... train_df.select(["pair", "timestamp"] + feature_cols),
|
|
136
|
+
... train_df.select("label"),
|
|
137
|
+
... )
|
|
138
|
+
>>>
|
|
139
|
+
>>> # validate_signals returns Signals object
|
|
140
|
+
>>> validated = validator.validate_signals(
|
|
141
|
+
... Signals(test_df.select(signal_cols)),
|
|
142
|
+
... test_df.select(["pair", "timestamp"] + feature_cols),
|
|
143
|
+
... )
|
|
144
|
+
>>> validated.value.filter(pl.col("probability_rise") > 0.7)
|
|
145
|
+
"""
|
|
146
|
+
|
|
147
|
+
auto_select_metric: str = "roc_auc"
|
|
148
|
+
auto_select_cv_folds: int = 5
|
|
149
|
+
|
|
150
|
+
def __post_init__(self) -> None:
|
|
151
|
+
if self.model_params is None:
|
|
152
|
+
self.model_params = {}
|
|
153
|
+
if self.train_params is None:
|
|
154
|
+
self.train_params = {}
|
|
155
|
+
if self.tune_params is None:
|
|
156
|
+
self.tune_params = {"n_trials": 50, "cv_folds": 5, "timeout": 600}
|
|
157
|
+
|
|
158
|
+
def _get_model_config(self, model_type: str) -> dict[str, Any]:
    """Look up the catalog entry for ``model_type`` in ``SKLEARN_MODELS``.

    Args:
        model_type: Catalog key, e.g. ``"lightgbm"``.

    Returns:
        The catalog entry (the same dict object stored in the catalog,
        not a copy).

    Raises:
        ValueError: If ``model_type`` is not a catalog key; the message
            lists the available keys.
    """
    if model_type in SKLEARN_MODELS:
        return SKLEARN_MODELS[model_type]

    known_types = ", ".join(SKLEARN_MODELS)
    raise ValueError(f"Unknown model_type: {model_type}. Available: {known_types}")
|
|
164
|
+
|
|
165
|
+
def _create_model(self, model_type: str, params: dict | None = None) -> Any:
    """Instantiate the estimator registered under ``model_type``.

    Args:
        model_type: Catalog key passed to ``_get_model_config``.
        params: Optional overrides layered on top of the catalog's
            ``default_params``; ``None`` or an empty dict means
            "defaults only".

    Returns:
        A new instance of the class named by the catalog entry.
    """
    entry = self._get_model_config(model_type)
    estimator_cls = import_model_class(entry["class"])

    # Copy the defaults so the shared catalog entry is never mutated.
    merged = dict(entry["default_params"])
    merged.update(params or {})

    return estimator_cls(**merged)
|
|
175
|
+
|
|
176
|
+
def _extract_features(
    self,
    X: pl.DataFrame,
    fit_mode: bool = False,
) -> np.ndarray:
    """Turn a features DataFrame into a numpy matrix.

    Args:
        X: Input DataFrame.
        fit_mode: When True, every column of ``X`` except the pair and
            timestamp key columns is recorded as ``self.feature_columns``
            before extraction.

    Returns:
        The ``self.feature_columns`` columns of ``X``, in that order,
        converted with ``to_numpy()``.

    Raises:
        ValueError: If no feature columns have been recorded yet, or if
            ``X`` lacks any of the recorded columns.
    """
    key_cols = {self.pair_col, self.ts_col}

    if fit_mode:
        self.feature_columns = [name for name in X.columns if name not in key_cols]

    selected = self.feature_columns
    if selected is None:
        raise ValueError("feature_columns not set. Call fit() first.")

    absent = set(selected).difference(X.columns)
    if absent:
        raise ValueError(f"Missing feature columns: {sorted(absent)}")

    return X.select(selected).to_numpy()
|
|
203
|
+
|
|
204
|
+
def _extract_labels(self, y: pl.DataFrame | pl.Series) -> np.ndarray:
    """Convert labels to a numpy array.

    Args:
        y: A Series, a single-column DataFrame, or a DataFrame that
            contains a ``"label"`` column.

    Returns:
        The labels as a numpy array (flattened with ``ravel()`` for a
        single-column DataFrame).

    Raises:
        ValueError: If ``y`` is a multi-column DataFrame without a
            ``"label"`` column.
    """
    if not isinstance(y, pl.DataFrame):
        return y.to_numpy()

    if y.width == 1:
        return y.to_numpy().ravel()

    if "label" in y.columns:
        return y["label"].to_numpy()

    raise ValueError("y DataFrame must have single column or 'label' column")
|
|
214
|
+
|
|
215
|
+
def _auto_select_model(
    self,
    X: np.ndarray,
    y: np.ndarray,
) -> tuple[str, dict]:
    """Select the best catalog model by cross-validated score.

    Every model type in ``AUTO_SELECT_MODELS`` is built with its default
    parameters and scored with ``cross_val_score`` using
    ``self.auto_select_cv_folds`` folds and ``self.auto_select_metric``.
    Candidates that cannot be built or scored (for example because an
    optional dependency is not installed) are skipped.

    Args:
        X: Feature matrix.
        y: Label array.

    Returns:
        Tuple of (best_model_type, best_params), where ``best_params`` is
        a copy of the winning model's default parameters.

    Raises:
        RuntimeError: If no candidate produced a usable score. The
            failure of each skipped candidate is attached as a note and
            the last failure is chained as the cause, so the real reason
            is not lost.
    """
    from sklearn.model_selection import cross_val_score

    best_score = -np.inf
    best_model_type = None
    best_params = None
    failures: dict[str, Exception] = {}

    for model_type in AUTO_SELECT_MODELS:
        try:
            config = self._get_model_config(model_type)
            model = self._create_model(model_type)
            scores = cross_val_score(
                model, X, y,
                cv=self.auto_select_cv_folds,
                scoring=self.auto_select_metric,
                n_jobs=-1,
            )
        except Exception as exc:
            # Best-effort selection: a candidate that cannot be imported,
            # built or scored must not abort the whole search, but the
            # reason is kept for the final error report.
            failures[model_type] = exc
            continue

        mean_score = scores.mean()

        # A NaN mean never compares greater, so such candidates are ignored.
        if mean_score > best_score:
            best_score = mean_score
            best_model_type = model_type
            best_params = config["default_params"].copy()

    if best_model_type is None:
        error = RuntimeError("No suitable model found. Install lightgbm, xgboost, or scikit-learn.")
        last_failure = None
        for failed_type, failure in failures.items():
            error.add_note(f"{failed_type}: {type(failure).__name__}: {failure}")
            last_failure = failure
        raise error from last_failure

    return best_model_type, best_params
|
|
258
|
+
|
|
259
|
+
def fit(
    self,
    X_train: pl.DataFrame,
    y_train: pl.DataFrame | pl.Series,
    X_val: pl.DataFrame | None = None,
    y_val: pl.DataFrame | pl.Series | None = None,
) -> "SklearnSignalValidator":
    """Train the validator.

    Note: Filter to active signals BEFORE calling this method.

    When ``model_type`` is ``"auto"`` or ``None``, the model type and its
    parameters are first chosen by ``_auto_select_model`` and stored on
    the instance. For LightGBM and XGBoost, early stopping is enabled
    when both ``X_val`` and ``y_val`` are given; the patience is read
    from ``train_params["early_stopping_rounds"]`` (default 50).

    Args:
        X_train: Training features (already filtered to active signals)
        y_train: Training labels
        X_val: Validation features (optional)
        y_val: Validation labels (optional)

    Returns:
        Self for method chaining
    """
    X_np = self._extract_features(X_train, fit_mode=True)
    y_np = self._extract_labels(y_train)

    if self.model_type == "auto" or self.model_type is None:
        self.model_type, self.model_params = self._auto_select_model(X_np, y_np)

    self.model = self._create_model(self.model_type, self.model_params)

    fit_kwargs: dict[str, Any] = {}

    if X_val is not None and y_val is not None:
        # Extracted for every model type so that a validation frame with
        # missing feature columns is still reported.
        X_val_np = self._extract_features(X_val)
        y_val_np = self._extract_labels(y_val)

        if self.model_type == "lightgbm":
            import lightgbm

            early_stopping = self.train_params.get("early_stopping_rounds", 50)
            fit_kwargs["eval_set"] = [(X_val_np, y_val_np)]
            fit_kwargs["callbacks"] = [
                lightgbm.early_stopping(early_stopping, verbose=False)
            ]
        elif self.model_type == "xgboost":
            early_stopping = self.train_params.get("early_stopping_rounds", 50)
            # XGBoost deprecated ``early_stopping_rounds`` as a fit()
            # argument in 1.6 and removed it in 2.0; it has to be set on
            # the estimator itself.
            self.model.set_params(early_stopping_rounds=early_stopping)
            fit_kwargs["eval_set"] = [(X_val_np, y_val_np)]
            fit_kwargs["verbose"] = False

    self.model.fit(X_np, y_np, **fit_kwargs)

    return self
|
|
311
|
+
|
|
312
|
+
def tune(
    self,
    X_train: pl.DataFrame,
    y_train: pl.DataFrame | pl.Series,
    X_val: pl.DataFrame | None = None,
    y_val: pl.DataFrame | pl.Series | None = None,
) -> dict[str, Any]:
    """Tune hyperparameters using Optuna.

    Note: Filter to active signals BEFORE calling this method.

    Each trial samples parameters from the model's ``tune_space``, builds
    a model and scores it with cross-validation on the training data
    using ``self.auto_select_metric``. The budget comes from
    ``self.tune_params`` (``n_trials``, ``cv_folds``, ``timeout``). The
    best parameters are stored in ``self.model_params``; the model itself
    is not fitted here.

    Args:
        X_train: Training features (already filtered to active signals)
        y_train: Training labels
        X_val: Accepted for interface compatibility; not used here.
        y_val: Accepted for interface compatibility; not used here.

    Returns:
        Best parameters found, layered over the model's default params

    Raises:
        ValueError: If ``model_type`` is ``None`` or ``"auto"``.
    """
    import optuna
    from sklearn.model_selection import cross_val_score

    if self.model_type is None or self.model_type == "auto":
        raise ValueError("Set model_type before tuning (not 'auto')")

    config = self._get_model_config(self.model_type)
    tune_space = config["tune_space"]

    X_np = self._extract_features(X_train, fit_mode=True)
    y_np = self._extract_labels(y_train)

    n_trials = self.tune_params.get("n_trials", 50)
    cv_folds = self.tune_params.get("cv_folds", 5)
    timeout = self.tune_params.get("timeout", 600)

    def objective(trial: optuna.Trial) -> float:
        # Defaults are the base and the sampled values go on top. The
        # reverse order would overwrite every sampled value that also
        # has a default, so that parameter would never be tuned.
        params = {**config["default_params"], **build_optuna_params(trial, tune_space)}

        model = self._create_model(self.model_type, params)

        scores = cross_val_score(
            model, X_np, y_np,
            cv=cv_folds,
            scoring=self.auto_select_metric,
            n_jobs=-1,
        )
        return scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(
        objective,
        n_trials=n_trials,
        timeout=timeout,
        show_progress_bar=True,
    )

    best_params = {**config["default_params"], **study.best_params}
    self.model_params = best_params

    return best_params
|
|
368
|
+
|
|
369
|
+
def predict(self, signals: Signals, X: pl.DataFrame) -> Signals:
    """Predict a class label for every signal row.

    Features are aligned to the signals with a left join on the pair and
    timestamp key columns before being passed to the model.

    Args:
        signals: Input signals container
        X: Features DataFrame with (pair, timestamp) + feature columns

    Returns:
        New Signals whose frame has a 'validation_pred' column added

    Raises:
        ValueError: If no model has been fitted yet.
    """
    if self.model is None:
        raise ValueError("Model not fitted. Call fit() first.")

    frame = signals.value
    join_keys = [self.pair_col, self.ts_col]

    # Keep exactly the signal keys and pull in the matching feature rows.
    aligned = frame.select(join_keys).join(X, on=join_keys, how="left")
    labels = self.model.predict(self._extract_features(aligned))

    return Signals(
        frame.with_columns(pl.Series(name="validation_pred", values=labels))
    )
|
|
399
|
+
|
|
400
|
+
def predict_proba(self, signals: Signals, X: pl.DataFrame) -> Signals:
    """Predict class probabilities for every signal row.

    Features are aligned to the signals with a left join on the pair and
    timestamp key columns before being passed to the model.

    Args:
        signals: Input signals container
        X: Features DataFrame with (pair, timestamp) + feature columns

    Returns:
        New Signals with one ``probability_<label>`` column per class
        label reported by ``_get_class_labels()``

    Raises:
        ValueError: If no model has been fitted yet.
    """
    if self.model is None:
        raise ValueError("Model not fitted. Call fit() first.")

    frame = signals.value
    labels = self._get_class_labels()
    join_keys = [self.pair_col, self.ts_col]

    # Keep exactly the signal keys and pull in the matching feature rows.
    aligned = frame.select(join_keys).join(X, on=join_keys, how="left")
    probabilities = self.model.predict_proba(self._extract_features(aligned))

    # Column ``position`` of the probability matrix belongs to ``label``.
    for position, label in enumerate(labels):
        column = pl.Series(
            name=f"probability_{label}",
            values=probabilities[:, position],
        )
        frame = frame.with_columns(column)

    return Signals(frame)
|
|
435
|
+
|
|
436
|
+
def validate_signals(
    self,
    signals: Signals,
    features: pl.DataFrame,
    prefix: str = "probability_",
) -> Signals:
    """Add validation probabilities to signals.

    Adds one probability column per class label reported by
    ``_get_class_labels()``. With the default prefix and the labels
    none/rise/fall these are:
    - probability_none: P(signal is noise / not actionable)
    - probability_rise: P(signal leads to price rise)
    - probability_fall: P(signal leads to price fall)

    Args:
        signals: Input Signals container
        features: Features DataFrame with (pair, timestamp) + features
        prefix: Prefix for probability columns (default: "probability_")

    Returns:
        New Signals with probability columns added.

    Example:
        >>> validated = validator.validate_signals(signals, features)
        >>> df = validated.value
        >>> confident_rise = df.filter(
        ...     (pl.col("signal_type") == "rise") &
        ...     (pl.col("probability_rise") > 0.7)
        ... )
    """
    default_prefix = "probability_"
    validated = self.predict_proba(signals, features)

    if prefix == default_prefix:
        return validated

    # predict_proba always names its columns with the default prefix, so
    # a custom prefix is applied by renaming exactly those columns.
    renames = {
        f"{default_prefix}{label}": f"{prefix}{label}"
        for label in self._get_class_labels()
    }
    return Signals(validated.value.rename(renames))
|
|
466
|
+
|
|
467
|
+
def _get_class_labels(self) -> list[str]:
|
|
468
|
+
"""Get class labels for probability columns.
|
|
469
|
+
|
|
470
|
+
Maps numeric classes to SignalType names.
|
|
471
|
+
"""
|
|
472
|
+
if self.model is None:
|
|
473
|
+
raise ValueError("Model not fitted.")
|
|
474
|
+
|
|
475
|
+
classes = getattr(self.model, "classes_", None)
|
|
476
|
+
if classes is None:
|
|
477
|
+
return ["none", "rise", "fall"]
|
|
478
|
+
|
|
479
|
+
label_map = {
|
|
480
|
+
0: "none",
|
|
481
|
+
1: "rise",
|
|
482
|
+
2: "fall",
|
|
483
|
+
"none": "none",
|
|
484
|
+
"rise": "rise",
|
|
485
|
+
"fall": "fall",
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
return [label_map.get(c, str(c)) for c in classes]
|
|
489
|
+
|
|
490
|
+
def save(self, path: str | Path) -> None:
|
|
491
|
+
"""Save validator to file."""
|
|
492
|
+
path = Path(path)
|
|
493
|
+
|
|
494
|
+
state = {
|
|
495
|
+
"model": self.model,
|
|
496
|
+
"model_type": self.model_type,
|
|
497
|
+
"model_params": self.model_params,
|
|
498
|
+
"train_params": self.train_params,
|
|
499
|
+
"tune_params": self.tune_params,
|
|
500
|
+
"feature_columns": self.feature_columns,
|
|
501
|
+
"pair_col": self.pair_col,
|
|
502
|
+
"ts_col": self.ts_col,
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
with open(path, "wb") as f:
|
|
506
|
+
pickle.dump(state, f)
|
|
507
|
+
|
|
508
|
+
@classmethod
def load(cls, path: str | Path) -> "SklearnSignalValidator":
    """Rebuild a validator from a file written by ``save()``.

    The pair/timestamp column names default to "pair" and "timestamp"
    when the stored state does not contain them.

    Security: the file is read with ``pickle.load``, which can execute
    arbitrary code while unpickling. Only load files from trusted
    sources.

    Args:
        path: File to read; it is opened for binary reading.

    Returns:
        A new validator constructed from the stored state.
    """
    with Path(path).open("rb") as handle:
        state = pickle.load(handle)

    required_fields = (
        "model",
        "model_type",
        "model_params",
        "train_params",
        "tune_params",
        "feature_columns",
    )
    init_kwargs = {name: state[name] for name in required_fields}
    init_kwargs["pair_col"] = state.get("pair_col", "pair")
    init_kwargs["ts_col"] = state.get("ts_col", "timestamp")

    return cls(**init_kwargs)
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: signalflow-trading
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: SignalFlow: modular framework for trading signal generation, validation and execution
|
|
5
|
+
Author: pathway2nothing
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 SignalFlow-Trading
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
Project-URL: Homepage, https://github.com/pathway2nothing/signalflow-trading
|
|
28
|
+
Project-URL: Source, https://github.com/pathway2nothing/signalflow-trading
|
|
29
|
+
Project-URL: Issues, https://github.com/pathway2nothing/signalflow-trading/issues
|
|
30
|
+
Requires-Python: >=3.12
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
License-File: LICENSE
|
|
33
|
+
Requires-Dist: pandas>=2.0.0
|
|
34
|
+
Requires-Dist: polars>=0.20.0
|
|
35
|
+
Requires-Dist: duckdb>=1.0.0
|
|
36
|
+
Requires-Dist: loguru>=0.7.0
|
|
37
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
38
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
39
|
+
Requires-Dist: optuna>=3.0.0
|
|
40
|
+
Requires-Dist: numba>=0.58.0
|
|
41
|
+
Requires-Dist: lightning>=2.1.0
|
|
42
|
+
Requires-Dist: numpy>=1.24.0
|
|
43
|
+
Requires-Dist: pandas-ta>=0.3.14b
|
|
44
|
+
Provides-Extra: docs
|
|
45
|
+
Requires-Dist: mkdocs>=1.6; extra == "docs"
|
|
46
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "docs"
|
|
47
|
+
Requires-Dist: mkdocstrings[python]>=0.25; extra == "docs"
|
|
48
|
+
Requires-Dist: pymdown-extensions>=10.0; extra == "docs"
|
|
49
|
+
Requires-Dist: mkdocs-minify-plugin>=0.7.0; extra == "docs"
|
|
50
|
+
Dynamic: license-file
|
|
51
|
+
|
|
52
|
+
# SignalFlow
|
|
53
|
+
|
|
54
|
+
**SignalFlow** is a high-performance Python framework for algorithmic trading, designed to manage the full strategy lifecycle from signal detection to execution. It bridges the gap between research and production by providing a robust pipeline for signal generation, meta-labeling validation, and automated trading.
|
|
55
|
+
|
|
56
|
+
## Core Architecture: The Signal Pipeline
|
|
57
|
+
|
|
58
|
+
The framework implements a modular three-stage processing logic:
|
|
59
|
+
|
|
60
|
+
1. **🕵️ Signal Detector**: Scans market data (OHLCV or tick) to identify potential market events. Detectors can range from simple SMA crossovers to complex deep learning models.
|
|
61
|
+
|
|
62
|
+
2. **⚖️ Signal Validator (Meta-Labeling)**: Based on López de Prado's methodology, this stage assesses the quality and risk of detected signals using classification models (e.g., LightGBM, XGBoost).
|
|
63
|
+
|
|
64
|
+
3. **♟️ Trading Strategy**: Converts validated signals into actionable trade positions, managing entry, exit, and risk.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
## Key Features
|
|
68
|
+
|
|
69
|
+
* **Polars-First Performance**: Core data processing utilizes `polars` for extreme efficiency with large datasets.
|
|
70
|
+
|
|
71
|
+
* **Production Ready**: Code written for research and backtesting is designed for direct deployment to live trading.
|
|
72
|
+
|
|
73
|
+
* **Advanced Labeling**: Native support for Triple-Barrier Method and Fixed-Horizon labeling for ML training.
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
* **Kedro Integration**: Fully compatible with Kedro for reproducible R&D and automated data pipelines.
|
|
77
|
+
|
|
78
|
+
* **Flexible Extensibility**: Easily add custom features via the `@sf_component` registry.
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
## Quick Start
|
|
82
|
+
|
|
83
|
+
### Installation
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install signalflow-trading
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Signal Detection Example
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from signalflow.core import RawDataView
|
|
94
|
+
from signalflow.detector import SmaCrossSignalDetector
|
|
95
|
+
|
|
96
|
+
# Initialize a detector (SMA 20/50 crossover)
|
|
97
|
+
detector = SmaCrossSignalDetector(fast_period=20, slow_period=50)
|
|
98
|
+
|
|
99
|
+
# Run detection on a data snapshot
|
|
100
|
+
signals = detector.run(raw_data_view)
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Signal Validation (Meta-Labeling)
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from signalflow.validator import SklearnSignalValidator
|
|
108
|
+
|
|
109
|
+
# Create a validator using LightGBM
|
|
110
|
+
validator = SklearnSignalValidator(model_type="lightgbm")
|
|
111
|
+
|
|
112
|
+
# Fit the model on labeled historical signals
|
|
113
|
+
validator.fit(X_train, y_train)
|
|
114
|
+
|
|
115
|
+
# Validate new signals to get success probabilities
|
|
116
|
+
validated_signals = validator.validate_signals(signals, features)
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Tech Stack
|
|
121
|
+
|
|
122
|
+
* **Data**: `polars`, `pandas`, `duckdb`.
|
|
123
|
+
|
|
124
|
+
* **ML/Compute**: `pytorch`, `lightning`, `scikit-learn`, `numba`, `optuna`.
|
|
125
|
+
|
|
126
|
+
* **Technical Analysis**: `pandas-ta`.
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
## Package Structure
|
|
130
|
+
|
|
131
|
+
* `signalflow.core`: Core data containers (`RawData`, `Signals`) and registries.
|
|
132
|
+
|
|
133
|
+
* `signalflow.data`: Binance API loaders and DuckDB storage.
|
|
134
|
+
|
|
135
|
+
* `signalflow.feature`: Feature extractors and technical indicator adapters.
|
|
136
|
+
|
|
137
|
+
* `signalflow.target`: Advanced labeling techniques for machine learning.
|
|
138
|
+
|
|
139
|
+
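* `signalflow.detector`: Ready-to-use signal detection algorithms.
* `signalflow.validator`: Signal validators for meta-labeling (e.g. `SklearnSignalValidator`).
* `signalflow.strategy`: Brokers, executors, entry/exit components, metrics, and backtest runners.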
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
**License:** MIT
|
|
146
|
+
|
|
147
|
+
**Author:** pathway2nothing
|
|
148
|
+
|
|
149
|
+
---
|