signalflow-trading 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. signalflow/__init__.py +21 -0
  2. signalflow/analytics/__init__.py +0 -0
  3. signalflow/core/__init__.py +46 -0
  4. signalflow/core/base_mixin.py +232 -0
  5. signalflow/core/containers/__init__.py +21 -0
  6. signalflow/core/containers/order.py +216 -0
  7. signalflow/core/containers/portfolio.py +211 -0
  8. signalflow/core/containers/position.py +296 -0
  9. signalflow/core/containers/raw_data.py +167 -0
  10. signalflow/core/containers/raw_data_view.py +169 -0
  11. signalflow/core/containers/signals.py +198 -0
  12. signalflow/core/containers/strategy_state.py +147 -0
  13. signalflow/core/containers/trade.py +112 -0
  14. signalflow/core/decorators.py +103 -0
  15. signalflow/core/enums.py +270 -0
  16. signalflow/core/registry.py +322 -0
  17. signalflow/core/rolling_aggregator.py +362 -0
  18. signalflow/core/signal_transforms/__init__.py +5 -0
  19. signalflow/core/signal_transforms/base_signal_transform.py +186 -0
  20. signalflow/data/__init__.py +11 -0
  21. signalflow/data/raw_data_factory.py +225 -0
  22. signalflow/data/raw_store/__init__.py +7 -0
  23. signalflow/data/raw_store/base.py +271 -0
  24. signalflow/data/raw_store/duckdb_stores.py +696 -0
  25. signalflow/data/source/__init__.py +10 -0
  26. signalflow/data/source/base.py +300 -0
  27. signalflow/data/source/binance.py +442 -0
  28. signalflow/data/strategy_store/__init__.py +8 -0
  29. signalflow/data/strategy_store/base.py +278 -0
  30. signalflow/data/strategy_store/duckdb.py +409 -0
  31. signalflow/data/strategy_store/schema.py +36 -0
  32. signalflow/detector/__init__.py +7 -0
  33. signalflow/detector/adapter/__init__.py +5 -0
  34. signalflow/detector/adapter/pandas_detector.py +46 -0
  35. signalflow/detector/base.py +390 -0
  36. signalflow/detector/sma_cross.py +105 -0
  37. signalflow/feature/__init__.py +16 -0
  38. signalflow/feature/adapter/__init__.py +5 -0
  39. signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
  40. signalflow/feature/base.py +330 -0
  41. signalflow/feature/feature_set.py +286 -0
  42. signalflow/feature/oscillator/__init__.py +5 -0
  43. signalflow/feature/oscillator/rsi_extractor.py +42 -0
  44. signalflow/feature/pandasta/__init__.py +10 -0
  45. signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
  46. signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
  47. signalflow/feature/smoother/__init__.py +5 -0
  48. signalflow/feature/smoother/sma_extractor.py +46 -0
  49. signalflow/strategy/__init__.py +9 -0
  50. signalflow/strategy/broker/__init__.py +15 -0
  51. signalflow/strategy/broker/backtest.py +172 -0
  52. signalflow/strategy/broker/base.py +186 -0
  53. signalflow/strategy/broker/executor/__init__.py +9 -0
  54. signalflow/strategy/broker/executor/base.py +35 -0
  55. signalflow/strategy/broker/executor/binance_spot.py +12 -0
  56. signalflow/strategy/broker/executor/virtual_spot.py +81 -0
  57. signalflow/strategy/broker/realtime_spot.py +12 -0
  58. signalflow/strategy/component/__init__.py +9 -0
  59. signalflow/strategy/component/base.py +65 -0
  60. signalflow/strategy/component/entry/__init__.py +7 -0
  61. signalflow/strategy/component/entry/fixed_size.py +57 -0
  62. signalflow/strategy/component/entry/signal.py +127 -0
  63. signalflow/strategy/component/exit/__init__.py +5 -0
  64. signalflow/strategy/component/exit/time_based.py +47 -0
  65. signalflow/strategy/component/exit/tp_sl.py +80 -0
  66. signalflow/strategy/component/metric/__init__.py +8 -0
  67. signalflow/strategy/component/metric/main_metrics.py +181 -0
  68. signalflow/strategy/runner/__init__.py +8 -0
  69. signalflow/strategy/runner/backtest_runner.py +208 -0
  70. signalflow/strategy/runner/base.py +19 -0
  71. signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
  72. signalflow/strategy/runner/realtime_runner.py +0 -0
  73. signalflow/target/__init__.py +14 -0
  74. signalflow/target/adapter/__init__.py +5 -0
  75. signalflow/target/adapter/pandas_labeler.py +45 -0
  76. signalflow/target/base.py +409 -0
  77. signalflow/target/fixed_horizon_labeler.py +93 -0
  78. signalflow/target/static_triple_barrier.py +162 -0
  79. signalflow/target/triple_barrier.py +188 -0
  80. signalflow/utils/__init__.py +7 -0
  81. signalflow/utils/import_utils.py +11 -0
  82. signalflow/utils/tune_utils.py +19 -0
  83. signalflow/validator/__init__.py +6 -0
  84. signalflow/validator/base.py +139 -0
  85. signalflow/validator/sklearn_validator.py +527 -0
  86. signalflow_trading-0.2.1.dist-info/METADATA +149 -0
  87. signalflow_trading-0.2.1.dist-info/RECORD +90 -0
  88. signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
  89. signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
  90. signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,527 @@
1
+ # IMPORTANT
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+ from pathlib import Path
6
+ import pickle
7
+
8
+ import numpy as np
9
+ import polars as pl
10
+
11
+ from signalflow.core import sf_component, Signals
12
+ from signalflow.utils import import_model_class, build_optuna_params
13
+ from signalflow.validator.base import SignalValidator
14
+
15
# Catalog of supported classifiers.
#
# Every entry carries three keys:
#   "class"          - dotted import path of the estimator class
#   "default_params" - constructor keyword arguments used when nothing is overridden
#   "tune_space"     - per-parameter search specification handed to
#                      ``build_optuna_params`` (presumably ("int" | "float" |
#                      "log_float", low, high) or ("categorical", choices);
#                      confirm against that helper)
_LIGHTGBM_CONFIG: dict[str, Any] = {
    "class": "lightgbm.LGBMClassifier",
    "default_params": {
        "n_estimators": 100,
        "max_depth": 6,
        "learning_rate": 0.1,
        "num_leaves": 31,
        "min_child_samples": 20,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "random_state": 42,
        "n_jobs": -1,
        "verbosity": -1,
    },
    "tune_space": {
        "n_estimators": ("int", 50, 500),
        "max_depth": ("int", 3, 12),
        "learning_rate": ("log_float", 0.01, 0.3),
        "num_leaves": ("int", 15, 127),
        "min_child_samples": ("int", 5, 100),
        "subsample": ("float", 0.6, 1.0),
        "colsample_bytree": ("float", 0.6, 1.0),
    },
}

_XGBOOST_CONFIG: dict[str, Any] = {
    "class": "xgboost.XGBClassifier",
    "default_params": {
        "n_estimators": 100,
        "max_depth": 6,
        "learning_rate": 0.1,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "random_state": 42,
        "n_jobs": -1,
        "verbosity": 0,
        "use_label_encoder": False,
        "eval_metric": "logloss",
    },
    "tune_space": {
        "n_estimators": ("int", 50, 500),
        "max_depth": ("int", 3, 12),
        "learning_rate": ("log_float", 0.01, 0.3),
        "subsample": ("float", 0.6, 1.0),
        "colsample_bytree": ("float", 0.6, 1.0),
        "min_child_weight": ("int", 1, 10),
        "gamma": ("float", 0, 0.5),
    },
}

_RANDOM_FOREST_CONFIG: dict[str, Any] = {
    "class": "sklearn.ensemble.RandomForestClassifier",
    "default_params": {
        "n_estimators": 100,
        "max_depth": 10,
        "min_samples_split": 5,
        "min_samples_leaf": 2,
        "max_features": "sqrt",
        "random_state": 42,
        "n_jobs": -1,
    },
    "tune_space": {
        "n_estimators": ("int", 50, 300),
        "max_depth": ("int", 5, 30),
        "min_samples_split": ("int", 2, 20),
        "min_samples_leaf": ("int", 1, 10),
    },
}

_LOGISTIC_REGRESSION_CONFIG: dict[str, Any] = {
    "class": "sklearn.linear_model.LogisticRegression",
    "default_params": {
        "C": 1.0,
        "max_iter": 1000,
        "random_state": 42,
        "n_jobs": -1,
    },
    "tune_space": {
        "C": ("log_float", 1e-4, 100),
        "penalty": ("categorical", ["l1", "l2"]),
        "solver": ("categorical", ["saga"]),
    },
}

_SVM_CONFIG: dict[str, Any] = {
    "class": "sklearn.svm.SVC",
    "default_params": {
        "C": 1.0,
        "kernel": "rbf",
        "probability": True,
        "random_state": 42,
    },
    "tune_space": {
        "C": ("log_float", 1e-3, 100),
        "kernel": ("categorical", ["rbf", "linear", "poly"]),
        "gamma": ("categorical", ["scale", "auto"]),
    },
}

SKLEARN_MODELS: dict[str, dict[str, Any]] = {
    "lightgbm": _LIGHTGBM_CONFIG,
    "xgboost": _XGBOOST_CONFIG,
    "random_forest": _RANDOM_FOREST_CONFIG,
    "logistic_regression": _LOGISTIC_REGRESSION_CONFIG,
    "svm": _SVM_CONFIG,
}

# Candidates compared during automatic model selection ("svm" is not included).
AUTO_SELECT_MODELS = ["lightgbm", "xgboost", "random_forest", "logistic_regression"]
113
+
114
+
115
@dataclass
@sf_component(name="sklearn")
class SklearnSignalValidator(SignalValidator):
    """Sklearn-based signal validator.

    Supports:
    - Multiple sklearn-compatible models (LightGBM, XGBoost, RF, etc.)
    - Automatic model selection via cross-validation
    - Hyperparameter tuning with Optuna
    - Early stopping for boosting models

    Note: Filter data to active signals (not NONE) BEFORE calling fit().
    This gives you full control over data preparation.

    Attributes:
        auto_select_metric: Scoring name passed to ``cross_val_score`` both
            for automatic model selection and for tuning.
        auto_select_cv_folds: Number of CV folds used for automatic model
            selection.

    Example:
        >>> # Prepare data - filter to active signals
        >>> df = df.filter(pl.col("signal_type") != "none")
        >>>
        >>> validator = SklearnSignalValidator(model_type="lightgbm")
        >>> validator.fit(
        ...     train_df.select(["pair", "timestamp"] + feature_cols),
        ...     train_df.select("label"),
        ... )
        >>>
        >>> # validate_signals returns Signals object
        >>> validated = validator.validate_signals(
        ...     Signals(test_df.select(signal_cols)),
        ...     test_df.select(["pair", "timestamp"] + feature_cols),
        ... )
        >>> validated.value.filter(pl.col("probability_rise") > 0.7)
    """

    auto_select_metric: str = "roc_auc"
    auto_select_cv_folds: int = 5

    def __post_init__(self) -> None:
        """Replace unset (``None``) parameter dictionaries with defaults."""
        if self.model_params is None:
            self.model_params = {}
        if self.train_params is None:
            self.train_params = {}
        if self.tune_params is None:
            self.tune_params = {"n_trials": 50, "cv_folds": 5, "timeout": 600}

    def _get_model_config(self, model_type: str) -> dict[str, Any]:
        """Get model configuration from catalog.

        Args:
            model_type: Key into ``SKLEARN_MODELS``.

        Returns:
            The catalog entry (class path, default params, tune space).

        Raises:
            ValueError: If ``model_type`` is not in the catalog.
        """
        if model_type not in SKLEARN_MODELS:
            available = ", ".join(SKLEARN_MODELS.keys())
            raise ValueError(f"Unknown model_type: {model_type}. Available: {available}")
        return SKLEARN_MODELS[model_type]

    def _create_model(self, model_type: str, params: dict | None = None) -> Any:
        """Create model instance.

        Args:
            model_type: Key into ``SKLEARN_MODELS``.
            params: Optional overrides; they take precedence over the
                catalog's ``default_params``.

        Returns:
            A new, unfitted estimator instance.
        """
        config = self._get_model_config(model_type)
        model_class = import_model_class(config["class"])

        # Defaults first so that explicit overrides win.
        final_params = {**config["default_params"], **(params or {})}
        return model_class(**final_params)

    def _extract_features(
        self,
        X: pl.DataFrame,
        fit_mode: bool = False,
    ) -> np.ndarray:
        """Extract feature matrix from DataFrame.

        Args:
            X: Input DataFrame
            fit_mode: If True, infer and store feature columns (every column
                except the pair and timestamp key columns)

        Returns:
            Feature matrix as numpy array, columns ordered as
            ``self.feature_columns``

        Raises:
            ValueError: If feature columns have not been set yet, or if ``X``
                lacks some of them.
        """
        exclude_cols = {self.pair_col, self.ts_col}

        if fit_mode:
            self.feature_columns = [c for c in X.columns if c not in exclude_cols]

        if self.feature_columns is None:
            raise ValueError("feature_columns not set. Call fit() first.")

        missing = set(self.feature_columns) - set(X.columns)
        if missing:
            raise ValueError(f"Missing feature columns: {sorted(missing)}")

        return X.select(self.feature_columns).to_numpy()

    def _extract_labels(self, y: pl.DataFrame | pl.Series) -> np.ndarray:
        """Extract label array.

        Args:
            y: Either a Series, a single-column DataFrame, or a DataFrame
                containing a ``label`` column.

        Returns:
            One-dimensional numpy array of labels.

        Raises:
            ValueError: If ``y`` is a multi-column DataFrame without a
                ``label`` column.
        """
        if not isinstance(y, pl.DataFrame):
            return y.to_numpy()

        if y.width == 1:
            return y.to_numpy().ravel()
        if "label" in y.columns:
            return y["label"].to_numpy()
        raise ValueError("y DataFrame must have single column or 'label' column")

    def _auto_select_model(
        self,
        X: np.ndarray,
        y: np.ndarray,
    ) -> tuple[str, dict]:
        """Select best model using cross-validation.

        Each entry of ``AUTO_SELECT_MODELS`` is scored with its default
        parameters. Candidates that cannot be imported, fail during
        cross-validation, or yield a NaN score are skipped; the reason is
        remembered and reported if no candidate succeeds.

        Args:
            X: Feature matrix.
            y: Label array.

        Returns:
            Tuple of (best_model_type, best_params)

        Raises:
            RuntimeError: If no candidate produced a usable score.
        """
        from sklearn.model_selection import cross_val_score

        best_score = -np.inf
        best_model_type: str | None = None
        best_params: dict | None = None
        failures: dict[str, str] = {}

        for model_type in AUTO_SELECT_MODELS:
            try:
                config = self._get_model_config(model_type)
                model = self._create_model(model_type)
                scores = cross_val_score(
                    model, X, y,
                    cv=self.auto_select_cv_folds,
                    scoring=self.auto_select_metric,
                    n_jobs=-1,
                )
                mean_score = scores.mean()
            except Exception as exc:
                # Best effort: a missing optional dependency or an
                # incompatible model must not abort the whole selection.
                failures[model_type] = f"{type(exc).__name__}: {exc}"
                continue

            if np.isnan(mean_score):
                # NaN never compares greater than the current best, so make
                # the reason explicit instead of dropping it silently.
                failures[model_type] = "cross-validation score is NaN"
                continue

            if mean_score > best_score:
                best_score = mean_score
                best_model_type = model_type
                best_params = config["default_params"].copy()

        if best_model_type is None:
            message = "No suitable model found. Install lightgbm, xgboost, or scikit-learn."
            if failures:
                details = "; ".join(f"{name} ({reason})" for name, reason in failures.items())
                message = f"{message} Candidate failures: {details}"
            raise RuntimeError(message)

        return best_model_type, best_params

    def _early_stopping_fit_kwargs(
        self,
        X_val: np.ndarray,
        y_val: np.ndarray,
    ) -> dict[str, Any]:
        """Build ``fit`` keyword arguments enabling early stopping.

        Only meaningful for the "lightgbm" and "xgboost" model types; the
        number of rounds comes from ``train_params["early_stopping_rounds"]``
        (default 50).
        """
        rounds = self.train_params.get("early_stopping_rounds", 50)
        fit_kwargs: dict[str, Any] = {"eval_set": [(X_val, y_val)]}

        if self.model_type == "lightgbm":
            import lightgbm

            fit_kwargs["callbacks"] = [lightgbm.early_stopping(rounds, verbose=False)]
        elif self.model_type == "xgboost":
            import inspect

            # Older XGBoost takes `early_stopping_rounds` in fit(); newer
            # releases only accept it as an estimator parameter.
            if "early_stopping_rounds" in inspect.signature(self.model.fit).parameters:
                fit_kwargs["early_stopping_rounds"] = rounds
            else:
                self.model.set_params(early_stopping_rounds=rounds)
            fit_kwargs["verbose"] = False

        return fit_kwargs

    def fit(
        self,
        X_train: pl.DataFrame,
        y_train: pl.DataFrame | pl.Series,
        X_val: pl.DataFrame | None = None,
        y_val: pl.DataFrame | pl.Series | None = None,
    ) -> "SklearnSignalValidator":
        """Train the validator.

        Note: Filter to active signals BEFORE calling this method.

        For boosting models with validation data, early stopping is applied.
        When ``model_type`` is ``None`` or ``"auto"``, the model type and
        ``model_params`` are replaced by the automatically selected ones.

        Args:
            X_train: Training features (already filtered to active signals)
            y_train: Training labels
            X_val: Validation features (optional)
            y_val: Validation labels (optional)

        Returns:
            Self for method chaining
        """
        X_np = self._extract_features(X_train, fit_mode=True)
        y_np = self._extract_labels(y_train)

        if self.model_type is None or self.model_type == "auto":
            self.model_type, self.model_params = self._auto_select_model(X_np, y_np)

        self.model = self._create_model(self.model_type, self.model_params)

        fit_kwargs: dict[str, Any] = {}

        if X_val is not None and y_val is not None:
            # Extracted for every model type so that a validation frame with
            # missing feature columns is rejected early.
            X_val_np = self._extract_features(X_val)
            y_val_np = self._extract_labels(y_val)

            if self.model_type in ("lightgbm", "xgboost"):
                fit_kwargs = self._early_stopping_fit_kwargs(X_val_np, y_val_np)

        self.model.fit(X_np, y_np, **fit_kwargs)

        return self

    def tune(
        self,
        X_train: pl.DataFrame,
        y_train: pl.DataFrame | pl.Series,
        X_val: pl.DataFrame | None = None,
        y_val: pl.DataFrame | pl.Series | None = None,
    ) -> dict[str, Any]:
        """Tune hyperparameters using Optuna.

        Note: Filter to active signals BEFORE calling this method.

        Every trial is scored with cross-validation on the training data
        using ``auto_select_metric``. The best parameters are stored in
        ``model_params``; the model itself is NOT fitted here, call ``fit()``
        afterwards.

        Args:
            X_train: Training features
            y_train: Training labels
            X_val: Accepted for interface compatibility; not used
            y_val: Accepted for interface compatibility; not used

        Returns:
            Best parameters found (catalog defaults overlaid with the best
            trial's values)

        Raises:
            ValueError: If ``model_type`` is unset or ``"auto"``.
        """
        import optuna
        from sklearn.model_selection import cross_val_score

        if self.model_type is None or self.model_type == "auto":
            raise ValueError("Set model_type before tuning (not 'auto')")

        config = self._get_model_config(self.model_type)
        tune_space = config["tune_space"]

        X_np = self._extract_features(X_train, fit_mode=True)
        y_np = self._extract_labels(y_train)

        n_trials = self.tune_params.get("n_trials", 50)
        cv_folds = self.tune_params.get("cv_folds", 5)
        timeout = self.tune_params.get("timeout", 600)

        def objective(trial: optuna.Trial) -> float:
            sampled = build_optuna_params(trial, tune_space)
            # Defaults are the base; sampled values must override them,
            # otherwise every trial would evaluate the default model.
            params = {**config["default_params"], **sampled}

            model = self._create_model(self.model_type, params)

            scores = cross_val_score(
                model, X_np, y_np,
                cv=cv_folds,
                scoring=self.auto_select_metric,
                n_jobs=-1,
            )
            return scores.mean()

        study = optuna.create_study(direction="maximize")
        study.optimize(
            objective,
            n_trials=n_trials,
            timeout=timeout,
            show_progress_bar=True,
        )

        best_params = {**config["default_params"], **study.best_params}
        self.model_params = best_params

        return best_params

    def _match_features(self, signals_df: pl.DataFrame, X: pl.DataFrame) -> np.ndarray:
        """Build the feature matrix aligned row-by-row with ``signals_df``.

        Features are left-joined onto the signal keys (pair, timestamp).

        Raises:
            ValueError: If the join changes the number of rows, which happens
                when ``X`` contains duplicate keys.
        """
        keys = [self.pair_col, self.ts_col]

        # NOTE(review): relies on the left join keeping the row order of
        # `signals_df`; confirm for the supported polars versions.
        matched = signals_df.select(keys).join(X, on=keys, how="left")

        if matched.height != signals_df.height:
            raise ValueError(
                f"Joining features produced {matched.height} rows for "
                f"{signals_df.height} signals; feature keys {keys} must be unique."
            )

        return self._extract_features(matched)

    def predict(self, signals: Signals, X: pl.DataFrame) -> Signals:
        """Predict class labels and return updated Signals.

        Args:
            signals: Input signals container
            X: Features DataFrame with (pair, timestamp) + feature columns

        Returns:
            New Signals with 'validation_pred' column added

        Raises:
            ValueError: If the model has not been fitted.
        """
        if self.model is None:
            raise ValueError("Model not fitted. Call fit() first.")

        signals_df = signals.value
        predictions = self.model.predict(self._match_features(signals_df, X))

        result_df = signals_df.with_columns(
            pl.Series(name="validation_pred", values=predictions)
        )

        return Signals(result_df)

    def predict_proba(
        self,
        signals: Signals,
        X: pl.DataFrame,
        prefix: str = "probability_",
    ) -> Signals:
        """Predict class probabilities and return updated Signals.

        Args:
            signals: Input signals container
            X: Features DataFrame with (pair, timestamp) + feature columns
            prefix: Prefix for probability columns (default: "probability_")

        Returns:
            New Signals with one ``<prefix><class>`` column per model class
            (by default probability_none, probability_rise, probability_fall)

        Raises:
            ValueError: If the model has not been fitted.
        """
        if self.model is None:
            raise ValueError("Model not fitted. Call fit() first.")

        signals_df = signals.value
        classes = self._get_class_labels()

        probas = self.model.predict_proba(self._match_features(signals_df, X))

        # Column i of `probas` belongs to the i-th class label.
        result_df = signals_df.with_columns(
            [
                pl.Series(name=f"{prefix}{class_label}", values=probas[:, i])
                for i, class_label in enumerate(classes)
            ]
        )

        return Signals(result_df)

    def validate_signals(
        self,
        signals: Signals,
        features: pl.DataFrame,
        prefix: str = "probability_",
    ) -> Signals:
        """Add validation probabilities to signals.

        Adds probability columns for each class:
        - probability_none: P(signal is noise / not actionable)
        - probability_rise: P(signal leads to price rise)
        - probability_fall: P(signal leads to price fall)

        Args:
            signals: Input Signals container
            features: Features DataFrame with (pair, timestamp) + features
            prefix: Prefix for probability columns (default: "probability_")

        Returns:
            New Signals with probability columns added.

        Example:
            >>> validated = validator.validate_signals(signals, features)
            >>> df = validated.value
            >>> confident_rise = df.filter(
            ...     (pl.col("signal_type") == "rise") &
            ...     (pl.col("probability_rise") > 0.7)
            ... )
        """
        return self.predict_proba(signals, features, prefix=prefix)

    def _get_class_labels(self) -> list[str]:
        """Get class labels for probability columns.

        Maps numeric classes to SignalType names. Classes outside the known
        mapping are rendered with ``str()``. If the model exposes no
        ``classes_`` attribute, ``["none", "rise", "fall"]`` is assumed.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if self.model is None:
            raise ValueError("Model not fitted.")

        classes = getattr(self.model, "classes_", None)
        if classes is None:
            return ["none", "rise", "fall"]

        label_map = {
            0: "none",
            1: "rise",
            2: "fall",
            "none": "none",
            "rise": "rise",
            "fall": "fall",
        }

        return [label_map.get(c, str(c)) for c in classes]

    def save(self, path: str | Path) -> None:
        """Save validator to file.

        The fitted model and its configuration are written as a single
        pickled dictionary.

        Args:
            path: Destination file path.
        """
        state = {
            "model": self.model,
            "model_type": self.model_type,
            "model_params": self.model_params,
            "train_params": self.train_params,
            "tune_params": self.tune_params,
            "feature_columns": self.feature_columns,
            "pair_col": self.pair_col,
            "ts_col": self.ts_col,
        }

        with Path(path).open("wb") as f:
            pickle.dump(state, f)

    @classmethod
    def load(cls, path: str | Path) -> "SklearnSignalValidator":
        """Load validator from file.

        SECURITY: the file is read with ``pickle``, which can execute
        arbitrary code while loading. Only load files from trusted sources.

        Args:
            path: File previously written by ``save()``.

        Returns:
            A validator restored from the stored state. ``pair_col`` and
            ``ts_col`` fall back to "pair" / "timestamp" when the file does
            not contain them.
        """
        with Path(path).open("rb") as f:
            state = pickle.load(f)

        return cls(
            model=state["model"],
            model_type=state["model_type"],
            model_params=state["model_params"],
            train_params=state["train_params"],
            tune_params=state["tune_params"],
            feature_columns=state["feature_columns"],
            pair_col=state.get("pair_col", "pair"),
            ts_col=state.get("ts_col", "timestamp"),
        )
@@ -0,0 +1,149 @@
1
+ Metadata-Version: 2.4
2
+ Name: signalflow-trading
3
+ Version: 0.2.1
4
+ Summary: SignalFlow: modular framework for trading signal generation, validation and execution
5
+ Author: pathway2nothing
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 SignalFlow-Trading
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ Project-URL: Homepage, https://github.com/pathway2nothing/signalflow-trading
28
+ Project-URL: Source, https://github.com/pathway2nothing/signalflow-trading
29
+ Project-URL: Issues, https://github.com/pathway2nothing/signalflow-trading/issues
30
+ Requires-Python: >=3.12
31
+ Description-Content-Type: text/markdown
32
+ License-File: LICENSE
33
+ Requires-Dist: pandas>=2.0.0
34
+ Requires-Dist: polars>=0.20.0
35
+ Requires-Dist: duckdb>=1.0.0
36
+ Requires-Dist: loguru>=0.7.0
37
+ Requires-Dist: aiohttp>=3.9.0
38
+ Requires-Dist: scikit-learn>=1.3.0
39
+ Requires-Dist: optuna>=3.0.0
40
+ Requires-Dist: numba>=0.58.0
41
+ Requires-Dist: lightning>=2.1.0
42
+ Requires-Dist: numpy>=1.24.0
43
+ Requires-Dist: pandas-ta>=0.3.14b
44
+ Provides-Extra: docs
45
+ Requires-Dist: mkdocs>=1.6; extra == "docs"
46
+ Requires-Dist: mkdocs-material>=9.5; extra == "docs"
47
+ Requires-Dist: mkdocstrings[python]>=0.25; extra == "docs"
48
+ Requires-Dist: pymdown-extensions>=10.0; extra == "docs"
49
+ Requires-Dist: mkdocs-minify-plugin>=0.7.0; extra == "docs"
50
+ Dynamic: license-file
51
+
52
+ # SignalFlow
53
+
54
+ **SignalFlow** is a high-performance Python framework for algorithmic trading, designed to manage the full strategy lifecycle from signal detection to execution. It bridges the gap between research and production by providing a robust pipeline for signal generation, meta-labeling validation, and automated trading.
55
+
56
+ ## Core Architecture: The Signal Pipeline
57
+
58
+ The framework implements a modular three-stage processing logic:
59
+
60
+ 1. **🕵️ Signal Detector**: Scans market data (OHLCV or tick) to identify potential market events. Detectors can range from simple SMA crossovers to complex deep learning models.
61
+
62
+ 2. **⚖️ Signal Validator (Meta-Labeling)**: Based on López de Prado's methodology, this stage assesses the quality and risk of detected signals using classification models (e.g., LightGBM, XGBoost).
63
+
64
+ 3. **♟️ Trading Strategy**: Converts validated signals into actionable trade positions, managing entry, exit, and risk.
65
+
66
+
67
+ ## Key Features
68
+
69
+ * **Polars-First Performance**: Core data processing utilizes `polars` for extreme efficiency with large datasets.
70
+
71
+ * **Production Ready**: Code written for research and backtesting is designed for direct deployment to live trading.
72
+
73
+ * **Advanced Labeling**: Native support for Triple-Barrier Method and Fixed-Horizon labeling for ML training.
74
+
75
+
76
+ * **Kedro Integration**: Fully compatible with Kedro for reproducible R&D and automated data pipelines.
77
+
78
+ * **Flexible Extensibility**: Easily add custom features via the `@sf_component` registry.
79
+
80
+
81
+ ## Quick Start
82
+
83
+ ### Installation
84
+
85
+ ```bash
86
+ pip install signalflow-trading
87
+
88
+ ```
89
+
90
+ ### Signal Detection Example
91
+
92
+ ```python
93
+ from signalflow.core import RawDataView
94
+ from signalflow.detector import SmaCrossSignalDetector
95
+
96
+ # Initialize a detector (SMA 20/50 crossover)
97
+ detector = SmaCrossSignalDetector(fast_period=20, slow_period=50)
98
+
99
+ # Run detection on a data snapshot
100
+ signals = detector.run(raw_data_view)
101
+
102
+ ```
103
+
104
+ ### Signal Validation (Meta-Labeling)
105
+
106
+ ```python
107
+ from signalflow.validator import SklearnSignalValidator
108
+
109
+ # Create a validator using LightGBM
110
+ validator = SklearnSignalValidator(model_type="lightgbm")
111
+
112
+ # Fit the model on labeled historical signals
113
+ validator.fit(X_train, y_train)
114
+
115
+ # Validate new signals to get success probabilities
116
+ validated_signals = validator.validate_signals(signals, features)
117
+
118
+ ```
119
+
120
+ ## Tech Stack
121
+
122
+ * **Data**: `polars`, `pandas`, `duckdb`.
123
+
124
+ * **ML/Compute**: `pytorch`, `lightning`, `scikit-learn`, `numba`, `optuna`.
125
+
126
+ * **Technical Analysis**: `pandas-ta`.
127
+
128
+
129
+ ## Package Structure
130
+
131
+ * `signalflow.core`: Core data containers (`RawData`, `Signals`) and registries.
132
+
133
+ * `signalflow.data`: Binance API loaders and DuckDB storage.
134
+
135
+ * `signalflow.feature`: Feature extractors and technical indicator adapters.
136
+
137
+ * `signalflow.target`: Advanced labeling techniques for machine learning.
138
+
139
+ * `signalflow.detector`: Ready-to-use signal detection algorithms.
140
+
141
+
142
+
143
+ ---
144
+
145
+ **License:** MIT
146
+
147
+ **Author:** pathway2nothing
148
+
149
+ ---