signalflow-trading 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signalflow/__init__.py +21 -0
- signalflow/analytics/__init__.py +0 -0
- signalflow/core/__init__.py +46 -0
- signalflow/core/base_mixin.py +232 -0
- signalflow/core/containers/__init__.py +21 -0
- signalflow/core/containers/order.py +216 -0
- signalflow/core/containers/portfolio.py +211 -0
- signalflow/core/containers/position.py +296 -0
- signalflow/core/containers/raw_data.py +167 -0
- signalflow/core/containers/raw_data_view.py +169 -0
- signalflow/core/containers/signals.py +198 -0
- signalflow/core/containers/strategy_state.py +147 -0
- signalflow/core/containers/trade.py +112 -0
- signalflow/core/decorators.py +103 -0
- signalflow/core/enums.py +270 -0
- signalflow/core/registry.py +322 -0
- signalflow/core/rolling_aggregator.py +362 -0
- signalflow/core/signal_transforms/__init__.py +5 -0
- signalflow/core/signal_transforms/base_signal_transform.py +186 -0
- signalflow/data/__init__.py +11 -0
- signalflow/data/raw_data_factory.py +225 -0
- signalflow/data/raw_store/__init__.py +7 -0
- signalflow/data/raw_store/base.py +271 -0
- signalflow/data/raw_store/duckdb_stores.py +696 -0
- signalflow/data/source/__init__.py +10 -0
- signalflow/data/source/base.py +300 -0
- signalflow/data/source/binance.py +442 -0
- signalflow/data/strategy_store/__init__.py +8 -0
- signalflow/data/strategy_store/base.py +278 -0
- signalflow/data/strategy_store/duckdb.py +409 -0
- signalflow/data/strategy_store/schema.py +36 -0
- signalflow/detector/__init__.py +7 -0
- signalflow/detector/adapter/__init__.py +5 -0
- signalflow/detector/adapter/pandas_detector.py +46 -0
- signalflow/detector/base.py +390 -0
- signalflow/detector/sma_cross.py +105 -0
- signalflow/feature/__init__.py +16 -0
- signalflow/feature/adapter/__init__.py +5 -0
- signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
- signalflow/feature/base.py +330 -0
- signalflow/feature/feature_set.py +286 -0
- signalflow/feature/oscillator/__init__.py +5 -0
- signalflow/feature/oscillator/rsi_extractor.py +42 -0
- signalflow/feature/pandasta/__init__.py +10 -0
- signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
- signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
- signalflow/feature/smoother/__init__.py +5 -0
- signalflow/feature/smoother/sma_extractor.py +46 -0
- signalflow/strategy/__init__.py +9 -0
- signalflow/strategy/broker/__init__.py +15 -0
- signalflow/strategy/broker/backtest.py +172 -0
- signalflow/strategy/broker/base.py +186 -0
- signalflow/strategy/broker/executor/__init__.py +9 -0
- signalflow/strategy/broker/executor/base.py +35 -0
- signalflow/strategy/broker/executor/binance_spot.py +12 -0
- signalflow/strategy/broker/executor/virtual_spot.py +81 -0
- signalflow/strategy/broker/realtime_spot.py +12 -0
- signalflow/strategy/component/__init__.py +9 -0
- signalflow/strategy/component/base.py +65 -0
- signalflow/strategy/component/entry/__init__.py +7 -0
- signalflow/strategy/component/entry/fixed_size.py +57 -0
- signalflow/strategy/component/entry/signal.py +127 -0
- signalflow/strategy/component/exit/__init__.py +5 -0
- signalflow/strategy/component/exit/time_based.py +47 -0
- signalflow/strategy/component/exit/tp_sl.py +80 -0
- signalflow/strategy/component/metric/__init__.py +8 -0
- signalflow/strategy/component/metric/main_metrics.py +181 -0
- signalflow/strategy/runner/__init__.py +8 -0
- signalflow/strategy/runner/backtest_runner.py +208 -0
- signalflow/strategy/runner/base.py +19 -0
- signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
- signalflow/strategy/runner/realtime_runner.py +0 -0
- signalflow/target/__init__.py +14 -0
- signalflow/target/adapter/__init__.py +5 -0
- signalflow/target/adapter/pandas_labeler.py +45 -0
- signalflow/target/base.py +409 -0
- signalflow/target/fixed_horizon_labeler.py +93 -0
- signalflow/target/static_triple_barrier.py +162 -0
- signalflow/target/triple_barrier.py +188 -0
- signalflow/utils/__init__.py +7 -0
- signalflow/utils/import_utils.py +11 -0
- signalflow/utils/tune_utils.py +19 -0
- signalflow/validator/__init__.py +6 -0
- signalflow/validator/base.py +139 -0
- signalflow/validator/sklearn_validator.py +527 -0
- signalflow_trading-0.2.1.dist-info/METADATA +149 -0
- signalflow_trading-0.2.1.dist-info/RECORD +90 -0
- signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
- signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
- signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,36 @@
+# src/signalflow/data/strategy_store/schema.py
+SCHEMA_SQL = """
+CREATE TABLE IF NOT EXISTS strategy_state (
+    strategy_id TEXT PRIMARY KEY,
+    last_ts TIMESTAMP,
+    last_event_id TEXT,
+    payload_json TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS positions (
+    strategy_id TEXT NOT NULL,
+    ts TIMESTAMP NOT NULL,
+    position_id TEXT NOT NULL,
+    payload_json TEXT NOT NULL,
+    PRIMARY KEY (strategy_id, ts, position_id)
+);
+
+CREATE TABLE IF NOT EXISTS trades (
+    strategy_id TEXT NOT NULL,
+    ts TIMESTAMP NOT NULL,
+    trade_id TEXT NOT NULL,
+    payload_json TEXT NOT NULL,
+    PRIMARY KEY (strategy_id, trade_id)
+);
+
+CREATE TABLE IF NOT EXISTS metrics (
+    strategy_id TEXT NOT NULL,
+    ts TIMESTAMP NOT NULL,
+    name TEXT NOT NULL,
+    value DOUBLE NOT NULL,
+    PRIMARY KEY (strategy_id, ts, name)
+);
+
+CREATE INDEX IF NOT EXISTS idx_metrics_strategy_ts ON metrics(strategy_id, ts);
+CREATE INDEX IF NOT EXISTS idx_positions_strategy_ts ON positions(strategy_id, ts);
+"""
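SCHEMA_SQL is one multi-statement string, so bootstrapping a store is a single connect-and-execute. A minimal sketch, assuming DuckDB as in the sibling duckdb.py store; the file name and sample row are invented:

```python
import duckdb

con = duckdb.connect("strategy.duckdb")  # hypothetical path
con.execute(SCHEMA_SQL)  # DuckDB's Python client accepts multi-statement strings

# Write and read back one illustrative metric row
# (column order: strategy_id, ts, name, value).
con.execute(
    "INSERT INTO metrics VALUES (?, ?, ?, ?)",
    ["demo_strategy", "2024-01-01 00:00:00", "sharpe", 1.42],
)
print(con.execute("SELECT * FROM metrics").fetchall())
```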
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from abc import abstractmethod
+from dataclasses import dataclass
+from typing import Any
+
+import pandas as pd
+import polars as pl
+
+from signalflow.core import Signals
+from signalflow.detector.base import SignalDetector
+
+
+@dataclass
+class PandasSignalDetector(SignalDetector):
+    """
+    Adapter: pandas-based detector logic, polars-first public interface.
+
+    Rule:
+    - preprocess() still returns pl.DataFrame (from FeatureSet or overridden)
+    - detect(pl.DataFrame) converts to pandas -> detect_pd() -> back to pl -> Signals
+    """
+
+    def detect(self, features: pl.DataFrame, context: dict[str, Any] | None = None) -> Signals:
+        if not isinstance(features, pl.DataFrame):
+            raise TypeError(f"{self.__class__.__name__}.detect expects pl.DataFrame, got {type(features)}")
+
+        pdf = features.to_pandas()
+        out_pd = self.detect_pd(pdf, context=context)
+
+        if not isinstance(out_pd, pd.DataFrame):
+            raise TypeError(f"{self.__class__.__name__}.detect_pd must return pd.DataFrame, got {type(out_pd)}")
+
+        out_pl = pl.from_pandas(out_pd, include_index=False)
+        out_pl = self._normalize_index(out_pl)
+        return Signals(out_pl)
+
+    @abstractmethod
+    def detect_pd(self, features: pd.DataFrame, context: dict[str, Any] | None = None) -> pd.DataFrame:
+        """
+        Pandas detection implementation.
+
+        Must return a DataFrame with at least:
+        - pair, timestamp, signal_type
+        """
+        raise NotImplementedError
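Subclassing needs only detect_pd(); the polars round-trip and index normalization come from the adapter. A hedged sketch (the RSI column name, threshold, and enum usage are illustrative assumptions):

```python
from dataclasses import dataclass

import pandas as pd

from signalflow.core import SignalType
from signalflow.detector.adapter.pandas_detector import PandasSignalDetector


@dataclass
class RsiThresholdDetector(PandasSignalDetector):
    rsi_col: str = "rsi_14"   # assumed feature column name
    overbought: float = 70.0

    def detect_pd(self, features: pd.DataFrame, context=None) -> pd.DataFrame:
        out = features[[self.pair_col, self.ts_col]].copy()
        out["signal_type"] = SignalType.NONE.value
        # Mark overbought rows as FALL candidates.
        mask = features[self.rsi_col] > self.overbought
        out.loc[mask, "signal_type"] = SignalType.FALL.value
        return out
```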
@@ -0,0 +1,390 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, ClassVar
+
+import polars as pl
+
+from signalflow.core import RawDataView, Signals, SfComponentType, SignalType, RawDataType
+from signalflow.feature import FeatureSet
+
+
+@dataclass
+class SignalDetector(ABC):
+    """Base class for Polars-first signal detection.
+
+    Provides a standardized pipeline for detecting trading signals from raw data:
+    1. preprocess: Extract features from raw data
+    2. detect: Generate signals from features
+    3. validate: Ensure data quality
+
+    Key features:
+    - Polars-native for performance
+    - Automatic feature extraction via FeatureSet
+    - Built-in validation (schema, duplicates, timezones)
+    - Optional probability requirement
+    - Keep latest signal per pair option
+
+    Public API:
+    - run(): Complete pipeline (preprocess → detect → validate)
+    - preprocess(): Feature extraction (delegates to FeatureSet)
+    - detect(): Signal generation (must implement)
+
+    Attributes:
+        component_type (ClassVar[SfComponentType]): Always DETECTOR for registry.
+        pair_col (str): Trading pair column name. Default: "pair".
+        ts_col (str): Timestamp column name. Default: "timestamp".
+        raw_data_type (RawDataType): Type of raw data to process. Default: SPOT.
+        feature_set (FeatureSet | None): Feature extractor. Default: None.
+        require_probability (bool): Require probability column in signals. Default: False.
+        keep_only_latest_per_pair (bool): Keep only latest signal per pair. Default: False.
+
+    Example:
+        ```python
+        from signalflow.detector import SignalDetector
+        from signalflow.core import Signals, SignalType
+        import polars as pl
+
+        class SmaCrossDetector(SignalDetector):
+            '''Simple SMA crossover detector'''
+
+            def __init__(self, fast_window: int = 10, slow_window: int = 20):
+                super().__init__()
+                # Auto-generate features
+                from signalflow.feature import FeatureSet, smoother
+                self.feature_set = FeatureSet([
+                    smoother.SmaExtractor(sma_period=fast_window, price_col="close"),
+                    smoother.SmaExtractor(sma_period=slow_window, price_col="close")
+                ])
+
+            def detect(self, features: pl.DataFrame, context=None) -> Signals:
+                signals = features.with_columns([
+                    # Detect crossover
+                    (pl.col("sma_10") > pl.col("sma_20")).alias("is_bull"),
+                    (pl.col("sma_10") < pl.col("sma_20")).alias("is_bear")
+                ]).with_columns([
+                    # Assign signal type
+                    pl.when(pl.col("is_bull"))
+                    .then(pl.lit(SignalType.RISE.value))
+                    .when(pl.col("is_bear"))
+                    .then(pl.lit(SignalType.FALL.value))
+                    .otherwise(pl.lit(SignalType.NONE.value))
+                    .alias("signal_type")
+                ]).select([
+                    self.pair_col,
+                    self.ts_col,
+                    "signal_type",
+                    pl.lit(1).alias("signal")
+                ])
+
+                return Signals(signals)
+
+        # Usage
+        detector = SmaCrossDetector(fast_window=10, slow_window=20)
+        signals = detector.run(raw_data_view)
+        ```
+
+    Note:
+        Subclasses must implement the detect() method.
+        All DataFrames must use timezone-naive timestamps.
+        Duplicate (pair, timestamp) combinations are rejected.
+
+    See Also:
+        FeatureSet: Orchestrates feature extraction.
+        Signals: Container for signal output.
+    """
+
+    component_type: ClassVar[SfComponentType] = SfComponentType.DETECTOR
+
+    pair_col: str = "pair"
+    ts_col: str = "timestamp"
+
+    raw_data_type: RawDataType = RawDataType.SPOT
+
+    feature_set: FeatureSet | None = None
+
+    require_probability: bool = False
+    keep_only_latest_per_pair: bool = False
+
+    def run(self, raw_data_view: RawDataView, context: dict[str, Any] | None = None) -> Signals:
+        """Execute complete detection pipeline.
+
+        Pipeline steps:
+        1. preprocess: Extract features
+        2. normalize: Ensure timezone-naive timestamps
+        3. validate features: Check schema and duplicates
+        4. detect: Generate signals
+        5. validate signals: Check output quality
+        6. (optional) keep latest: Filter to latest per pair
+
+        Args:
+            raw_data_view (RawDataView): View of raw market data.
+            context (dict[str, Any] | None): Additional context for detection.
+
+        Returns:
+            Signals: Detected signals.
+
+        Raises:
+            TypeError: If preprocess doesn't return pl.DataFrame.
+            ValueError: If features/signals fail validation.
+
+        Example:
+            ```python
+            from signalflow.core import RawData, RawDataView
+
+            # Create view
+            view = RawDataView(raw=raw_data)
+
+            # Run detection
+            signals = detector.run(view)
+
+            # With context
+            signals = detector.run(view, context={"threshold": 0.7})
+            ```
+
+        Note:
+            Can also be called directly: detector(raw_data_view).
+            All validation errors include helpful diagnostic information.
+        """
+        feats = self.preprocess(raw_data_view, context=context)
+        feats = self._normalize_index(feats)
+        self._validate_features(feats)
+
+        signals = self.detect(feats, context=context)
+        self._validate_signals(signals)
+
+        if self.keep_only_latest_per_pair:
+            signals = self._keep_only_latest(signals)
+
+        return signals
+
+    __call__ = run
+
+    def preprocess(self, raw_data_view: RawDataView, context: dict[str, Any] | None = None) -> pl.DataFrame:
+        """Extract features from raw data.
+
+        Default implementation delegates to FeatureSet. Override for custom
+        feature extraction logic.
+
+        Args:
+            raw_data_view (RawDataView): View of raw market data.
+            context (dict[str, Any] | None): Additional context.
+
+        Returns:
+            pl.DataFrame: Features with at minimum pair and timestamp columns.
+
+        Raises:
+            NotImplementedError: If feature_set is None and not overridden.
+            TypeError: If FeatureSet doesn't return pl.DataFrame.
+
+        Example:
+            ```python
+            # Default: uses FeatureSet
+            features = detector.preprocess(raw_data_view)
+
+            # Custom override
+            class CustomDetector(SignalDetector):
+                def preprocess(self, raw_data_view, context=None):
+                    df = raw_data_view.to_polars("spot")
+                    return df.with_columns([
+                        pl.col("close").rolling_mean(10).alias("sma_10")
+                    ])
+            ```
+        """
+        if self.feature_set is None:
+            raise NotImplementedError(
+                f"{self.__class__.__name__}.preprocess is not implemented and feature_set is None"
+            )
+        out = self.feature_set.extract(raw_data_view, context=context)
+        if not isinstance(out, pl.DataFrame):
+            raise TypeError(f"{self.__class__.__name__}.feature_set.extract must return pl.DataFrame, got {type(out)}")
+        return out
+
+    @abstractmethod
+    def detect(self, features: pl.DataFrame, context: dict[str, Any] | None = None) -> Signals:
+        """Generate signals from features.
+
+        Core detection logic - must be implemented by subclasses.
+
+        Args:
+            features (pl.DataFrame): Preprocessed features.
+            context (dict[str, Any] | None): Additional context.
+
+        Returns:
+            Signals: Detected signals with columns:
+                - pair (str): Trading pair
+                - timestamp (datetime): Signal timestamp (timezone-naive)
+                - signal_type (int): SignalType enum value
+                - signal (int | float): Signal value
+                - probability (float, optional): Signal probability
+
+        Example:
+            ```python
+            def detect(self, features, context=None):
+                # Simple threshold detector
+                signals = features.filter(
+                    pl.col("rsi") > 70  # Overbought
+                ).with_columns([
+                    pl.lit(SignalType.FALL.value).alias("signal_type"),
+                    pl.lit(-1).alias("signal"),
+                    pl.lit(0.8).alias("probability")
+                ]).select([
+                    self.pair_col,
+                    self.ts_col,
+                    "signal_type",
+                    "signal",
+                    "probability"
+                ])
+
+                return Signals(signals)
+            ```
+
+        Note:
+            Must return Signals with at minimum: pair, timestamp, signal_type.
+            Timestamps must be timezone-naive.
+            No duplicate (pair, timestamp) combinations allowed.
+        """
+        raise NotImplementedError
+
+    def _normalize_index(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Normalize timestamps to timezone-naive.
+
+        Args:
+            df (pl.DataFrame): Input DataFrame.
+
+        Returns:
+            pl.DataFrame: DataFrame with timezone-naive timestamps.
+
+        Raises:
+            TypeError: If df is not pl.DataFrame.
+        """
+        if not isinstance(df, pl.DataFrame):
+            raise TypeError(f"Expected pl.DataFrame, got {type(df)}")
+
+        if self.ts_col in df.columns:
+            ts_dtype = df.schema.get(self.ts_col)
+            if isinstance(ts_dtype, pl.Datetime) and ts_dtype.time_zone is not None:
+                df = df.with_columns(pl.col(self.ts_col).dt.replace_time_zone(None))
+        return df
+
+    def _validate_features(self, df: pl.DataFrame) -> None:
+        """Validate feature DataFrame.
+
+        Checks:
+        - Is pl.DataFrame
+        - Has required columns (pair, timestamp)
+        - Timestamps are timezone-naive
+        - No duplicate (pair, timestamp) combinations
+
+        Args:
+            df (pl.DataFrame): Features to validate.
+
+        Raises:
+            TypeError: If not pl.DataFrame.
+            ValueError: If validation fails.
+        """
+        if not isinstance(df, pl.DataFrame):
+            raise TypeError(f"preprocess must return polars.DataFrame, got {type(df)}")
+
+        missing = [c for c in (self.pair_col, self.ts_col) if c not in df.columns]
+        if missing:
+            raise ValueError(f"Features missing required columns: {missing}")
+
+        ts_dtype = df.schema.get(self.ts_col)
+        if isinstance(ts_dtype, pl.Datetime) and ts_dtype.time_zone is not None:
+            raise ValueError(
+                f"Features column '{self.ts_col}' must be timezone-naive, got tz={ts_dtype.time_zone}. "
+                f"Use .dt.replace_time_zone(None)."
+            )
+
+        dup = (
+            df.group_by([self.pair_col, self.ts_col])
+            .len()
+            .filter(pl.col("len") > 1)
+        )
+        if dup.height > 0:
+            raise ValueError(
+                "Features contain duplicate keys (pair,timestamp). "
+                f"Examples:\n{dup.select([self.pair_col, self.ts_col]).head(10)}"
+            )
+
+    def _validate_signals(self, signals: Signals) -> None:
+        """Validate signal output.
+
+        Checks:
+        - Is Signals instance with pl.DataFrame value
+        - Has required columns (pair, timestamp, signal_type)
+        - signal_type values are valid SignalType enums
+        - Timestamps are timezone-naive
+        - No duplicate (pair, timestamp) combinations
+        - (optional) Has probability column if required
+
+        Args:
+            signals (Signals): Signals to validate.
+
+        Raises:
+            TypeError: If not Signals or value not pl.DataFrame.
+            ValueError: If validation fails.
+        """
+        if not isinstance(signals, Signals):
+            raise TypeError(f"detect must return Signals, got {type(signals)}")
+
+        s = signals.value
+        if not isinstance(s, pl.DataFrame):
+            raise TypeError(f"Signals.value must be polars.DataFrame, got {type(s)}")
+
+        required = {self.pair_col, self.ts_col, "signal_type"}
+        missing = sorted(required - set(s.columns))
+        if missing:
+            raise ValueError(f"Signals missing required columns: {missing}")
+
+        allowed = {t.value for t in SignalType}
+        bad = (
+            s.select(pl.col("signal_type"))
+            .unique()
+            .filter(~pl.col("signal_type").is_in(list(allowed)))
+        )
+        if bad.height > 0:
+            raise ValueError(
+                f"Signals contain unknown signal_type values: {bad.get_column('signal_type').to_list()}"
+            )
+
+        if self.require_probability and "probability" not in s.columns:
+            raise ValueError("Signals must contain 'probability' column (require_probability=True)")
+
+        ts_dtype = s.schema.get(self.ts_col)
+        if isinstance(ts_dtype, pl.Datetime) and ts_dtype.time_zone is not None:
+            raise ValueError(f"Signals column '{self.ts_col}' must be timezone-naive, got tz={ts_dtype.time_zone}.")
+
+        # optional: hard guarantee no duplicates in signals
+        dup = (
+            s.group_by([self.pair_col, self.ts_col])
+            .len()
+            .filter(pl.col("len") > 1)
+        )
+        if dup.height > 0:
+            raise ValueError(
+                "Signals contain duplicate keys (pair,timestamp). "
+                f"Examples:\n{dup.select([self.pair_col, self.ts_col]).head(10)}"
+            )
+
+    def _keep_only_latest(self, signals: Signals) -> Signals:
+        """Keep only latest signal per pair.
+
+        Useful for strategies that only trade the most recent signal.
+
+        Args:
+            signals (Signals): Input signals.
+
+        Returns:
+            Signals: Filtered signals with one per pair.
+        """
+        s = signals.value
+        out = (
+            s.sort([self.pair_col, self.ts_col])
+            .group_by(self.pair_col, maintain_order=True)
+            .tail(1)
+            .sort([self.pair_col, self.ts_col])
+        )
+        return Signals(out)
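Both validators and the latest-per-pair filter reduce to two short polars idioms, shown standalone below (frame contents are made up for illustration):

```python
import polars as pl
from datetime import datetime

s = pl.DataFrame({
    "pair": ["BTCUSDT", "BTCUSDT", "ETHUSDT"],
    "timestamp": [datetime(2024, 1, 1), datetime(2024, 1, 2), datetime(2024, 1, 1)],
    "signal_type": [1, 0, 2],  # assumed SignalType enum values
})

# Duplicate-key check used by _validate_features/_validate_signals:
# an empty result means the (pair, timestamp) keys are unique.
dup = s.group_by(["pair", "timestamp"]).len().filter(pl.col("len") > 1)
assert dup.height == 0

# _keep_only_latest: last row per pair after sorting by timestamp.
latest = (
    s.sort(["pair", "timestamp"])
    .group_by("pair", maintain_order=True)
    .tail(1)
)
print(latest)  # one row per pair: BTCUSDT@2024-01-02, ETHUSDT@2024-01-01
```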
@@ -0,0 +1,105 @@
+# IMPORTANT
+
+from dataclasses import dataclass
+from typing import Any
+
+import polars as pl
+
+from signalflow.core import RawDataType, Signals, SignalType, sf_component
+from signalflow.detector import SignalDetector
+from signalflow.feature import FeatureSet
+from signalflow.feature.smoother import SmaExtractor
+
+
+@dataclass
+@sf_component(name="sma_cross")
+class SmaCrossSignalDetector(SignalDetector):
+    """
+    SMA crossover signal detector.
+
+    Signal rules (per pair, per timestamp):
+    - RISE : fast crosses above slow (fast_t > slow_t) and (fast_{t-1} <= slow_{t-1})
+    - FALL : fast crosses below slow (fast_t < slow_t) and (fast_{t-1} >= slow_{t-1})
+    - NONE : otherwise
+
+    Output Signals columns:
+    - pair, timestamp, signal_type, signal
+    - signal: +1 for RISE, -1 for FALL, 0 for NONE
+    """
+
+    fast_period: int = 20
+    slow_period: int = 50
+    price_col: str = "close"
+
+    fast_col: str | None = None
+    slow_col: str | None = None
+
+    def __post_init__(self) -> None:
+        if self.fast_period <= 0 or self.slow_period <= 0:
+            raise ValueError("fast_period and slow_period must be > 0")
+        if self.fast_period >= self.slow_period:
+            raise ValueError(f"fast_period must be < slow_period, got {self.fast_period} >= {self.slow_period}")
+
+        self.fast_col = self.fast_col or f"sma_{self.fast_period}"
+        self.slow_col = self.slow_col or f"sma_{self.slow_period}"
+
+        self.feature_set = FeatureSet(
+            extractors=[
+                SmaExtractor(
+                    offset_window=1,
+                    sma_period=self.fast_period,
+                    price_col=self.price_col,
+                    out_col=self.fast_col,
+                    use_resample=True,
+                    raw_data_type=RawDataType.SPOT,
+                ),
+                SmaExtractor(
+                    offset_window=1,
+                    sma_period=self.slow_period,
+                    price_col=self.price_col,
+                    out_col=self.slow_col,
+                    use_resample=True,
+                    raw_data_type=RawDataType.SPOT,
+                ),
+            ]
+        )
+
+    def detect(self, features: pl.DataFrame, context: dict[str, Any] | None = None) -> Signals:
+        df = features.sort([self.pair_col, self.ts_col])
+
+        if self.fast_col not in df.columns or self.slow_col not in df.columns:
+            raise ValueError(
+                f"Expected columns '{self.fast_col}' and '{self.slow_col}' in features. "
+                f"Got: {df.columns}"
+            )
+
+        df = df.filter(pl.col(self.fast_col).is_not_null() & pl.col(self.slow_col).is_not_null())
+
+        fast = pl.col(self.fast_col)
+        slow = pl.col(self.slow_col)
+
+        fast_prev = fast.shift(1).over(self.pair_col)
+        slow_prev = slow.shift(1).over(self.pair_col)
+
+        cross_up = (fast > slow) & (fast_prev <= slow_prev)
+        cross_down = (fast < slow) & (fast_prev >= slow_prev)
+
+        out = (
+            df.select([self.pair_col, self.ts_col, self.fast_col, self.slow_col])
+            .with_columns(
+                pl.when(cross_up)
+                .then(pl.lit(SignalType.RISE.value))
+                .when(cross_down)
+                .then(pl.lit(SignalType.FALL.value))
+                .otherwise(pl.lit(SignalType.NONE.value))
+                .alias("signal_type")
+            )
+            .with_columns(
+                pl.when(pl.col("signal_type") == SignalType.RISE.value).then(pl.lit(1))
+                .when(pl.col("signal_type") == SignalType.FALL.value).then(pl.lit(-1))
+                .otherwise(pl.lit(0))
+                .alias("signal")
+            )
+        )
+
+        return Signals(out.select([self.pair_col, self.ts_col, "signal_type", "signal"]))
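For a quick sanity check, detect() can be fed hand-built features directly, bypassing preprocess(). A hedged sketch (prices are fabricated; with the default column names, a RISE should appear on the middle row where the fast SMA crosses above the slow one):

```python
import polars as pl
from datetime import datetime
from signalflow.detector.sma_cross import SmaCrossSignalDetector

det = SmaCrossSignalDetector(fast_period=20, slow_period=50)

features = pl.DataFrame({
    "pair": ["BTCUSDT"] * 3,
    "timestamp": [datetime(2024, 1, d) for d in (1, 2, 3)],
    "sma_20": [99.0, 101.0, 102.0],   # crosses above the slow SMA on day 2
    "sma_50": [100.0, 100.0, 100.0],
})

signals = det.detect(features)
print(signals.value)  # signal column: 0, +1, 0
```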
@@ -0,0 +1,16 @@
+from signalflow.feature.feature_set import FeatureSet
+from signalflow.feature.base import FeatureExtractor
+import signalflow.feature.smoother as smoother
+import signalflow.feature.oscillator as oscillator
+import signalflow.feature.pandasta as pandasta
+import signalflow.feature.adapter as adapter
+
+
+__all__ = [
+    "FeatureSet",
+    "FeatureExtractor",
+    "adapter",
+    "pandasta",
+    "smoother",
+    "oscillator",
+]
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Callable
+
+import polars as pl
+import pandas as pd
+
+from signalflow.feature.base import FeatureExtractor
+
+PandasGroupFn = Callable[[pd.DataFrame, dict[str, Any] | None], pd.DataFrame | pd.Series]
+
+
+@dataclass
+class PandasFeatureExtractor(FeatureExtractor):
+    pandas_group_fn: PandasGroupFn | None = field(default=None, kw_only=True)
+
+    out_cols: list[str] | None = None
+    series_name: str = "feature"
+    rename_outputs: dict[str, str] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        if self.pandas_group_fn is None or not callable(self.pandas_group_fn):
+            raise TypeError("pandas_group_fn must be provided and callable (keyword-only argument)")
+
+    def compute_group(self, group_df: pl.DataFrame, data_context: dict[str, Any] | None) -> pl.DataFrame:
+        pdf = group_df.to_pandas()
+        result = self.pandas_group_fn(pdf, data_context)
+
+        if isinstance(result, pd.Series):
+            if result.name is None:
+                result = result.rename(self.series_name)
+            result = result.to_frame()
+
+        if not isinstance(result, pd.DataFrame):
+            raise TypeError("pandas_group_fn must return pd.DataFrame or pd.Series")
+
+        if len(result) != len(pdf):
+            raise ValueError(f"pandas_group_fn must preserve row count: got {len(result)} != {len(pdf)}")
+
+        if self.rename_outputs:
+            result = result.rename(columns=self.rename_outputs)
+
+        if self.out_cols is not None:
+            missing = set(self.out_cols) - set(result.columns)
+            if missing:
+                raise ValueError(f"pandas_group_fn output missing columns: {sorted(missing)}")
+
+        out = group_df
+        for col in result.columns:
+            out = out.with_columns(pl.Series(col, result[col].to_numpy()))
+
+        return out
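The contract for pandas_group_fn is simple: take one group's rows as pandas, return a row-count-preserving DataFrame or Series, and the outputs are glued back onto the polars frame. A standalone sketch of that round-trip with an invented z-score feature:

```python
import pandas as pd
import polars as pl


def zscore_fn(pdf: pd.DataFrame, ctx: dict | None) -> pd.Series:
    # Row-count-preserving feature: z-scored close price.
    return ((pdf["close"] - pdf["close"].mean()) / pdf["close"].std()).rename("close_z")


# The same polars -> pandas -> polars round-trip compute_group performs.
group = pl.DataFrame({"close": [1.0, 2.0, 3.0, 4.0]})
result = zscore_fn(group.to_pandas(), None)  # named Series -> one output column
out = group.with_columns(pl.Series("close_z", result.to_numpy()))
print(out)
```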