signalflow-trading 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signalflow/__init__.py +21 -0
- signalflow/analytics/__init__.py +0 -0
- signalflow/core/__init__.py +46 -0
- signalflow/core/base_mixin.py +232 -0
- signalflow/core/containers/__init__.py +21 -0
- signalflow/core/containers/order.py +216 -0
- signalflow/core/containers/portfolio.py +211 -0
- signalflow/core/containers/position.py +296 -0
- signalflow/core/containers/raw_data.py +167 -0
- signalflow/core/containers/raw_data_view.py +169 -0
- signalflow/core/containers/signals.py +198 -0
- signalflow/core/containers/strategy_state.py +147 -0
- signalflow/core/containers/trade.py +112 -0
- signalflow/core/decorators.py +103 -0
- signalflow/core/enums.py +270 -0
- signalflow/core/registry.py +322 -0
- signalflow/core/rolling_aggregator.py +362 -0
- signalflow/core/signal_transforms/__init__.py +5 -0
- signalflow/core/signal_transforms/base_signal_transform.py +186 -0
- signalflow/data/__init__.py +11 -0
- signalflow/data/raw_data_factory.py +225 -0
- signalflow/data/raw_store/__init__.py +7 -0
- signalflow/data/raw_store/base.py +271 -0
- signalflow/data/raw_store/duckdb_stores.py +696 -0
- signalflow/data/source/__init__.py +10 -0
- signalflow/data/source/base.py +300 -0
- signalflow/data/source/binance.py +442 -0
- signalflow/data/strategy_store/__init__.py +8 -0
- signalflow/data/strategy_store/base.py +278 -0
- signalflow/data/strategy_store/duckdb.py +409 -0
- signalflow/data/strategy_store/schema.py +36 -0
- signalflow/detector/__init__.py +7 -0
- signalflow/detector/adapter/__init__.py +5 -0
- signalflow/detector/adapter/pandas_detector.py +46 -0
- signalflow/detector/base.py +390 -0
- signalflow/detector/sma_cross.py +105 -0
- signalflow/feature/__init__.py +16 -0
- signalflow/feature/adapter/__init__.py +5 -0
- signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
- signalflow/feature/base.py +330 -0
- signalflow/feature/feature_set.py +286 -0
- signalflow/feature/oscillator/__init__.py +5 -0
- signalflow/feature/oscillator/rsi_extractor.py +42 -0
- signalflow/feature/pandasta/__init__.py +10 -0
- signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
- signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
- signalflow/feature/smoother/__init__.py +5 -0
- signalflow/feature/smoother/sma_extractor.py +46 -0
- signalflow/strategy/__init__.py +9 -0
- signalflow/strategy/broker/__init__.py +15 -0
- signalflow/strategy/broker/backtest.py +172 -0
- signalflow/strategy/broker/base.py +186 -0
- signalflow/strategy/broker/executor/__init__.py +9 -0
- signalflow/strategy/broker/executor/base.py +35 -0
- signalflow/strategy/broker/executor/binance_spot.py +12 -0
- signalflow/strategy/broker/executor/virtual_spot.py +81 -0
- signalflow/strategy/broker/realtime_spot.py +12 -0
- signalflow/strategy/component/__init__.py +9 -0
- signalflow/strategy/component/base.py +65 -0
- signalflow/strategy/component/entry/__init__.py +7 -0
- signalflow/strategy/component/entry/fixed_size.py +57 -0
- signalflow/strategy/component/entry/signal.py +127 -0
- signalflow/strategy/component/exit/__init__.py +5 -0
- signalflow/strategy/component/exit/time_based.py +47 -0
- signalflow/strategy/component/exit/tp_sl.py +80 -0
- signalflow/strategy/component/metric/__init__.py +8 -0
- signalflow/strategy/component/metric/main_metrics.py +181 -0
- signalflow/strategy/runner/__init__.py +8 -0
- signalflow/strategy/runner/backtest_runner.py +208 -0
- signalflow/strategy/runner/base.py +19 -0
- signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
- signalflow/strategy/runner/realtime_runner.py +0 -0
- signalflow/target/__init__.py +14 -0
- signalflow/target/adapter/__init__.py +5 -0
- signalflow/target/adapter/pandas_labeler.py +45 -0
- signalflow/target/base.py +409 -0
- signalflow/target/fixed_horizon_labeler.py +93 -0
- signalflow/target/static_triple_barrier.py +162 -0
- signalflow/target/triple_barrier.py +188 -0
- signalflow/utils/__init__.py +7 -0
- signalflow/utils/import_utils.py +11 -0
- signalflow/utils/tune_utils.py +19 -0
- signalflow/validator/__init__.py +6 -0
- signalflow/validator/base.py +139 -0
- signalflow/validator/sklearn_validator.py +527 -0
- signalflow_trading-0.2.1.dist-info/METADATA +149 -0
- signalflow_trading-0.2.1.dist-info/RECORD +90 -0
- signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
- signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
- signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
from signalflow.core import RawDataType, RollingAggregator, SfComponentType
|
|
10
|
+
from typing import ClassVar
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class FeatureExtractor(ABC):
    """Base class for Polars-first feature extraction.

    Extracts technical indicators and derived features from raw OHLCV data
    with optional sliding-window resampling for multi-timeframe features.

    Key features:
        - Polars-native for performance
        - Optional sliding window resampling (e.g., 5m bars from 1m bars)
        - Per-pair, per-offset-window processing
        - Length-preserving operations
        - Automatic projection (keep only new features)

    Processing pipeline (see ``extract``):
        1. Sort by (pair, timestamp)
        2. Add resample_offset column if missing
        3. (optional) Apply sliding resample
        4. (optional) Filter to last offset
        5. Group by (pair, resample_offset) and compute features
        6. Sort output
        7. Project columns (keep input or features only)

    Attributes:
        offset_window (int): Sliding window size in bars. Default: 1.
        compute_last_offset (bool): Keep only last offset. Default: False.
        pair_col (str): Trading pair column. Default: "pair".
        ts_col (str): Timestamp column. Default: "timestamp".
        offset_col (str): Offset tracking column. Default: "resample_offset".
        use_resample (bool): Apply sliding resample. Default: False.
        resample_mode (Literal["add", "replace"]): Resample mode. Default: "add".
        resample_prefix (str | None): Prefix for resampled columns. Default: None.
        raw_data_type (RawDataType): Type of raw data. Default: SPOT.
        component_type (ClassVar[SfComponentType]): Always FEATURE_EXTRACTOR.
        keep_input_columns (bool): Keep all input columns. Default: False.

    Note:
        compute_group() must preserve row count (length-preserving).
        All timestamps must be timezone-naive.
        For multi-timeframe features, use use_resample=True.

    See Also:
        RollingAggregator: Sliding window resampler.
        FeatureSet: Orchestrates multiple extractors.
    """

    offset_window: int = 1
    compute_last_offset: bool = False

    pair_col: str = "pair"
    ts_col: str = "timestamp"
    offset_col: str = "resample_offset"

    use_resample: bool = False
    resample_mode: Literal["add", "replace"] = "add"
    resample_prefix: str | None = None
    raw_data_type: RawDataType = RawDataType.SPOT
    component_type: ClassVar[SfComponentType] = SfComponentType.FEATURE_EXTRACTOR
    keep_input_columns: bool = False

    def __post_init__(self) -> None:
        """Validate configuration after initialization.

        Raises:
            ValueError: If offset_window <= 0, invalid resample_mode, or wrong offset_col.
            TypeError: If column names are not strings.
        """
        if self.offset_window <= 0:
            raise ValueError(f"offset_window must be > 0, got {self.offset_window}")

        if self.resample_mode not in ("add", "replace"):
            raise ValueError(f"Invalid resample_mode: {self.resample_mode}")

        # The resampler hard-codes its offset column name; a mismatch here
        # would silently break the group-by key downstream.
        if self.offset_col != RollingAggregator.OFFSET_COL:
            raise ValueError(
                f"offset_col must be '{RollingAggregator.OFFSET_COL}', got '{self.offset_col}'"
            )

        if not isinstance(self.pair_col, str) or not isinstance(self.ts_col, str) or not isinstance(self.offset_col, str):
            raise TypeError("pair_col/ts_col/offset_col must be str")

    @property
    def _resampler(self) -> RollingAggregator:
        """Get configured RollingAggregator instance.

        Returns:
            RollingAggregator: Resampler with current configuration.
        """
        return RollingAggregator(
            offset_window=self.offset_window,
            ts_col=self.ts_col,
            pair_col=self.pair_col,
            mode=self.resample_mode,
            prefix=self.resample_prefix,
            raw_data_type=self.raw_data_type,
        )

    def extract(self, df: pl.DataFrame, data_context: dict[str, Any] | None = None) -> pl.DataFrame:
        """Extract features from input DataFrame.

        Main entry point - handles sorting, resampling, grouping, and projection.

        Args:
            df (pl.DataFrame): Input OHLCV data with pair and timestamp columns.
            data_context (dict[str, Any] | None): Additional context for computation.

        Returns:
            pl.DataFrame: Features DataFrame with pair, timestamp, and feature
                columns (plus all input columns when keep_input_columns=True).

        Raises:
            TypeError: If df is not pl.DataFrame or compute_group returns wrong type.
            ValueError: If compute_group changes row count or projection columns
                are missing.

        Note:
            Only accepts pl.DataFrame (Polars-first design).
            Use PandasFeatureExtractor adapter for Pandas data.
        """
        if not isinstance(df, pl.DataFrame):
            raise TypeError(
                f"{self.__class__.__name__} is polars-first and accepts only pl.DataFrame. "
                f"Got: {type(df)}. Use an adapter for other dataframe types."
            )
        self._validate_input(df)

        df0 = df.sort([self.pair_col, self.ts_col])

        if self.offset_col not in df0.columns:
            df0 = self._resampler.add_offset_column(df0)

        if self.use_resample:
            df0 = self._resampler.resample(df0)

        if self.compute_last_offset:
            last_off = self._resampler.get_last_offset(df0)
            df0 = df0.filter(pl.col(self.offset_col) == last_off)

        # Snapshot the pre-compute columns so the projection below can tell
        # which columns were newly produced by compute_group.
        prepared_cols = set(df0.columns)

        def _wrapped(g: pl.DataFrame) -> pl.DataFrame:
            """Run compute_group on one group and enforce its contract."""
            out = self.compute_group(g, data_context=data_context)

            if not isinstance(out, pl.DataFrame):
                # Fixed: error message previously referenced a nonexistent
                # method name "compute_pl_group".
                raise TypeError(f"{self.__class__.__name__}.compute_group must return pl.DataFrame")

            if out.height != g.height:
                raise ValueError(
                    f"{self.__class__.__name__}: len(output_group)={out.height} != len(input_group)={g.height}"
                )

            return out

        out = (
            df0.group_by(self.pair_col, self.offset_col, maintain_order=True)
            .map_groups(_wrapped)
            .sort([self.pair_col, self.ts_col])
        )

        if self.keep_input_columns:
            return out

        feature_cols = sorted(set(out.columns) - prepared_cols)
        keep_cols = [self.pair_col, self.ts_col] + feature_cols

        missing = [c for c in keep_cols if c not in out.columns]
        if missing:
            raise ValueError(f"Projection error, missing columns: {missing}")

        return out.select(keep_cols)

    def compute_group(
        self,
        group_df: pl.DataFrame,
        data_context: dict[str, Any] | None,
    ) -> pl.DataFrame:
        """Compute features for single (pair, resample_offset) group.

        Core feature extraction logic - must be implemented by subclasses.

        CRITICAL: Must preserve row count (len(output) == len(input)).
        Should preserve ordering within group.

        Args:
            group_df (pl.DataFrame): Single group's data, sorted by timestamp.
            data_context (dict[str, Any] | None): Additional context.

        Returns:
            pl.DataFrame: Same length as input with added feature columns.

        Example:
            ```python
            def compute_group(self, group_df, data_context=None):
                # Simple moving average
                return group_df.with_columns([
                    pl.col("close")
                    .rolling_mean(self.window)
                    .alias(f"sma_{self.window}")
                ])
            ```

        Note:
            Use rolling operations for windowed features.
            First N-1 bars may have null values for N-period indicators.
        """
        raise NotImplementedError

    def _validate_input(self, df: pl.DataFrame) -> None:
        """Validate input DataFrame has required columns.

        Args:
            df (pl.DataFrame): Input to validate.

        Raises:
            ValueError: If required columns missing.
        """
        missing = [c for c in (self.pair_col, self.ts_col) if c not in df.columns]
        if missing:
            raise ValueError(f"Missing required columns: {missing}")
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
# IMPORTANT
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
from signalflow.feature.base import FeatureExtractor
|
|
10
|
+
from signalflow.core import RawDataView, RawDataType, DataFrameType
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class FeatureSet:
    """Polars-first orchestrator that runs several feature extractors.

    Each extractor fetches its own raw data, computes features, and the
    per-extractor results are merged into a single DataFrame via an outer
    join on the (pair, timestamp) index.

    Guarantees:
        - Raw data is fetched per extractor according to its raw_data_type.
        - Timestamps are normalized to timezone-naive before combining.
        - Duplicate feature column names across extractors are rejected.
        - Every extractor must emit both index columns.

    Attributes:
        extractors (list[FeatureExtractor]): Feature extractors to orchestrate.
        parallel (bool): Parallel execution flag (not yet implemented). Default: False.
        pair_col (str): Trading pair column name. Default: "pair".
        ts_col (str): Timestamp column name. Default: "timestamp".

    Example:
        ```python
        feature_set = FeatureSet([
            SmaExtractor(window=10, column="close"),
            RsiExtractor(window=14, column="close"),
        ])
        view = RawDataView(raw=raw_data)
        features = feature_set.extract(view)
        # columns: pair, timestamp, sma_10, rsi_14
        ```

    Note:
        All extractors must share this set's pair_col and ts_col.
        Feature column names must be unique across extractors.

    See Also:
        FeatureExtractor: Base class for individual extractors.
        RawDataView: Provides data in required format.
    """

    extractors: list[FeatureExtractor]
    parallel: bool = False

    pair_col: str = "pair"
    ts_col: str = "timestamp"

    def __post_init__(self) -> None:
        """Validate extractors configuration.

        Checks that at least one extractor was given and that every extractor
        agrees with this set's pair_col / ts_col.

        Raises:
            ValueError: If validation fails.
        """
        if not self.extractors:
            raise ValueError("At least one extractor must be provided")

        # Both index columns are validated the same way; drive it from a table.
        for extractor in self.extractors:
            for attr_name, expected in (("pair_col", self.pair_col), ("ts_col", self.ts_col)):
                if getattr(extractor, attr_name, expected) != expected:
                    raise ValueError(
                        f"All extractors must use {attr_name}='{expected}'. "
                        f"{extractor.__class__.__name__} uses '{getattr(extractor, attr_name, None)}'"
                    )

    def extract(self, raw_data: RawDataView, context: dict[str, Any] | None = None) -> pl.DataFrame:
        """Extract and combine features from all extractors.

        For each extractor: fetch its raw data, run extraction, normalize
        timestamps, and validate the index columns. The per-extractor results
        are then outer-joined on (pair, timestamp).

        Args:
            raw_data (RawDataView): View to raw market data.
            context (dict[str, Any] | None): Additional context passed to extractors.

        Returns:
            pl.DataFrame: Combined features indexed by (pair, timestamp).

        Raises:
            ValueError: If duplicate feature columns or missing index columns.
            TypeError: If an extractor does not return pl.DataFrame.

        Note:
            Outer join semantics: all (pair, timestamp) combinations are kept;
            non-matching rows get null feature values.
        """
        results: list[pl.DataFrame] = []

        for extractor in self.extractors:
            source_df = self._get_input_df(raw_data, extractor)

            extracted = extractor.extract(source_df, data_context=context)
            if not isinstance(extracted, pl.DataFrame):
                raise TypeError(
                    f"{extractor.__class__.__name__}.extract must return pl.DataFrame, got {type(extracted)}"
                )

            extracted = self._normalize_index(extracted)

            has_index = self.pair_col in extracted.columns and self.ts_col in extracted.columns
            if not has_index:
                raise ValueError(
                    f"{extractor.__class__.__name__} returned no index columns "
                    f"('{self.pair_col}', '{self.ts_col}'). "
                    f"FeatureSet requires index columns to combine features."
                )

            results.append(extracted)

        return self._combine_features(results)

    def _get_input_df(self, raw_data: RawDataView, extractor: FeatureExtractor) -> pl.DataFrame:
        """Fetch the extractor's required raw data as a Polars DataFrame.

        The data type comes from extractor.raw_data_type (SPOT when absent).

        Args:
            raw_data (RawDataView): Data view.
            extractor (FeatureExtractor): Extractor needing data.

        Returns:
            pl.DataFrame: Raw data in Polars format.

        Note:
            Falls back to the string "polars" for views whose get_data does
            not accept a DataFrameType enum (backward compatibility).
        """
        data_type = getattr(extractor, "raw_data_type", RawDataType.SPOT)

        try:
            return raw_data.get_data(data_type, DataFrameType.POLARS)
        except TypeError:
            return raw_data.get_data(data_type, "polars")

    def _normalize_index(self, df: pl.DataFrame) -> pl.DataFrame:
        """Normalize the timestamp column to timezone-naive.

        Args:
            df (pl.DataFrame): DataFrame to normalize.

        Returns:
            pl.DataFrame: DataFrame with timezone-naive timestamps.
        """
        if self.ts_col not in df.columns:
            return df

        ts_dtype = df.schema.get(self.ts_col)
        if not (isinstance(ts_dtype, pl.Datetime) and ts_dtype.time_zone is not None):
            return df

        return df.with_columns(pl.col(self.ts_col).dt.replace_time_zone(None))

    def _combine_features(self, feature_dfs: list[pl.DataFrame]) -> pl.DataFrame:
        """Combine feature DataFrames via outer join on (pair, timestamp).

        Args:
            feature_dfs (list[pl.DataFrame]): Feature DataFrames to combine.

        Returns:
            pl.DataFrame: Combined features with outer join semantics.

        Raises:
            ValueError: If no DataFrames or duplicate feature columns found.

        Note:
            Duplicate feature columns trigger an error - use unique prefixes.
        """
        if not feature_dfs:
            raise ValueError("No feature DataFrames to combine")

        index_cols = [self.pair_col, self.ts_col]
        merged, *remaining = feature_dfs

        for frame in remaining:
            incoming = {c for c in frame.columns if c not in (self.pair_col, self.ts_col)}
            dup = incoming & set(merged.columns)
            if dup:
                raise ValueError(
                    f"Duplicate feature columns during FeatureSet combine: {sorted(dup)}. "
                    f"Rename features or set unique prefixes."
                )

            merged = merged.join(frame, on=index_cols, how="outer", coalesce=True)

        return merged
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# IMPORTANT
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import polars as pl
|
|
5
|
+
from signalflow.feature.base import FeatureExtractor
|
|
6
|
+
from signalflow.core import sf_component
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
@sf_component(name="rsi")
class RsiExtractor(FeatureExtractor):
    """RSI feature extractor using SMA-smoothed gains and losses.

    Appends one RSI column per group. Edge cases: when both average gain and
    average loss are zero the RSI is pinned to 50; when only the average loss
    is zero it is pinned to 100.

    Attributes:
        rsi_period (int): Lookback window in bars. Default: 14.
        price_col (str): Price column the RSI is computed from. Default: "close".
        out_col (str): Name of the output column. Default: "rsi".
        use_resample (bool): Sliding resample enabled by default. Default: True.
    """

    rsi_period: int = 14
    price_col: str = "close"
    out_col: str = "rsi"
    use_resample: bool = True

    def compute_group(self, group_df: pl.DataFrame, data_context: dict | None) -> pl.DataFrame:
        """Append the RSI column to one (pair, offset) group.

        Args:
            group_df (pl.DataFrame): Single group's data, sorted by timestamp.
            data_context (dict | None): Unused additional context.

        Returns:
            pl.DataFrame: Input rows plus the RSI column (length-preserving).

        Note:
            The first rsi_period rows are null (min_samples == window_size).
        """
        change = pl.col(self.price_col).diff()

        def _rolling_avg(expr: pl.Expr) -> pl.Expr:
            # Full-window SMA: emits null until rsi_period samples exist.
            return expr.rolling_mean(
                window_size=self.rsi_period,
                min_samples=self.rsi_period,
            )

        avg_up = _rolling_avg(change.clip(lower_bound=0.0))
        avg_down = _rolling_avg((-change).clip(lower_bound=0.0))

        rsi_expr = (
            pl.when((avg_down == 0) & (avg_up == 0))
            .then(50.0)
            .when(avg_down == 0)
            .then(100.0)
            .otherwise(100.0 - (100.0 / (1.0 + avg_up / avg_down)))
        )

        return group_df.with_columns(rsi_expr.alias(self.out_col))
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from signalflow.feature.pandasta.pandas_ta_extractor import PandasTaExtractor
|
|
2
|
+
from signalflow.feature.pandasta.top_pandasta_extractors import PandasTaRsiExtractor, PandasTaBbandsExtractor, PandasTaMacdExtractor, PandasTaAtrExtractor
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"PandasTaExtractor",
|
|
6
|
+
"PandasTaRsiExtractor",
|
|
7
|
+
"PandasTaBbandsExtractor",
|
|
8
|
+
"PandasTaMacdExtractor",
|
|
9
|
+
"PandasTaAtrExtractor",
|
|
10
|
+
]
|