signalflow_trading-0.2.1-py3-none-any.whl
- signalflow/__init__.py +21 -0
- signalflow/analytics/__init__.py +0 -0
- signalflow/core/__init__.py +46 -0
- signalflow/core/base_mixin.py +232 -0
- signalflow/core/containers/__init__.py +21 -0
- signalflow/core/containers/order.py +216 -0
- signalflow/core/containers/portfolio.py +211 -0
- signalflow/core/containers/position.py +296 -0
- signalflow/core/containers/raw_data.py +167 -0
- signalflow/core/containers/raw_data_view.py +169 -0
- signalflow/core/containers/signals.py +198 -0
- signalflow/core/containers/strategy_state.py +147 -0
- signalflow/core/containers/trade.py +112 -0
- signalflow/core/decorators.py +103 -0
- signalflow/core/enums.py +270 -0
- signalflow/core/registry.py +322 -0
- signalflow/core/rolling_aggregator.py +362 -0
- signalflow/core/signal_transforms/__init__.py +5 -0
- signalflow/core/signal_transforms/base_signal_transform.py +186 -0
- signalflow/data/__init__.py +11 -0
- signalflow/data/raw_data_factory.py +225 -0
- signalflow/data/raw_store/__init__.py +7 -0
- signalflow/data/raw_store/base.py +271 -0
- signalflow/data/raw_store/duckdb_stores.py +696 -0
- signalflow/data/source/__init__.py +10 -0
- signalflow/data/source/base.py +300 -0
- signalflow/data/source/binance.py +442 -0
- signalflow/data/strategy_store/__init__.py +8 -0
- signalflow/data/strategy_store/base.py +278 -0
- signalflow/data/strategy_store/duckdb.py +409 -0
- signalflow/data/strategy_store/schema.py +36 -0
- signalflow/detector/__init__.py +7 -0
- signalflow/detector/adapter/__init__.py +5 -0
- signalflow/detector/adapter/pandas_detector.py +46 -0
- signalflow/detector/base.py +390 -0
- signalflow/detector/sma_cross.py +105 -0
- signalflow/feature/__init__.py +16 -0
- signalflow/feature/adapter/__init__.py +5 -0
- signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
- signalflow/feature/base.py +330 -0
- signalflow/feature/feature_set.py +286 -0
- signalflow/feature/oscillator/__init__.py +5 -0
- signalflow/feature/oscillator/rsi_extractor.py +42 -0
- signalflow/feature/pandasta/__init__.py +10 -0
- signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
- signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
- signalflow/feature/smoother/__init__.py +5 -0
- signalflow/feature/smoother/sma_extractor.py +46 -0
- signalflow/strategy/__init__.py +9 -0
- signalflow/strategy/broker/__init__.py +15 -0
- signalflow/strategy/broker/backtest.py +172 -0
- signalflow/strategy/broker/base.py +186 -0
- signalflow/strategy/broker/executor/__init__.py +9 -0
- signalflow/strategy/broker/executor/base.py +35 -0
- signalflow/strategy/broker/executor/binance_spot.py +12 -0
- signalflow/strategy/broker/executor/virtual_spot.py +81 -0
- signalflow/strategy/broker/realtime_spot.py +12 -0
- signalflow/strategy/component/__init__.py +9 -0
- signalflow/strategy/component/base.py +65 -0
- signalflow/strategy/component/entry/__init__.py +7 -0
- signalflow/strategy/component/entry/fixed_size.py +57 -0
- signalflow/strategy/component/entry/signal.py +127 -0
- signalflow/strategy/component/exit/__init__.py +5 -0
- signalflow/strategy/component/exit/time_based.py +47 -0
- signalflow/strategy/component/exit/tp_sl.py +80 -0
- signalflow/strategy/component/metric/__init__.py +8 -0
- signalflow/strategy/component/metric/main_metrics.py +181 -0
- signalflow/strategy/runner/__init__.py +8 -0
- signalflow/strategy/runner/backtest_runner.py +208 -0
- signalflow/strategy/runner/base.py +19 -0
- signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
- signalflow/strategy/runner/realtime_runner.py +0 -0
- signalflow/target/__init__.py +14 -0
- signalflow/target/adapter/__init__.py +5 -0
- signalflow/target/adapter/pandas_labeler.py +45 -0
- signalflow/target/base.py +409 -0
- signalflow/target/fixed_horizon_labeler.py +93 -0
- signalflow/target/static_triple_barrier.py +162 -0
- signalflow/target/triple_barrier.py +188 -0
- signalflow/utils/__init__.py +7 -0
- signalflow/utils/import_utils.py +11 -0
- signalflow/utils/tune_utils.py +19 -0
- signalflow/validator/__init__.py +6 -0
- signalflow/validator/base.py +139 -0
- signalflow/validator/sklearn_validator.py +527 -0
- signalflow_trading-0.2.1.dist-info/METADATA +149 -0
- signalflow_trading-0.2.1.dist-info/RECORD +90 -0
- signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
- signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
- signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
signalflow/core/rolling_aggregator.py
@@ -0,0 +1,362 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Literal

import polars as pl
from signalflow.core.enums import RawDataType


@dataclass
class RollingAggregator:
    """Offset (sliding) resampler for raw market data.

    Computes rolling aggregates over a sliding window of bars for each trading pair.
    For each row at time t, aggregates over the last `offset_window` rows: [t-(k-1), ..., t].

    Key features:
    - Length-preserving: len(output) == len(input)
    - Per-pair processing: each pair is aggregated independently
    - First (k-1) rows per pair → nulls (min_periods=k)
    - (pair, timestamp) columns preserved

    Use cases:
    - Create higher-timeframe features (e.g., 5m bars from 1m bars)
    - Smooth noise with rolling aggregates
    - Generate multi-timeframe features for models

    Attributes:
        offset_window (int): Number of bars in the sliding window. Must be > 0.
        ts_col (str): Timestamp column name. Default: "timestamp".
        pair_col (str): Trading pair column name. Default: "pair".
        mode (Literal["add", "replace"]): Output mode:
            - "add": Add resampled columns with prefix
            - "replace": Replace original OHLC columns
        prefix (str | None): Prefix for output columns in "add" mode.
            Default: "rs_{offset_window}m_"
        raw_data_type (RawDataType): Type of raw data. Default: SPOT.
        OFFSET_COL (str): Column name for offset tracking. Default: "resample_offset".

    Example:
        ```python
        from signalflow.core import RollingAggregator
        import polars as pl

        # Create 5-minute bars from 1-minute bars
        aggregator = RollingAggregator(
            offset_window=5,
            mode="add",
            prefix="5m_"
        )

        # Resample data
        df_resampled = aggregator.resample(spot_df)

        # Result has both 1m and 5m data
        print(df_resampled.columns)
        # ['pair', 'timestamp', 'open', 'high', 'low', 'close', 'volume',
        #  'resample_offset', '5m_open', '5m_high', '5m_low', '5m_close', '5m_volume']

        # Replace mode - output only 5m bars
        aggregator_replace = RollingAggregator(
            offset_window=5,
            mode="replace"
        )
        df_5m = aggregator_replace.resample(spot_df)

        # Add offset column for tracking
        df_with_offset = aggregator.add_offset_column(spot_df)
        print(df_with_offset["resample_offset"])  # 0, 1, 2, 3, 4, 0, 1, ...
        ```

    Note:
        Currently only supports the SPOT data type (OHLCV).
        The first (k-1) rows per pair will have null values for resampled columns.
        The DataFrame is automatically sorted by (pair, timestamp).

    See Also:
        FeatureExtractor: For extracting features from resampled data.
    """

    offset_window: int = 1
    ts_col: str = "timestamp"
    pair_col: str = "pair"
    mode: Literal["add", "replace"] = "replace"
    prefix: str | None = None
    raw_data_type: RawDataType = RawDataType.SPOT

    OFFSET_COL: str = "resample_offset"

    @property
    def out_prefix(self) -> str:
        """Get output prefix for resampled columns.

        Returns:
            str: Prefix for output columns. Uses the custom prefix if provided,
                otherwise defaults to "rs_{offset_window}m_".

        Example:
            ```python
            # Default prefix
            agg = RollingAggregator(offset_window=5)
            assert agg.out_prefix == "rs_5m_"

            # Custom prefix
            agg = RollingAggregator(offset_window=5, prefix="5min_")
            assert agg.out_prefix == "5min_"
            ```
        """
        return self.prefix if self.prefix is not None else f"rs_{self.offset_window}m_"

    def _validate_base(self, df: pl.DataFrame) -> None:
        """Validate base requirements for the DataFrame.

        Args:
            df (pl.DataFrame): DataFrame to validate.

        Raises:
            ValueError: If offset_window <= 0.
            ValueError: If required columns (ts_col, pair_col) are missing.
        """
        if self.offset_window <= 0:
            raise ValueError(f"offset_window must be > 0, got {self.offset_window}")
        if self.ts_col not in df.columns:
            raise ValueError(f"Missing '{self.ts_col}' column")
        if self.pair_col not in df.columns:
            raise ValueError(f"Missing '{self.pair_col}' column")

    def add_offset_column(self, df: pl.DataFrame) -> pl.DataFrame:
        """Add offset column for tracking position within the window.

        Computes: timestamp.minute() % offset_window

        Useful for:
        - Debugging resampling logic
        - Identifying bar position within the window (0, 1, 2, ..., k-1)
        - Aligning multiple dataframes

        Args:
            df (pl.DataFrame): Input DataFrame with timestamp column.

        Returns:
            pl.DataFrame: DataFrame with added offset column.

        Raises:
            ValueError: If validation fails (missing columns, invalid window).

        Example:
            ```python
            # Add offset for 5-minute windows
            agg = RollingAggregator(offset_window=5)
            df_with_offset = agg.add_offset_column(spot_df)

            # Offset cycles: 0, 1, 2, 3, 4, 0, 1, 2, ...
            print(df_with_offset["resample_offset"].to_list()[:10])
            # [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]

            # Filter by specific offset
            df_offset_0 = df_with_offset.filter(
                pl.col("resample_offset") == 0
            )
            ```
        """
        self._validate_base(df)

        return df.with_columns(
            (pl.col(self.ts_col).dt.minute() % pl.lit(self.offset_window)).cast(pl.Int64).alias(self.OFFSET_COL)
        )

    def get_last_offset(self, df: pl.DataFrame) -> int:
        """Get the offset value of the last timestamp in the DataFrame.

        Useful for:
        - Determining the current position in the resampling window
        - Synchronizing multiple dataframes
        - Tracking resampling state

        Args:
            df (pl.DataFrame): Input DataFrame with timestamp column.

        Returns:
            int: Offset value (0 to offset_window-1).

        Raises:
            ValueError: If the DataFrame is empty or validation fails.

        Example:
            ```python
            agg = RollingAggregator(offset_window=5)

            # Check current offset
            last_offset = agg.get_last_offset(spot_df)
            print(f"Current offset: {last_offset}")  # 0-4

            # Wait for window completion
            if last_offset == 4:
                print("Window complete, ready to resample")
            ```
        """
        self._validate_base(df)
        if df.is_empty():
            raise ValueError("Empty dataframe")

        last_ts = df.select(pl.col(self.ts_col).max()).item()
        return int(last_ts.minute % self.offset_window)

    def _spot_validate(self, cols: list[str]) -> None:
        """Validate SPOT data requirements.

        Args:
            cols (list[str]): Column names in the DataFrame.

        Raises:
            ValueError: If required OHLC columns are missing.
        """
        required = {"open", "high", "low", "close"}
        missing = required - set(cols)
        if missing:
            raise ValueError(f"spot resample requires columns {sorted(required)}; missing {sorted(missing)}")

    def resample(self, df: pl.DataFrame) -> pl.DataFrame:
        """Resample the DataFrame using rolling window aggregation.

        Aggregation rules for SPOT data:
        - open: Value from (k-1) bars ago (shifted)
        - high: Maximum over window
        - low: Minimum over window
        - close: Current value (no aggregation)
        - volume: Sum over window (if present)
        - trades: Sum over window (if present)

        Processing:
        1. Sort by (pair, timestamp)
        2. Add offset column if needed
        3. Apply rolling aggregations per pair
        4. Return length-preserving result

        Args:
            df (pl.DataFrame): Input DataFrame with OHLCV data.

        Returns:
            pl.DataFrame: Resampled DataFrame. Length equals input length.

        Raises:
            NotImplementedError: If raw_data_type is not SPOT.
            ValueError: If required columns are missing or the output length mismatches.

        Example:
            ```python
            import polars as pl
            from datetime import datetime

            # Create 1-minute bars
            df = pl.DataFrame({
                "pair": ["BTCUSDT"] * 10,
                "timestamp": [
                    datetime(2024, 1, 1, 10, i)
                    for i in range(10)
                ],
                "open": [45000 + i*10 for i in range(10)],
                "high": [45100 + i*10 for i in range(10)],
                "low": [44900 + i*10 for i in range(10)],
                "close": [45050 + i*10 for i in range(10)],
                "volume": [100.0] * 10
            })

            # Create 5-minute bars (add mode)
            agg = RollingAggregator(offset_window=5, mode="add", prefix="5m_")
            df_resampled = agg.resample(df)

            # First 4 rows have nulls for 5m columns
            print(df_resampled.filter(pl.col("5m_open").is_null()).height)  # 4

            # From the 5th row (index 4) onward, 5m data is available
            print(df_resampled[4:])
            # 5m_open = open from 5 bars ago
            # 5m_high = max(high) over last 5 bars
            # 5m_low = min(low) over last 5 bars
            # 5m_close = current close
            # 5m_volume = sum(volume) over last 5 bars

            # Replace mode - output only resampled columns
            agg_replace = RollingAggregator(offset_window=5, mode="replace")
            df_5m = agg_replace.resample(df)
            print(df_5m.columns)
            # ['pair', 'timestamp', 'open', 'high', 'low', 'close', 'volume', 'resample_offset']
            # But values are 5-minute aggregates
            ```

        Note:
            The first (k-1) rows per pair will have null values (min_periods=k).
            The input DataFrame is automatically sorted by (pair, timestamp).
            Volume and trades columns are optional but recommended.
        """
        if self.raw_data_type != RawDataType.SPOT:
            raise NotImplementedError("resample() is currently implemented for data_type='spot' only")

        self._validate_base(df)
        self._spot_validate(df.columns)

        df0 = df.sort([self.pair_col, self.ts_col])

        if self.OFFSET_COL not in df0.columns:
            df0 = self.add_offset_column(df0)

        k = int(self.offset_window)
        pfx = self.out_prefix
        over = [self.pair_col]

        rs_open = pl.col("open").shift(k - 1).over(over)
        rs_high = pl.col("high").rolling_max(window_size=k, min_periods=k).over(over)
        rs_low = pl.col("low").rolling_min(window_size=k, min_periods=k).over(over)
        rs_close = pl.col("close")

        has_volume = "volume" in df0.columns
        has_trades = "trades" in df0.columns

        if self.mode == "add":
            exprs: list[pl.Expr] = [
                rs_open.alias(f"{pfx}open"),
                rs_high.alias(f"{pfx}high"),
                rs_low.alias(f"{pfx}low"),
                rs_close.alias(f"{pfx}close"),
            ]
            if has_volume:
                exprs.append(
                    pl.col("volume")
                    .rolling_sum(window_size=k, min_periods=k)
                    .over(over)
                    .alias(f"{pfx}volume")
                )
            if has_trades:
                exprs.append(
                    pl.col("trades")
                    .rolling_sum(window_size=k, min_periods=k)
                    .over(over)
                    .alias(f"{pfx}trades")
                )
            out = df0.with_columns(exprs)

        elif self.mode == "replace":
            exprs2: list[pl.Expr] = [
                rs_open.alias("open"),
                rs_high.alias("high"),
                rs_low.alias("low"),
                rs_close.alias("close"),
            ]
            if has_volume:
                exprs2.append(
                    pl.col("volume").rolling_sum(window_size=k, min_periods=k).over(over).alias("volume")
                )
            if has_trades:
                exprs2.append(
                    pl.col("trades").rolling_sum(window_size=k, min_periods=k).over(over).alias("trades")
                )
            out = df0.with_columns(exprs2)

        else:
            raise ValueError(f"Unknown mode: {self.mode}")

        if out.height != df.height:
            raise ValueError(f"resample(pl): len(out)={out.height} != len(in)={df.height}")

        return out
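To make the aggregation rules in `resample()` concrete, here is a minimal sketch (not part of the wheel) that reproduces the same expressions by hand on a toy frame. The input values and the expected tuple are illustrative assumptions; the `rs_5m_` column names follow the default `out_prefix`, and `min_periods` mirrors the package source above.

```python
import polars as pl
from datetime import datetime

# Toy 1-minute frame (hypothetical values, single pair).
df = pl.DataFrame({
    "pair": ["BTCUSDT"] * 6,
    "timestamp": [datetime(2024, 1, 1, 10, i) for i in range(6)],
    "open":   [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
    "high":   [1.5, 2.5, 3.5, 4.5, 5.5, 6.5],
    "low":    [0.5, 1.5, 2.5, 3.5, 4.5, 5.5],
    "close":  [1.2, 2.2, 3.2, 4.2, 5.2, 6.2],
    "volume": [10.0] * 6,
})

# Hand-rolled equivalents of the expressions used by resample() for k=5.
k = 5
expected = df.with_columns(
    pl.col("open").shift(k - 1).over("pair").alias("rs_5m_open"),
    pl.col("high").rolling_max(window_size=k, min_periods=k).over("pair").alias("rs_5m_high"),
    pl.col("low").rolling_min(window_size=k, min_periods=k).over("pair").alias("rs_5m_low"),
    pl.col("volume").rolling_sum(window_size=k, min_periods=k).over("pair").alias("rs_5m_volume"),
)

# Rows 0-3 are null (incomplete window); row 4 is the first full window:
# open from 4 bars back, max/min over rows 0-4, summed volume.
print(expected.select(["rs_5m_open", "rs_5m_high", "rs_5m_low", "rs_5m_volume"]).row(4))
# (1.0, 5.5, 0.5, 50.0)
```

Running `RollingAggregator(offset_window=5, mode="add").resample(df)` should yield the same four columns, plus `rs_5m_close` (the current close) and the `resample_offset` tracking column.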
signalflow/core/signal_transforms/base_signal_transform.py
@@ -0,0 +1,186 @@
from typing import Protocol
import polars as pl
from signalflow.core.enums import SfComponentType


class SignalsTransform(Protocol):
    """Protocol for signal transformations.

    Defines the interface for functions that transform signal DataFrames.
    Transformations can filter, modify, or augment signals while preserving
    the core schema (pair, timestamp, signal_type, signal, probability).

    Protocol-based design allows:
    - Any callable with a matching signature
    - Functional composition via Signals.pipe()
    - Type checking without inheritance

    Common use cases:
    - Filter signals by probability threshold
    - Normalize probability values
    - Add metadata columns
    - Remove duplicate signals
    - Apply time-based filters (cooldown periods)

    Attributes:
        name (str): Descriptive name for the transformation.
        component_type (SfComponentType): Always SIGNALS_TRANSFORM for the registry.

    Example:
        ```python
        import polars as pl
        from signalflow.core import Signals, SignalsTransform

        # Simple function-based transform
        def filter_high_probability(df: pl.DataFrame) -> pl.DataFrame:
            '''Keep only signals with probability > 0.7'''
            return df.filter(pl.col("probability") > 0.7)

        filter_high_probability.name = "filter_high_prob"
        filter_high_probability.component_type = SfComponentType.SIGNALS_TRANSFORM

        # Class-based transform
        class NormalizeProbability:
            name = "normalize_prob"
            component_type = SfComponentType.SIGNALS_TRANSFORM

            def __call__(self, df: pl.DataFrame) -> pl.DataFrame:
                '''Normalize probabilities to [0, 1] range'''
                return df.with_columns(
                    (pl.col("probability") / pl.col("probability").max())
                    .alias("probability")
                )

        # Use in signal pipeline
        signals = detector.detect(data)

        # Single transform
        filtered = signals.apply(filter_high_probability)

        # Chained transforms
        processed = signals.pipe(
            filter_high_probability,
            NormalizeProbability(),
            add_metadata_transform
        )
        ```

    Example:
        ```python
        # Register transform in registry
        from signalflow.core import sf_component

        @sf_component(name="cooldown_filter")
        class CooldownFilter:
            component_type = SfComponentType.SIGNALS_TRANSFORM
            name = "cooldown_filter"

            def __init__(self, cooldown_minutes: int = 60):
                self.cooldown_minutes = cooldown_minutes

            def __call__(self, df: pl.DataFrame) -> pl.DataFrame:
                '''Filter signals within cooldown period'''
                return (
                    df.sort(["pair", "timestamp"])
                    .with_columns(
                        pl.col("timestamp")
                        .diff()
                        .over("pair")
                        .dt.total_minutes()
                        .alias("minutes_since_last")
                    )
                    .filter(
                        (pl.col("minutes_since_last").is_null()) |
                        (pl.col("minutes_since_last") >= self.cooldown_minutes)
                    )
                    .drop("minutes_since_last")
                )

        # Use registered transform
        from signalflow.core.registry import default_registry

        cooldown = default_registry.create(
            SfComponentType.SIGNALS_TRANSFORM,
            "cooldown_filter",
            cooldown_minutes=120
        )

        filtered_signals = signals.apply(cooldown)
        ```

    Note:
        Transformations should be pure functions (no side effects).
        The input DataFrame schema should be preserved where possible.
        Return a DataFrame with the same or a compatible schema for chaining.

    See Also:
        Signals: Container class with apply() and pipe() methods.
        sf_component: Decorator for registering transforms.
    """

    name: str
    component_type: SfComponentType = SfComponentType.SIGNALS_TRANSFORM

    def __call__(self, value: pl.DataFrame) -> pl.DataFrame:
        """Apply the transformation to a signals DataFrame.

        Core method that performs the actual transformation logic.
        It must accept and return a Polars DataFrame with the signals schema.

        Expected input schema:
        - pair (str): Trading pair
        - timestamp (datetime): Signal timestamp
        - signal_type (int): SignalType enum value
        - signal (int|float): Signal value
        - probability (float): Signal probability (optional but common)

        Args:
            value (pl.DataFrame): Input signals DataFrame with the standard schema.

        Returns:
            pl.DataFrame: Transformed signals DataFrame. Should maintain a
                compatible schema for chaining with other transforms.

        Example:
            ```python
            # Function-based transform
            def remove_none_signals(df: pl.DataFrame) -> pl.DataFrame:
                from signalflow.core.enums import SignalType
                return df.filter(
                    pl.col("signal_type") != SignalType.NONE.value
                )

            # Apply transform
            filtered = signals.apply(remove_none_signals)

            # Class-based transform with state
            class ThresholdFilter:
                def __init__(self, threshold: float = 0.5):
                    self.threshold = threshold

                def __call__(self, df: pl.DataFrame) -> pl.DataFrame:
                    return df.filter(
                        pl.col("probability") >= self.threshold
                    )

            # Use with different thresholds
            filter_50 = ThresholdFilter(threshold=0.5)
            filter_70 = ThresholdFilter(threshold=0.7)

            signals_50 = signals.apply(filter_50)
            signals_70 = signals.apply(filter_70)

            # Combine multiple transforms
            processed = signals.pipe(
                remove_none_signals,
                filter_50,
                lambda df: df.sort(["pair", "timestamp"])
            )
            ```

        Note:
            Should be deterministic: the same input produces the same output.
            Avoid modifying the input DataFrame (return a new DataFrame).
            Consider performance for large datasets (use vectorized operations).
        """
        ...
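To complement the docstring examples, below is a hedged sketch of the "remove duplicate signals" use case listed above. `DeduplicateSignals` is hypothetical (not shipped in this wheel); the point is that any object with a matching `name`, `component_type`, and `__call__` satisfies the protocol structurally, with no inheritance required.

```python
import polars as pl

from signalflow.core import SignalsTransform
from signalflow.core.enums import SfComponentType


class DeduplicateSignals:
    """Hypothetical transform: keep one signal per (pair, timestamp),
    preferring the highest probability."""

    name = "deduplicate_signals"
    component_type = SfComponentType.SIGNALS_TRANSFORM

    def __call__(self, df: pl.DataFrame) -> pl.DataFrame:
        # Put the highest-probability row first within each key, keep it,
        # then restore the canonical (pair, timestamp) ordering.
        return (
            df.sort("probability", descending=True)
            .unique(subset=["pair", "timestamp"], keep="first", maintain_order=True)
            .sort(["pair", "timestamp"])
        )


# Accepted by a type checker wherever the protocol is expected:
dedup: SignalsTransform = DeduplicateSignals()
```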
signalflow/data/__init__.py
@@ -0,0 +1,11 @@
import signalflow.data.source as source
import signalflow.data.raw_store as raw_store
import signalflow.data.strategy_store as strategy_store
from signalflow.data.raw_data_factory import RawDataFactory

__all__ = [
    "source",
    "raw_store",
    "strategy_store",
    "RawDataFactory",
]
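A short sketch of what these re-exports give the caller (assuming the wheel is installed):

```python
# Attribute access and direct import resolve to the same objects:
from signalflow.data import RawDataFactory, raw_store

import signalflow.data as sf_data

assert sf_data.RawDataFactory is RawDataFactory
assert sf_data.raw_store is raw_store
```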