signalflow-trading 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. signalflow/__init__.py +21 -0
  2. signalflow/analytics/__init__.py +0 -0
  3. signalflow/core/__init__.py +46 -0
  4. signalflow/core/base_mixin.py +232 -0
  5. signalflow/core/containers/__init__.py +21 -0
  6. signalflow/core/containers/order.py +216 -0
  7. signalflow/core/containers/portfolio.py +211 -0
  8. signalflow/core/containers/position.py +296 -0
  9. signalflow/core/containers/raw_data.py +167 -0
  10. signalflow/core/containers/raw_data_view.py +169 -0
  11. signalflow/core/containers/signals.py +198 -0
  12. signalflow/core/containers/strategy_state.py +147 -0
  13. signalflow/core/containers/trade.py +112 -0
  14. signalflow/core/decorators.py +103 -0
  15. signalflow/core/enums.py +270 -0
  16. signalflow/core/registry.py +322 -0
  17. signalflow/core/rolling_aggregator.py +362 -0
  18. signalflow/core/signal_transforms/__init__.py +5 -0
  19. signalflow/core/signal_transforms/base_signal_transform.py +186 -0
  20. signalflow/data/__init__.py +11 -0
  21. signalflow/data/raw_data_factory.py +225 -0
  22. signalflow/data/raw_store/__init__.py +7 -0
  23. signalflow/data/raw_store/base.py +271 -0
  24. signalflow/data/raw_store/duckdb_stores.py +696 -0
  25. signalflow/data/source/__init__.py +10 -0
  26. signalflow/data/source/base.py +300 -0
  27. signalflow/data/source/binance.py +442 -0
  28. signalflow/data/strategy_store/__init__.py +8 -0
  29. signalflow/data/strategy_store/base.py +278 -0
  30. signalflow/data/strategy_store/duckdb.py +409 -0
  31. signalflow/data/strategy_store/schema.py +36 -0
  32. signalflow/detector/__init__.py +7 -0
  33. signalflow/detector/adapter/__init__.py +5 -0
  34. signalflow/detector/adapter/pandas_detector.py +46 -0
  35. signalflow/detector/base.py +390 -0
  36. signalflow/detector/sma_cross.py +105 -0
  37. signalflow/feature/__init__.py +16 -0
  38. signalflow/feature/adapter/__init__.py +5 -0
  39. signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
  40. signalflow/feature/base.py +330 -0
  41. signalflow/feature/feature_set.py +286 -0
  42. signalflow/feature/oscillator/__init__.py +5 -0
  43. signalflow/feature/oscillator/rsi_extractor.py +42 -0
  44. signalflow/feature/pandasta/__init__.py +10 -0
  45. signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
  46. signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
  47. signalflow/feature/smoother/__init__.py +5 -0
  48. signalflow/feature/smoother/sma_extractor.py +46 -0
  49. signalflow/strategy/__init__.py +9 -0
  50. signalflow/strategy/broker/__init__.py +15 -0
  51. signalflow/strategy/broker/backtest.py +172 -0
  52. signalflow/strategy/broker/base.py +186 -0
  53. signalflow/strategy/broker/executor/__init__.py +9 -0
  54. signalflow/strategy/broker/executor/base.py +35 -0
  55. signalflow/strategy/broker/executor/binance_spot.py +12 -0
  56. signalflow/strategy/broker/executor/virtual_spot.py +81 -0
  57. signalflow/strategy/broker/realtime_spot.py +12 -0
  58. signalflow/strategy/component/__init__.py +9 -0
  59. signalflow/strategy/component/base.py +65 -0
  60. signalflow/strategy/component/entry/__init__.py +7 -0
  61. signalflow/strategy/component/entry/fixed_size.py +57 -0
  62. signalflow/strategy/component/entry/signal.py +127 -0
  63. signalflow/strategy/component/exit/__init__.py +5 -0
  64. signalflow/strategy/component/exit/time_based.py +47 -0
  65. signalflow/strategy/component/exit/tp_sl.py +80 -0
  66. signalflow/strategy/component/metric/__init__.py +8 -0
  67. signalflow/strategy/component/metric/main_metrics.py +181 -0
  68. signalflow/strategy/runner/__init__.py +8 -0
  69. signalflow/strategy/runner/backtest_runner.py +208 -0
  70. signalflow/strategy/runner/base.py +19 -0
  71. signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
  72. signalflow/strategy/runner/realtime_runner.py +0 -0
  73. signalflow/target/__init__.py +14 -0
  74. signalflow/target/adapter/__init__.py +5 -0
  75. signalflow/target/adapter/pandas_labeler.py +45 -0
  76. signalflow/target/base.py +409 -0
  77. signalflow/target/fixed_horizon_labeler.py +93 -0
  78. signalflow/target/static_triple_barrier.py +162 -0
  79. signalflow/target/triple_barrier.py +188 -0
  80. signalflow/utils/__init__.py +7 -0
  81. signalflow/utils/import_utils.py +11 -0
  82. signalflow/utils/tune_utils.py +19 -0
  83. signalflow/validator/__init__.py +6 -0
  84. signalflow/validator/base.py +139 -0
  85. signalflow/validator/sklearn_validator.py +527 -0
  86. signalflow_trading-0.2.1.dist-info/METADATA +149 -0
  87. signalflow_trading-0.2.1.dist-info/RECORD +90 -0
  88. signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
  89. signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
  90. signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
# src/signalflow/data/strategy_store/schema.py
"""DDL executed by the strategy store backends (DuckDB).

Defines four tables:
  - strategy_state: one row per strategy; latest checkpoint as JSON payload.
  - positions:      position snapshots keyed by (strategy_id, ts, position_id).
  - trades:         trade records keyed by (strategy_id, trade_id).
  - metrics:        scalar metric time series keyed by (strategy_id, ts, name).

All statements are idempotent (IF NOT EXISTS) so the script can be run on
every store start-up.
"""

# NOTE(review): `positions` includes `ts` in its primary key (snapshots over
# time) while `trades` keys on (strategy_id, trade_id) only, even though
# trades.ts is NOT NULL — confirm this asymmetry is intentional (i.e. trade
# ids are globally unique per strategy, positions are re-recorded per bar).
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS strategy_state (
    strategy_id TEXT PRIMARY KEY,
    last_ts TIMESTAMP,
    last_event_id TEXT,
    payload_json TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS positions (
    strategy_id TEXT NOT NULL,
    ts TIMESTAMP NOT NULL,
    position_id TEXT NOT NULL,
    payload_json TEXT NOT NULL,
    PRIMARY KEY (strategy_id, ts, position_id)
);

CREATE TABLE IF NOT EXISTS trades (
    strategy_id TEXT NOT NULL,
    ts TIMESTAMP NOT NULL,
    trade_id TEXT NOT NULL,
    payload_json TEXT NOT NULL,
    PRIMARY KEY (strategy_id, trade_id)
);

CREATE TABLE IF NOT EXISTS metrics (
    strategy_id TEXT NOT NULL,
    ts TIMESTAMP NOT NULL,
    name TEXT NOT NULL,
    value DOUBLE NOT NULL,
    PRIMARY KEY (strategy_id, ts, name)
);

CREATE INDEX IF NOT EXISTS idx_metrics_strategy_ts ON metrics(strategy_id, ts);
CREATE INDEX IF NOT EXISTS idx_positions_strategy_ts ON positions(strategy_id, ts);
"""
"""Public API of the :mod:`signalflow.detector` subpackage."""

from signalflow.detector.base import SignalDetector
from signalflow.detector.sma_cross import SmaCrossSignalDetector

__all__ = [
    "SignalDetector",
    "SmaCrossSignalDetector",
]
"""Public API of the :mod:`signalflow.detector.adapter` subpackage."""

from signalflow.detector.adapter.pandas_detector import PandasSignalDetector

__all__ = [
    "PandasSignalDetector",
]
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import abstractmethod
4
+ from dataclasses import dataclass
5
+ from typing import Any
6
+
7
+ import pandas as pd
8
+ import polars as pl
9
+
10
+ from signalflow.core import Signals
11
+ from signalflow.detector.base_detector import SignalDetector
12
+
13
+
14
+ @dataclass
15
+ class PandasSignalDetector(SignalDetector):
16
+ """
17
+ Adapter: pandas-based detector logic, polars-first public interface.
18
+
19
+ Rule:
20
+ - preprocess() still returns pl.DataFrame (from FeatureSet or overridden)
21
+ - detect(pl.DataFrame) converts to pandas -> detect_pd() -> back to pl -> Signals
22
+ """
23
+
24
+ def detect(self, features: pl.DataFrame, context: dict[str, Any] | None = None) -> Signals:
25
+ if not isinstance(features, pl.DataFrame):
26
+ raise TypeError(f"{self.__class__.__name__}.detect expects pl.DataFrame, got {type(features)}")
27
+
28
+ pdf = features.to_pandas()
29
+ out_pd = self.detect_pd(pdf, context=context)
30
+
31
+ if not isinstance(out_pd, pd.DataFrame):
32
+ raise TypeError(f"{self.__class__.__name__}.detect_pd must return pd.DataFrame, got {type(out_pd)}")
33
+
34
+ out_pl = pl.from_pandas(out_pd, include_index=False)
35
+ out_pl = self._normalize_index(out_pl)
36
+ return Signals(out_pl)
37
+
38
+ @abstractmethod
39
+ def detect_pd(self, features: pd.DataFrame, context: dict[str, Any] | None = None) -> pd.DataFrame:
40
+ """
41
+ Pandas detection implementation.
42
+
43
+ Must return a DataFrame with at least:
44
+ - pair, timestamp, signal_type
45
+ """
46
+ raise NotImplementedError
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, ClassVar

import polars as pl

from signalflow.core import RawDataView, Signals, SfComponentType, SignalType, RawDataType
from signalflow.feature import FeatureSet


@dataclass
class SignalDetector(ABC):
    """Base class for Polars-first signal detection.

    Provides standardized pipeline for detecting trading signals from raw data:
    1. preprocess: Extract features from raw data
    2. detect: Generate signals from features
    3. validate: Ensure data quality

    Key features:
    - Polars-native for performance
    - Automatic feature extraction via FeatureSet
    - Built-in validation (schema, duplicates, timezones)
    - Optional probability requirement
    - Keep latest signal per pair option

    Public API:
    - run(): Complete pipeline (preprocess → detect → validate)
    - preprocess(): Feature extraction (delegates to FeatureSet)
    - detect(): Signal generation (must implement)

    Attributes:
        component_type (ClassVar[SfComponentType]): Always DETECTOR for registry.
        pair_col (str): Trading pair column name. Default: "pair".
        ts_col (str): Timestamp column name. Default: "timestamp".
        raw_data_type (RawDataType): Type of raw data to process. Default: SPOT.
        feature_set (FeatureSet | None): Feature extractor. Default: None.
        require_probability (bool): Require probability column in signals. Default: False.
        keep_only_latest_per_pair (bool): Keep only latest signal per pair. Default: False.

    Example:
        ```python
        from signalflow.detector import SignalDetector
        from signalflow.core import Signals, SignalType
        import polars as pl

        class SmaCrossDetector(SignalDetector):
            '''Simple SMA crossover detector'''

            def __init__(self, fast_window: int = 10, slow_window: int = 20):
                super().__init__()
                # Auto-generate features
                from signalflow.feature import FeatureSet, SmaExtractor
                self.feature_set = FeatureSet([
                    SmaExtractor(window=fast_window, column="close"),
                    SmaExtractor(window=slow_window, column="close")
                ])

            def detect(self, features: pl.DataFrame, context=None) -> Signals:
                signals = features.with_columns([
                    # Detect crossover
                    (pl.col("sma_10") > pl.col("sma_20")).alias("is_bull"),
                    (pl.col("sma_10") < pl.col("sma_20")).alias("is_bear")
                ]).with_columns([
                    # Assign signal type
                    pl.when(pl.col("is_bull"))
                    .then(pl.lit(SignalType.RISE.value))
                    .when(pl.col("is_bear"))
                    .then(pl.lit(SignalType.FALL.value))
                    .otherwise(pl.lit(SignalType.NONE.value))
                    .alias("signal_type")
                ]).select([
                    self.pair_col,
                    self.ts_col,
                    "signal_type",
                    pl.lit(1).alias("signal")
                ])

                return Signals(signals)

        # Usage
        detector = SmaCrossDetector(fast_window=10, slow_window=20)
        signals = detector.run(raw_data_view)
        ```

    Note:
        Subclasses must implement detect() method.
        All DataFrames must use timezone-naive timestamps.
        Duplicate (pair, timestamp) combinations are rejected.

    See Also:
        FeatureSet: Orchestrates feature extraction.
        Signals: Container for signal output.
    """

    component_type: ClassVar[SfComponentType] = SfComponentType.DETECTOR

    pair_col: str = "pair"
    ts_col: str = "timestamp"

    raw_data_type: RawDataType = RawDataType.SPOT

    feature_set: FeatureSet | None = None

    require_probability: bool = False
    keep_only_latest_per_pair: bool = False

    def run(self, raw_data_view: RawDataView, context: dict[str, Any] | None = None) -> Signals:
        """Execute complete detection pipeline.

        Pipeline steps:
        1. preprocess: Extract features
        2. normalize: Ensure timezone-naive timestamps
        3. validate features: Check schema and duplicates
        4. detect: Generate signals
        5. validate signals: Check output quality
        6. (optional) keep latest: Filter to latest per pair

        Args:
            raw_data_view (RawDataView): View to raw market data.
            context (dict[str, Any] | None): Additional context for detection.

        Returns:
            Signals: Detected signals.

        Raises:
            TypeError: If preprocess doesn't return pl.DataFrame.
            ValueError: If features/signals fail validation.

        Example:
            ```python
            from signalflow.core import RawData, RawDataView

            # Create view
            view = RawDataView(raw=raw_data)

            # Run detection
            signals = detector.run(view)

            # With context
            signals = detector.run(view, context={"threshold": 0.7})
            ```

        Note:
            Can also be called directly: detector(raw_data_view).
            All validation errors include helpful diagnostic information.
        """
        feats = self.preprocess(raw_data_view, context=context)
        # Normalize before validating so tz-aware feature timestamps coming
        # out of preprocess() don't trip _validate_features.
        feats = self._normalize_index(feats)
        self._validate_features(feats)

        signals = self.detect(feats, context=context)
        self._validate_signals(signals)

        if self.keep_only_latest_per_pair:
            signals = self._keep_only_latest(signals)

        return signals

    # Alias: detector instances are directly callable — detector(view)
    # behaves exactly like detector.run(view).
    __call__ = run

    def preprocess(self, raw_data_view: RawDataView, context: dict[str, Any] | None = None) -> pl.DataFrame:
        """Extract features from raw data.

        Default implementation delegates to FeatureSet. Override for custom
        feature extraction logic.

        Args:
            raw_data_view (RawDataView): View to raw market data.
            context (dict[str, Any] | None): Additional context.

        Returns:
            pl.DataFrame: Features with at minimum pair and timestamp columns.

        Raises:
            NotImplementedError: If feature_set is None and not overridden.
            TypeError: If FeatureSet doesn't return pl.DataFrame.

        Example:
            ```python
            # Default: uses FeatureSet
            features = detector.preprocess(raw_data_view)

            # Custom override
            class CustomDetector(SignalDetector):
                def preprocess(self, raw_data_view, context=None):
                    df = raw_data_view.to_polars("spot")
                    return df.with_columns([
                        pl.col("close").rolling_mean(10).alias("sma_10")
                    ])
            ```
        """
        if self.feature_set is None:
            raise NotImplementedError(
                f"{self.__class__.__name__}.preprocess is not implemented and feature_set is None"
            )
        out = self.feature_set.extract(raw_data_view, context=context)
        if not isinstance(out, pl.DataFrame):
            raise TypeError(f"{self.__class__.__name__}.feature_set.extract must return pl.DataFrame, got {type(out)}")
        return out

    @abstractmethod
    def detect(self, features: pl.DataFrame, context: dict[str, Any] | None = None) -> Signals:
        """Generate signals from features.

        Core detection logic - must be implemented by subclasses.

        Args:
            features (pl.DataFrame): Preprocessed features.
            context (dict[str, Any] | None): Additional context.

        Returns:
            Signals: Detected signals with columns:
                - pair (str): Trading pair
                - timestamp (datetime): Signal timestamp (timezone-naive)
                - signal_type (int): SignalType enum value
                - signal (int | float): Signal value
                - probability (float, optional): Signal probability

        Example:
            ```python
            def detect(self, features, context=None):
                # Simple threshold detector
                signals = features.filter(
                    pl.col("rsi") > 70  # Overbought
                ).with_columns([
                    pl.lit(SignalType.FALL.value).alias("signal_type"),
                    pl.lit(-1).alias("signal"),
                    pl.lit(0.8).alias("probability")
                ]).select([
                    self.pair_col,
                    self.ts_col,
                    "signal_type",
                    "signal",
                    "probability"
                ])

                return Signals(signals)
            ```

        Note:
            Must return Signals with at minimum: pair, timestamp, signal_type.
            Timestamps must be timezone-naive.
            No duplicate (pair, timestamp) combinations allowed.
        """
        raise NotImplementedError

    def _normalize_index(self, df: pl.DataFrame) -> pl.DataFrame:
        """Normalize timestamps to timezone-naive.

        Args:
            df (pl.DataFrame): Input DataFrame.

        Returns:
            pl.DataFrame: DataFrame with timezone-naive timestamps.

        Raises:
            TypeError: If df is not pl.DataFrame.
        """
        if not isinstance(df, pl.DataFrame):
            raise TypeError(f"Expected pl.DataFrame, got {type(df)}")

        if self.ts_col in df.columns:
            ts_dtype = df.schema.get(self.ts_col)
            # Only strip the tz when the column is a tz-aware Datetime; other
            # dtypes (Date, Int, already-naive Datetime) pass through untouched.
            if isinstance(ts_dtype, pl.Datetime) and ts_dtype.time_zone is not None:
                df = df.with_columns(pl.col(self.ts_col).dt.replace_time_zone(None))
        return df

    def _validate_features(self, df: pl.DataFrame) -> None:
        """Validate feature DataFrame.

        Checks:
        - Is pl.DataFrame
        - Has required columns (pair, timestamp)
        - Timestamps are timezone-naive
        - No duplicate (pair, timestamp) combinations

        Args:
            df (pl.DataFrame): Features to validate.

        Raises:
            TypeError: If not pl.DataFrame.
            ValueError: If validation fails.
        """
        if not isinstance(df, pl.DataFrame):
            raise TypeError(f"preprocess must return polars.DataFrame, got {type(df)}")

        missing = [c for c in (self.pair_col, self.ts_col) if c not in df.columns]
        if missing:
            raise ValueError(f"Features missing required columns: {missing}")

        ts_dtype = df.schema.get(self.ts_col)
        if isinstance(ts_dtype, pl.Datetime) and ts_dtype.time_zone is not None:
            raise ValueError(
                f"Features column '{self.ts_col}' must be timezone-naive, got tz={ts_dtype.time_zone}. "
                f"Use .dt.replace_time_zone(None)."
            )

        # Any (pair, timestamp) key appearing more than once is ambiguous for
        # downstream joins, so fail fast with a sample of the offenders.
        dup = (
            df.group_by([self.pair_col, self.ts_col])
            .len()
            .filter(pl.col("len") > 1)
        )
        if dup.height > 0:
            raise ValueError(
                "Features contain duplicate keys (pair,timestamp). "
                f"Examples:\n{dup.select([self.pair_col, self.ts_col]).head(10)}"
            )

    def _validate_signals(self, signals: Signals) -> None:
        """Validate signal output.

        Checks:
        - Is Signals instance with pl.DataFrame value
        - Has required columns (pair, timestamp, signal_type)
        - signal_type values are valid SignalType enums
        - Timestamps are timezone-naive
        - No duplicate (pair, timestamp) combinations
        - (optional) Has probability column if required

        Args:
            signals (Signals): Signals to validate.

        Raises:
            TypeError: If not Signals or value not pl.DataFrame.
            ValueError: If validation fails.
        """
        if not isinstance(signals, Signals):
            raise TypeError(f"detect must return Signals, got {type(signals)}")

        s = signals.value
        if not isinstance(s, pl.DataFrame):
            raise TypeError(f"Signals.value must be polars.DataFrame, got {type(s)}")

        required = {self.pair_col, self.ts_col, "signal_type"}
        missing = sorted(required - set(s.columns))
        if missing:
            raise ValueError(f"Signals missing required columns: {missing}")

        # Compare against the closed set of SignalType enum values; anything
        # outside it indicates a detector bug rather than bad market data.
        allowed = {t.value for t in SignalType}
        bad = (
            s.select(pl.col("signal_type"))
            .unique()
            .filter(~pl.col("signal_type").is_in(list(allowed)))
        )
        if bad.height > 0:
            raise ValueError(
                f"Signals contain unknown signal_type values: {bad.get_column('signal_type').to_list()}"
            )

        if self.require_probability and "probability" not in s.columns:
            raise ValueError("Signals must contain 'probability' column (require_probability=True)")

        ts_dtype = s.schema.get(self.ts_col)
        if isinstance(ts_dtype, pl.Datetime) and ts_dtype.time_zone is not None:
            raise ValueError(f"Signals column '{self.ts_col}' must be timezone-naive, got tz={ts_dtype.time_zone}.")

        # optional: hard guarantee no duplicates in signals
        dup = (
            s.group_by([self.pair_col, self.ts_col])
            .len()
            .filter(pl.col("len") > 1)
        )
        if dup.height > 0:
            raise ValueError(
                "Signals contain duplicate keys (pair,timestamp). "
                f"Examples:\n{dup.select([self.pair_col, self.ts_col]).head(10)}"
            )

    def _keep_only_latest(self, signals: Signals) -> Signals:
        """Keep only latest signal per pair.

        Useful for strategies that only trade most recent signal.

        Args:
            signals (Signals): Input signals.

        Returns:
            Signals: Filtered signals with one per pair.
        """
        s = signals.value
        # Sort by timestamp within each pair, take the last row per pair
        # (maintain_order keeps the pair ordering stable), then re-sort for a
        # deterministic output order.
        out = (
            s.sort([self.pair_col, self.ts_col])
            .group_by(self.pair_col, maintain_order=True)
            .tail(1)
            .sort([self.pair_col, self.ts_col])
        )
        return Signals(out)
# Registered detector component: simple/fast vs. slow SMA crossover.

from dataclasses import dataclass
from typing import Any

import polars as pl

from signalflow.core import RawDataType, Signals, SignalType, sf_component
from signalflow.detector import SignalDetector
from signalflow.feature import FeatureSet
from signalflow.feature.smoother import SmaExtractor


@dataclass
@sf_component(name="sma_cross")
class SmaCrossSignalDetector(SignalDetector):
    """
    SMA crossover signal detector.

    Emits, per (pair, timestamp):
        - RISE when the fast SMA crosses above the slow SMA
          (fast_t > slow_t while fast_{t-1} <= slow_{t-1})
        - FALL when the fast SMA crosses below the slow SMA
          (fast_t < slow_t while fast_{t-1} >= slow_{t-1})
        - NONE in every other row

    Resulting Signals frame columns:
        - pair, timestamp, signal_type, signal
        - signal is +1 for RISE, -1 for FALL, 0 for NONE
    """

    fast_period: int = 20
    slow_period: int = 50
    price_col: str = "close"

    fast_col: str | None = None
    slow_col: str | None = None

    def __post_init__(self) -> None:
        # Guard clauses: both windows positive, fast strictly shorter than slow.
        if self.fast_period <= 0 or self.slow_period <= 0:
            raise ValueError("fast_period and slow_period must be > 0")
        if self.fast_period >= self.slow_period:
            raise ValueError(f"fast_period must be < slow_period, got {self.fast_period} >= {self.slow_period}")

        if self.fast_col is None:
            self.fast_col = f"sma_{self.fast_period}"
        if self.slow_col is None:
            self.slow_col = f"sma_{self.slow_period}"

        # One SMA extractor per leg; both share the same source column and
        # resampling configuration, differing only in window and output name.
        specs = ((self.fast_period, self.fast_col), (self.slow_period, self.slow_col))
        self.feature_set = FeatureSet(
            extractors=[
                SmaExtractor(
                    offset_window=1,
                    sma_period=period,
                    price_col=self.price_col,
                    out_col=out_name,
                    use_resample=True,
                    raw_data_type=RawDataType.SPOT,
                )
                for period, out_name in specs
            ]
        )

    def detect(self, features: pl.DataFrame, context: dict[str, Any] | None = None) -> Signals:
        """Label SMA crossovers in *features* and wrap the result in Signals."""
        ordered = features.sort([self.pair_col, self.ts_col])

        if self.fast_col not in ordered.columns or self.slow_col not in ordered.columns:
            raise ValueError(
                f"Expected columns '{self.fast_col}' and '{self.slow_col}' in features. "
                f"Got: {ordered.columns}"
            )

        # Rows without both averages (warm-up period) carry no signal at all.
        ordered = ordered.filter(pl.col(self.fast_col).is_not_null() & pl.col(self.slow_col).is_not_null())

        fast_now = pl.col(self.fast_col)
        slow_now = pl.col(self.slow_col)
        # Previous-bar values, computed per pair so pairs never bleed into
        # each other; the first row of each pair yields null -> no crossover.
        fast_prev = fast_now.shift(1).over(self.pair_col)
        slow_prev = slow_now.shift(1).over(self.pair_col)

        bullish = (fast_now > slow_now) & (fast_prev <= slow_prev)
        bearish = (fast_now < slow_now) & (fast_prev >= slow_prev)

        labeled = (
            ordered
            .select([self.pair_col, self.ts_col, self.fast_col, self.slow_col])
            .with_columns(
                pl.when(bullish)
                .then(pl.lit(SignalType.RISE.value))
                .when(bearish)
                .then(pl.lit(SignalType.FALL.value))
                .otherwise(pl.lit(SignalType.NONE.value))
                .alias("signal_type"),
                # signal is the numeric mirror of signal_type: crossovers map
                # to +1 / -1, everything else to 0.
                pl.when(bullish)
                .then(pl.lit(1))
                .when(bearish)
                .then(pl.lit(-1))
                .otherwise(pl.lit(0))
                .alias("signal"),
            )
        )

        return Signals(labeled.select([self.pair_col, self.ts_col, "signal_type", "signal"]))
"""Public API of the :mod:`signalflow.feature` subpackage."""

from signalflow.feature.feature_set import FeatureSet
from signalflow.feature.base import FeatureExtractor
import signalflow.feature.smoother as smoother
import signalflow.feature.oscillator as oscillator
import signalflow.feature.pandasta as pandasta
import signalflow.feature.adapter as adapter


__all__ = [
    "FeatureSet",
    "FeatureExtractor",
    "adapter",
    "pandasta",
    "smoother",
    "oscillator",
]
"""Public API of the :mod:`signalflow.feature.adapter` subpackage."""

from signalflow.feature.adapter.pandas_feature_extractor import PandasFeatureExtractor

__all__ = [
    "PandasFeatureExtractor",
]
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Callable
5
+
6
+ import polars as pl
7
+ import pandas as pd
8
+
9
+ from signalflow.feature.base import FeatureExtractor
10
+
11
+ PandasGroupFn = Callable[[pd.DataFrame, dict[str, Any] | None], pd.DataFrame | pd.Series]
12
+
13
+
14
+ @dataclass
15
+ class PandasFeatureExtractor(FeatureExtractor):
16
+ pandas_group_fn: PandasGroupFn | None = field(default=None, kw_only=True)
17
+
18
+ out_cols: list[str] | None = None
19
+ series_name: str = "feature"
20
+ rename_outputs: dict[str, str] = field(default_factory=dict)
21
+
22
+ def __post_init__(self) -> None:
23
+ super().__post_init__()
24
+ if self.pandas_group_fn is None or not callable(self.pandas_group_fn):
25
+ raise TypeError("pandas_group_fn must be provided and callable (keyword-only argument)")
26
+
27
+ def compute_group(self, group_df: pl.DataFrame, data_context: dict[str, Any] | None) -> pl.DataFrame:
28
+ pdf = group_df.to_pandas()
29
+ result = self.pandas_group_fn(pdf, data_context)
30
+
31
+ if isinstance(result, pd.Series):
32
+ if result.name is None:
33
+ result = result.rename(self.series_name)
34
+ result = result.to_frame()
35
+
36
+ if not isinstance(result, pd.DataFrame):
37
+ raise TypeError("pandas_group_fn must return pd.DataFrame or pd.Series")
38
+
39
+ if len(result) != len(pdf):
40
+ raise ValueError(f"pandas_group_fn must preserve row count: got {len(result)} != {len(pdf)}")
41
+
42
+ if self.rename_outputs:
43
+ result = result.rename(columns=self.rename_outputs)
44
+
45
+ if self.out_cols is not None:
46
+ missing = set(self.out_cols) - set(result.columns)
47
+ if missing:
48
+ raise ValueError(f"pandas_group_fn output missing columns: {sorted(missing)}")
49
+
50
+ out = group_df
51
+ for col in result.columns:
52
+ out = out.with_columns(pl.Series(col, result[col].to_numpy()))
53
+
54
+ return out