signalflow-trading 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. signalflow/__init__.py +21 -0
  2. signalflow/analytics/__init__.py +0 -0
  3. signalflow/core/__init__.py +46 -0
  4. signalflow/core/base_mixin.py +232 -0
  5. signalflow/core/containers/__init__.py +21 -0
  6. signalflow/core/containers/order.py +216 -0
  7. signalflow/core/containers/portfolio.py +211 -0
  8. signalflow/core/containers/position.py +296 -0
  9. signalflow/core/containers/raw_data.py +167 -0
  10. signalflow/core/containers/raw_data_view.py +169 -0
  11. signalflow/core/containers/signals.py +198 -0
  12. signalflow/core/containers/strategy_state.py +147 -0
  13. signalflow/core/containers/trade.py +112 -0
  14. signalflow/core/decorators.py +103 -0
  15. signalflow/core/enums.py +270 -0
  16. signalflow/core/registry.py +322 -0
  17. signalflow/core/rolling_aggregator.py +362 -0
  18. signalflow/core/signal_transforms/__init__.py +5 -0
  19. signalflow/core/signal_transforms/base_signal_transform.py +186 -0
  20. signalflow/data/__init__.py +11 -0
  21. signalflow/data/raw_data_factory.py +225 -0
  22. signalflow/data/raw_store/__init__.py +7 -0
  23. signalflow/data/raw_store/base.py +271 -0
  24. signalflow/data/raw_store/duckdb_stores.py +696 -0
  25. signalflow/data/source/__init__.py +10 -0
  26. signalflow/data/source/base.py +300 -0
  27. signalflow/data/source/binance.py +442 -0
  28. signalflow/data/strategy_store/__init__.py +8 -0
  29. signalflow/data/strategy_store/base.py +278 -0
  30. signalflow/data/strategy_store/duckdb.py +409 -0
  31. signalflow/data/strategy_store/schema.py +36 -0
  32. signalflow/detector/__init__.py +7 -0
  33. signalflow/detector/adapter/__init__.py +5 -0
  34. signalflow/detector/adapter/pandas_detector.py +46 -0
  35. signalflow/detector/base.py +390 -0
  36. signalflow/detector/sma_cross.py +105 -0
  37. signalflow/feature/__init__.py +16 -0
  38. signalflow/feature/adapter/__init__.py +5 -0
  39. signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
  40. signalflow/feature/base.py +330 -0
  41. signalflow/feature/feature_set.py +286 -0
  42. signalflow/feature/oscillator/__init__.py +5 -0
  43. signalflow/feature/oscillator/rsi_extractor.py +42 -0
  44. signalflow/feature/pandasta/__init__.py +10 -0
  45. signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
  46. signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
  47. signalflow/feature/smoother/__init__.py +5 -0
  48. signalflow/feature/smoother/sma_extractor.py +46 -0
  49. signalflow/strategy/__init__.py +9 -0
  50. signalflow/strategy/broker/__init__.py +15 -0
  51. signalflow/strategy/broker/backtest.py +172 -0
  52. signalflow/strategy/broker/base.py +186 -0
  53. signalflow/strategy/broker/executor/__init__.py +9 -0
  54. signalflow/strategy/broker/executor/base.py +35 -0
  55. signalflow/strategy/broker/executor/binance_spot.py +12 -0
  56. signalflow/strategy/broker/executor/virtual_spot.py +81 -0
  57. signalflow/strategy/broker/realtime_spot.py +12 -0
  58. signalflow/strategy/component/__init__.py +9 -0
  59. signalflow/strategy/component/base.py +65 -0
  60. signalflow/strategy/component/entry/__init__.py +7 -0
  61. signalflow/strategy/component/entry/fixed_size.py +57 -0
  62. signalflow/strategy/component/entry/signal.py +127 -0
  63. signalflow/strategy/component/exit/__init__.py +5 -0
  64. signalflow/strategy/component/exit/time_based.py +47 -0
  65. signalflow/strategy/component/exit/tp_sl.py +80 -0
  66. signalflow/strategy/component/metric/__init__.py +8 -0
  67. signalflow/strategy/component/metric/main_metrics.py +181 -0
  68. signalflow/strategy/runner/__init__.py +8 -0
  69. signalflow/strategy/runner/backtest_runner.py +208 -0
  70. signalflow/strategy/runner/base.py +19 -0
  71. signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
  72. signalflow/strategy/runner/realtime_runner.py +0 -0
  73. signalflow/target/__init__.py +14 -0
  74. signalflow/target/adapter/__init__.py +5 -0
  75. signalflow/target/adapter/pandas_labeler.py +45 -0
  76. signalflow/target/base.py +409 -0
  77. signalflow/target/fixed_horizon_labeler.py +93 -0
  78. signalflow/target/static_triple_barrier.py +162 -0
  79. signalflow/target/triple_barrier.py +188 -0
  80. signalflow/utils/__init__.py +7 -0
  81. signalflow/utils/import_utils.py +11 -0
  82. signalflow/utils/tune_utils.py +19 -0
  83. signalflow/validator/__init__.py +6 -0
  84. signalflow/validator/base.py +139 -0
  85. signalflow/validator/sklearn_validator.py +527 -0
  86. signalflow_trading-0.2.1.dist-info/METADATA +149 -0
  87. signalflow_trading-0.2.1.dist-info/RECORD +90 -0
  88. signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
  89. signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
  90. signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,330 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC
4
+ from dataclasses import dataclass
5
+ from typing import Any, Literal
6
+
7
+ import polars as pl
8
+
9
+ from signalflow.core import RawDataType, RollingAggregator, SfComponentType
10
+ from typing import ClassVar
11
+
12
+
13
@dataclass
class FeatureExtractor(ABC):
    """Base class for Polars-first feature extraction.

    Extracts technical indicators and derived features from raw OHLCV data,
    with optional sliding-window resampling for multi-timeframe features.

    Processing pipeline (see ``extract``):
        1. Sort by (pair, timestamp)
        2. Add resample_offset column if missing
        3. (optional) Apply sliding resample
        4. (optional) Filter to last offset
        5. Group by (pair, resample_offset) and compute features
        6. Sort output
        7. Project columns (keep input or features only)

    Attributes:
        offset_window (int): Sliding window size in bars. Default: 1.
        compute_last_offset (bool): Keep only last offset. Default: False.
        pair_col (str): Trading pair column. Default: "pair".
        ts_col (str): Timestamp column. Default: "timestamp".
        offset_col (str): Offset tracking column. Default: "resample_offset".
        use_resample (bool): Apply sliding resample. Default: False.
        resample_mode (Literal["add", "replace"]): Resample mode. Default: "add".
        resample_prefix (str | None): Prefix for resampled columns. Default: None.
        raw_data_type (RawDataType): Type of raw data. Default: SPOT.
        component_type (ClassVar[SfComponentType]): Always FEATURE_EXTRACTOR.
        keep_input_columns (bool): Keep all input columns. Default: False.

    Note:
        compute_group() must preserve row count (length-preserving).
        All timestamps must be timezone-naive.
        For multi-timeframe features, use use_resample=True.

    See Also:
        RollingAggregator: Sliding window resampler.
        FeatureSet: Orchestrates multiple extractors.
    """

    offset_window: int = 1
    compute_last_offset: bool = False

    pair_col: str = "pair"
    ts_col: str = "timestamp"
    offset_col: str = "resample_offset"

    use_resample: bool = False
    resample_mode: Literal["add", "replace"] = "add"
    resample_prefix: str | None = None
    raw_data_type: RawDataType = RawDataType.SPOT
    component_type: ClassVar[SfComponentType] = SfComponentType.FEATURE_EXTRACTOR
    keep_input_columns: bool = False

    def __post_init__(self) -> None:
        """Validate configuration after initialization.

        Raises:
            ValueError: If offset_window <= 0, invalid resample_mode, or wrong offset_col.
            TypeError: If column names are not strings.
        """
        if self.offset_window <= 0:
            raise ValueError(f"offset_window must be > 0, got {self.offset_window}")

        if self.resample_mode not in ("add", "replace"):
            raise ValueError(f"Invalid resample_mode: {self.resample_mode}")

        # Downstream grouping relies on the aggregator's canonical offset column name.
        if self.offset_col != RollingAggregator.OFFSET_COL:
            raise ValueError(
                f"offset_col must be '{RollingAggregator.OFFSET_COL}', got '{self.offset_col}'"
            )

        if not isinstance(self.pair_col, str) or not isinstance(self.ts_col, str) or not isinstance(self.offset_col, str):
            raise TypeError("pair_col/ts_col/offset_col must be str")

    @property
    def _resampler(self) -> RollingAggregator:
        """Return a RollingAggregator configured from this extractor's settings."""
        return RollingAggregator(
            offset_window=self.offset_window,
            ts_col=self.ts_col,
            pair_col=self.pair_col,
            mode=self.resample_mode,
            prefix=self.resample_prefix,
            raw_data_type=self.raw_data_type,
        )

    def extract(self, df: pl.DataFrame, data_context: dict[str, Any] | None = None) -> pl.DataFrame:
        """Extract features from input DataFrame.

        Main entry point - handles sorting, resampling, grouping, and projection.

        Args:
            df (pl.DataFrame): Input OHLCV data with pair and timestamp columns.
            data_context (dict[str, Any] | None): Additional context for computation.

        Returns:
            pl.DataFrame: Features DataFrame. When ``keep_input_columns`` is False,
                only pair, timestamp and the newly computed feature columns are kept.

        Raises:
            TypeError: If df is not pl.DataFrame or compute_group returns wrong type.
            ValueError: If compute_group changes row count or required columns missing.

        Note:
            Only accepts pl.DataFrame (Polars-first design).
            Use PandasFeatureExtractor adapter for Pandas data.
        """
        if not isinstance(df, pl.DataFrame):
            raise TypeError(
                f"{self.__class__.__name__} is polars-first and accepts only pl.DataFrame. "
                f"Got: {type(df)}. Use an adapter for other dataframe types."
            )
        self._validate_input(df)

        df0 = df.sort([self.pair_col, self.ts_col])

        if self.offset_col not in df0.columns:
            df0 = self._resampler.add_offset_column(df0)

        if self.use_resample:
            df0 = self._resampler.resample(df0)

        if self.compute_last_offset:
            last_off = self._resampler.get_last_offset(df0)
            df0 = df0.filter(pl.col(self.offset_col) == last_off)

        # Columns present before feature computation; everything beyond these
        # is treated as a feature during projection below.
        prepared_cols = set(df0.columns)

        def _wrapped(g: pl.DataFrame) -> pl.DataFrame:
            out = self.compute_group(g, data_context=data_context)

            if not isinstance(out, pl.DataFrame):
                # BUG FIX: previous message referenced a non-existent
                # "compute_pl_group"; the actual hook is compute_group.
                raise TypeError(f"{self.__class__.__name__}.compute_group must return pl.DataFrame")

            if out.height != g.height:
                raise ValueError(
                    f"{self.__class__.__name__}: len(output_group)={out.height} != len(input_group)={g.height}"
                )

            return out

        out = (
            df0.group_by(self.pair_col, self.offset_col, maintain_order=True)
            .map_groups(_wrapped)
            .sort([self.pair_col, self.ts_col])
        )

        if self.keep_input_columns:
            return out

        feature_cols = sorted(set(out.columns) - prepared_cols)
        keep_cols = [self.pair_col, self.ts_col] + feature_cols

        missing = [c for c in keep_cols if c not in out.columns]
        if missing:
            raise ValueError(f"Projection error, missing columns: {missing}")

        return out.select(keep_cols)

    def compute_group(
        self,
        group_df: pl.DataFrame,
        data_context: dict[str, Any] | None,
    ) -> pl.DataFrame:
        """Compute features for a single (pair, resample_offset) group.

        Core feature extraction logic - must be implemented by subclasses.

        CRITICAL: Must preserve row count (len(output) == len(input)) and
        should preserve ordering within the group.

        Args:
            group_df (pl.DataFrame): Single group's data, sorted by timestamp.
            data_context (dict[str, Any] | None): Additional context.

        Returns:
            pl.DataFrame: Same length as input with added feature columns.

        Note:
            Use rolling operations for windowed features; the first N-1 bars
            may contain nulls for N-period indicators.
        """
        raise NotImplementedError

    def _validate_input(self, df: pl.DataFrame) -> None:
        """Validate that the input DataFrame has the required index columns.

        Args:
            df (pl.DataFrame): Input to validate.

        Raises:
            ValueError: If required columns are missing.
        """
        missing = [c for c in (self.pair_col, self.ts_col) if c not in df.columns]
        if missing:
            raise ValueError(f"Missing required columns: {missing}")
@@ -0,0 +1,286 @@
1
+ # IMPORTANT
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+ from typing import Any
6
+
7
+ import polars as pl
8
+
9
+ from signalflow.feature.base import FeatureExtractor
10
+ from signalflow.core import RawDataView, RawDataType, DataFrameType
11
+
12
+
13
@dataclass
class FeatureSet:
    """Polars-first orchestrator for multiple feature extractors.

    Runs each extractor against the appropriate raw-data slice, normalizes
    timestamps to timezone-naive, and merges every result into a single
    DataFrame via an outer join on (pair, timestamp).

    Attributes:
        extractors (list[FeatureExtractor]): Feature extractors to orchestrate.
        parallel (bool): Parallel execution flag (not yet implemented). Default: False.
        pair_col (str): Trading pair column name. Default: "pair".
        ts_col (str): Timestamp column name. Default: "timestamp".

    Note:
        All extractors must share the same pair_col and ts_col, and feature
        column names must be unique across extractors.

    See Also:
        FeatureExtractor: Base class for individual extractors.
        RawDataView: Provides data in the required format.
    """

    extractors: list[FeatureExtractor]
    parallel: bool = False

    pair_col: str = "pair"
    ts_col: str = "timestamp"

    def __post_init__(self) -> None:
        """Validate extractor configuration.

        Raises:
            ValueError: If no extractors given, or an extractor uses a
                different pair/timestamp column than this FeatureSet.
        """
        if not self.extractors:
            raise ValueError("At least one extractor must be provided")

        for ex in self.extractors:
            self._require_same_column(ex, "pair_col", self.pair_col)
            self._require_same_column(ex, "ts_col", self.ts_col)

    def _require_same_column(self, ex: FeatureExtractor, attr: str, expected: str) -> None:
        """Raise if *ex* declares a different value for the index column *attr*."""
        if getattr(ex, attr, expected) != expected:
            raise ValueError(
                f"All extractors must use {attr}='{expected}'. "
                f"{ex.__class__.__name__} uses '{getattr(ex, attr, None)}'"
            )

    def extract(self, raw_data: RawDataView, context: dict[str, Any] | None = None) -> pl.DataFrame:
        """Extract and combine features from all extractors.

        For each extractor: fetch its raw data, run extraction, normalize the
        timestamp column, and validate the index columns. The per-extractor
        results are then outer-joined on (pair, timestamp).

        Args:
            raw_data (RawDataView): View onto raw market data.
            context (dict[str, Any] | None): Extra context forwarded to extractors.

        Returns:
            pl.DataFrame: Combined features with pair/timestamp index columns
                plus one column per computed feature.

        Raises:
            TypeError: If an extractor does not return pl.DataFrame.
            ValueError: If index columns are missing or feature names collide.

        Note:
            The outer join keeps every (pair, timestamp) combination;
            non-matching rows get null feature values.
        """
        results: list[pl.DataFrame] = []

        for extractor in self.extractors:
            source_df = self._get_input_df(raw_data, extractor)
            extracted = extractor.extract(source_df, data_context=context)

            if not isinstance(extracted, pl.DataFrame):
                raise TypeError(
                    f"{extractor.__class__.__name__}.extract must return pl.DataFrame, got {type(extracted)}"
                )

            extracted = self._normalize_index(extracted)

            has_index = self.pair_col in extracted.columns and self.ts_col in extracted.columns
            if not has_index:
                raise ValueError(
                    f"{extractor.__class__.__name__} returned no index columns "
                    f"('{self.pair_col}', '{self.ts_col}'). "
                    f"FeatureSet requires index columns to combine features."
                )

            results.append(extracted)

        return self._combine_features(results)

    def _get_input_df(self, raw_data: RawDataView, extractor: FeatureExtractor) -> pl.DataFrame:
        """Fetch the extractor's required raw data as a Polars DataFrame.

        The data type is taken from ``extractor.raw_data_type`` (defaulting to
        SPOT). Falls back to the string "polars" for older get_data signatures.
        """
        wanted = getattr(extractor, "raw_data_type", RawDataType.SPOT)

        try:
            return raw_data.get_data(wanted, DataFrameType.POLARS)
        except TypeError:
            # Backward compatibility with views that take a string format.
            return raw_data.get_data(wanted, "polars")

    def _normalize_index(self, df: pl.DataFrame) -> pl.DataFrame:
        """Strip any timezone from the timestamp column, if present.

        Keeps timezone handling consistent across all extractor outputs.
        """
        if self.ts_col not in df.columns:
            return df

        dtype = df.schema.get(self.ts_col)
        if isinstance(dtype, pl.Datetime) and dtype.time_zone is not None:
            df = df.with_columns(pl.col(self.ts_col).dt.replace_time_zone(None))
        return df

    def _combine_features(self, feature_dfs: list[pl.DataFrame]) -> pl.DataFrame:
        """Outer-join feature DataFrames on (pair, timestamp).

        Args:
            feature_dfs (list[pl.DataFrame]): Per-extractor feature frames.

        Returns:
            pl.DataFrame: All features merged on the shared index.

        Raises:
            ValueError: If the list is empty or two extractors produced the
                same feature column name.
        """
        if not feature_dfs:
            raise ValueError("No feature DataFrames to combine")

        merged = feature_dfs[0]
        index_cols = (self.pair_col, self.ts_col)

        for frame in feature_dfs[1:]:
            incoming = [c for c in frame.columns if c not in index_cols]
            dup = set(incoming).intersection(set(merged.columns))
            if dup:
                raise ValueError(
                    f"Duplicate feature columns during FeatureSet combine: {sorted(dup)}. "
                    f"Rename features or set unique prefixes."
                )

            merged = merged.join(frame, on=[self.pair_col, self.ts_col], how="outer", coalesce=True)

        return merged
@@ -0,0 +1,5 @@
1
"""Oscillator-family feature extractors."""

from signalflow.feature.oscillator.rsi_extractor import RsiExtractor

__all__ = ["RsiExtractor"]
@@ -0,0 +1,42 @@
1
+ # IMPORTANT
2
+
3
+ from dataclasses import dataclass
4
+ import polars as pl
5
+ from signalflow.feature.base import FeatureExtractor
6
+ from signalflow.core import sf_component
7
+
8
+
9
@dataclass
@sf_component(name="rsi")
class RsiExtractor(FeatureExtractor):
    """Relative Strength Index feature extractor.

    Computes RSI from simple rolling means of gains and losses over
    ``rsi_period`` bars, writing the result to ``out_col``.

    Edge cases: when both average gain and average loss are zero (flat
    price) the RSI is pinned to 50; when only the average loss is zero it
    is pinned to 100. The first ``rsi_period`` rows are null because the
    rolling means require a full window.
    """

    rsi_period: int = 14      # lookback window in bars
    price_col: str = "close"  # input price column
    out_col: str = "rsi"      # output feature column
    use_resample: bool = True

    def compute_group(self, group_df: pl.DataFrame, data_context: dict | None) -> pl.DataFrame:
        """Append the RSI column to a single (pair, offset) group.

        Length-preserving: returns ``group_df`` with one extra column.
        """
        change = pl.col(self.price_col).diff()

        up_move = change.clip(lower_bound=0.0)
        down_move = (-change).clip(lower_bound=0.0)

        window_kwargs = dict(window_size=self.rsi_period, min_samples=self.rsi_period)
        mean_up = up_move.rolling_mean(**window_kwargs)
        mean_down = down_move.rolling_mean(**window_kwargs)

        strength = mean_up / mean_down

        rsi_expr = (
            pl.when((mean_down == 0) & (mean_up == 0))
            .then(50.0)
            .when(mean_down == 0)
            .then(100.0)
            .otherwise(100.0 - (100.0 / (1.0 + strength)))
        )

        return group_df.with_columns(rsi_expr.alias(self.out_col))
@@ -0,0 +1,10 @@
1
"""pandas-ta based feature extractors."""

from signalflow.feature.pandasta.pandas_ta_extractor import PandasTaExtractor
from signalflow.feature.pandasta.top_pandasta_extractors import (
    PandasTaAtrExtractor,
    PandasTaBbandsExtractor,
    PandasTaMacdExtractor,
    PandasTaRsiExtractor,
)

__all__ = [
    "PandasTaExtractor",
    "PandasTaRsiExtractor",
    "PandasTaBbandsExtractor",
    "PandasTaMacdExtractor",
    "PandasTaAtrExtractor",
]