signalflow_trading-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. signalflow/__init__.py +21 -0
  2. signalflow/analytics/__init__.py +0 -0
  3. signalflow/core/__init__.py +46 -0
  4. signalflow/core/base_mixin.py +232 -0
  5. signalflow/core/containers/__init__.py +21 -0
  6. signalflow/core/containers/order.py +216 -0
  7. signalflow/core/containers/portfolio.py +211 -0
  8. signalflow/core/containers/position.py +296 -0
  9. signalflow/core/containers/raw_data.py +167 -0
  10. signalflow/core/containers/raw_data_view.py +169 -0
  11. signalflow/core/containers/signals.py +198 -0
  12. signalflow/core/containers/strategy_state.py +147 -0
  13. signalflow/core/containers/trade.py +112 -0
  14. signalflow/core/decorators.py +103 -0
  15. signalflow/core/enums.py +270 -0
  16. signalflow/core/registry.py +322 -0
  17. signalflow/core/rolling_aggregator.py +362 -0
  18. signalflow/core/signal_transforms/__init__.py +5 -0
  19. signalflow/core/signal_transforms/base_signal_transform.py +186 -0
  20. signalflow/data/__init__.py +11 -0
  21. signalflow/data/raw_data_factory.py +225 -0
  22. signalflow/data/raw_store/__init__.py +7 -0
  23. signalflow/data/raw_store/base.py +271 -0
  24. signalflow/data/raw_store/duckdb_stores.py +696 -0
  25. signalflow/data/source/__init__.py +10 -0
  26. signalflow/data/source/base.py +300 -0
  27. signalflow/data/source/binance.py +442 -0
  28. signalflow/data/strategy_store/__init__.py +8 -0
  29. signalflow/data/strategy_store/base.py +278 -0
  30. signalflow/data/strategy_store/duckdb.py +409 -0
  31. signalflow/data/strategy_store/schema.py +36 -0
  32. signalflow/detector/__init__.py +7 -0
  33. signalflow/detector/adapter/__init__.py +5 -0
  34. signalflow/detector/adapter/pandas_detector.py +46 -0
  35. signalflow/detector/base.py +390 -0
  36. signalflow/detector/sma_cross.py +105 -0
  37. signalflow/feature/__init__.py +16 -0
  38. signalflow/feature/adapter/__init__.py +5 -0
  39. signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
  40. signalflow/feature/base.py +330 -0
  41. signalflow/feature/feature_set.py +286 -0
  42. signalflow/feature/oscillator/__init__.py +5 -0
  43. signalflow/feature/oscillator/rsi_extractor.py +42 -0
  44. signalflow/feature/pandasta/__init__.py +10 -0
  45. signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
  46. signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
  47. signalflow/feature/smoother/__init__.py +5 -0
  48. signalflow/feature/smoother/sma_extractor.py +46 -0
  49. signalflow/strategy/__init__.py +9 -0
  50. signalflow/strategy/broker/__init__.py +15 -0
  51. signalflow/strategy/broker/backtest.py +172 -0
  52. signalflow/strategy/broker/base.py +186 -0
  53. signalflow/strategy/broker/executor/__init__.py +9 -0
  54. signalflow/strategy/broker/executor/base.py +35 -0
  55. signalflow/strategy/broker/executor/binance_spot.py +12 -0
  56. signalflow/strategy/broker/executor/virtual_spot.py +81 -0
  57. signalflow/strategy/broker/realtime_spot.py +12 -0
  58. signalflow/strategy/component/__init__.py +9 -0
  59. signalflow/strategy/component/base.py +65 -0
  60. signalflow/strategy/component/entry/__init__.py +7 -0
  61. signalflow/strategy/component/entry/fixed_size.py +57 -0
  62. signalflow/strategy/component/entry/signal.py +127 -0
  63. signalflow/strategy/component/exit/__init__.py +5 -0
  64. signalflow/strategy/component/exit/time_based.py +47 -0
  65. signalflow/strategy/component/exit/tp_sl.py +80 -0
  66. signalflow/strategy/component/metric/__init__.py +8 -0
  67. signalflow/strategy/component/metric/main_metrics.py +181 -0
  68. signalflow/strategy/runner/__init__.py +8 -0
  69. signalflow/strategy/runner/backtest_runner.py +208 -0
  70. signalflow/strategy/runner/base.py +19 -0
  71. signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
  72. signalflow/strategy/runner/realtime_runner.py +0 -0
  73. signalflow/target/__init__.py +14 -0
  74. signalflow/target/adapter/__init__.py +5 -0
  75. signalflow/target/adapter/pandas_labeler.py +45 -0
  76. signalflow/target/base.py +409 -0
  77. signalflow/target/fixed_horizon_labeler.py +93 -0
  78. signalflow/target/static_triple_barrier.py +162 -0
  79. signalflow/target/triple_barrier.py +188 -0
  80. signalflow/utils/__init__.py +7 -0
  81. signalflow/utils/import_utils.py +11 -0
  82. signalflow/utils/tune_utils.py +19 -0
  83. signalflow/validator/__init__.py +6 -0
  84. signalflow/validator/base.py +139 -0
  85. signalflow/validator/sklearn_validator.py +527 -0
  86. signalflow_trading-0.2.1.dist-info/METADATA +149 -0
  87. signalflow_trading-0.2.1.dist-info/RECORD +90 -0
  88. signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
  89. signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
  90. signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
signalflow/core/rolling_aggregator.py
@@ -0,0 +1,362 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Literal
+
+ import polars as pl
+ from signalflow.core.enums import RawDataType
+
+
+ @dataclass
+ class RollingAggregator:
+     """Offset (sliding) resampler for raw market data.
+
+     Computes rolling aggregates over a sliding window of bars for each trading pair.
+     For each row at time t, aggregates over the last `offset_window` rows: [t-(k-1), ..., t].
+
+     Key features:
+         - Length-preserving: len(output) == len(input)
+         - Per-pair processing: each pair is aggregated independently
+         - First (k-1) rows per pair → nulls (min_periods=k)
+         - (pair, timestamp) columns preserved
+
+     Use cases:
+         - Create higher-timeframe features (e.g., 5m bars from 1m bars)
+         - Smooth noise with rolling aggregates
+         - Generate multi-timeframe features for models
+
+     Attributes:
+         offset_window (int): Number of bars in the sliding window. Must be > 0.
+         ts_col (str): Timestamp column name. Default: "timestamp".
+         pair_col (str): Trading pair column name. Default: "pair".
+         mode (Literal["add", "replace"]): Output mode. Default: "replace".
+             - "add": add resampled columns under a prefix
+             - "replace": replace the original OHLC columns
+         prefix (str | None): Prefix for output columns in "add" mode.
+             Default: "rs_{offset_window}m_"
+         raw_data_type (RawDataType): Type of raw data. Default: SPOT.
+         OFFSET_COL (str): Column name for offset tracking. Default: "resample_offset".
+
+     Example:
+         ```python
+         from signalflow.core import RollingAggregator
+         import polars as pl
+
+         # Create 5-minute bars from 1-minute bars
+         aggregator = RollingAggregator(
+             offset_window=5,
+             mode="add",
+             prefix="5m_"
+         )
+
+         # Resample data
+         df_resampled = aggregator.resample(spot_df)
+
+         # Result has both 1m and 5m data
+         print(df_resampled.columns)
+         # ['pair', 'timestamp', 'open', 'high', 'low', 'close', 'volume',
+         #  'resample_offset', '5m_open', '5m_high', '5m_low', '5m_close', '5m_volume']
+
+         # Replace mode: output only 5m bars
+         aggregator_replace = RollingAggregator(
+             offset_window=5,
+             mode="replace"
+         )
+         df_5m = aggregator_replace.resample(spot_df)
+
+         # Add offset column for tracking
+         df_with_offset = aggregator.add_offset_column(spot_df)
+         print(df_with_offset["resample_offset"])  # 0, 1, 2, 3, 4, 0, 1, ...
+         ```
+
+     Note:
+         Currently only the SPOT data type (OHLCV) is supported.
+         The first (k-1) rows per pair have nulls in the resampled columns.
+         resample() sorts the input by (pair, timestamp) automatically.
+
+     See Also:
+         FeatureExtractor: For extracting features from resampled data.
+     """
+
+     offset_window: int = 1
+     ts_col: str = "timestamp"
+     pair_col: str = "pair"
+     mode: Literal["add", "replace"] = "replace"
+     prefix: str | None = None
+     raw_data_type: RawDataType = RawDataType.SPOT
+
+     OFFSET_COL: str = "resample_offset"
+
+     @property
+     def out_prefix(self) -> str:
+         """Get the output prefix for resampled columns.
+
+         Returns:
+             str: Prefix for output columns. Uses the custom prefix if provided,
+                 otherwise defaults to "rs_{offset_window}m_".
+
+         Example:
+             ```python
+             # Default prefix
+             agg = RollingAggregator(offset_window=5)
+             assert agg.out_prefix == "rs_5m_"
+
+             # Custom prefix
+             agg = RollingAggregator(offset_window=5, prefix="5min_")
+             assert agg.out_prefix == "5min_"
+             ```
+         """
+         return self.prefix if self.prefix is not None else f"rs_{self.offset_window}m_"
+
+     def _validate_base(self, df: pl.DataFrame) -> None:
+         """Validate base requirements for the DataFrame.
+
+         Args:
+             df (pl.DataFrame): DataFrame to validate.
+
+         Raises:
+             ValueError: If offset_window <= 0.
+             ValueError: If required columns (ts_col, pair_col) are missing.
+         """
+         if self.offset_window <= 0:
+             raise ValueError(f"offset_window must be > 0, got {self.offset_window}")
+         if self.ts_col not in df.columns:
+             raise ValueError(f"Missing '{self.ts_col}' column")
+         if self.pair_col not in df.columns:
+             raise ValueError(f"Missing '{self.pair_col}' column")
+
+     def add_offset_column(self, df: pl.DataFrame) -> pl.DataFrame:
+         """Add an offset column that tracks position within the window.
+
+         Computes: timestamp.minute() % offset_window
+
+         Useful for:
+             - Debugging resampling logic
+             - Identifying bar position within the window (0, 1, 2, ..., k-1)
+             - Aligning multiple dataframes
+
+         Args:
+             df (pl.DataFrame): Input DataFrame with a timestamp column.
+
+         Returns:
+             pl.DataFrame: DataFrame with the offset column added.
+
+         Raises:
+             ValueError: If validation fails (missing columns, invalid window).
+
+         Example:
+             ```python
+             # Add offset for 5-minute windows
+             agg = RollingAggregator(offset_window=5)
+             df_with_offset = agg.add_offset_column(spot_df)
+
+             # Offset cycles: 0, 1, 2, 3, 4, 0, 1, 2, ...
+             print(df_with_offset["resample_offset"].to_list()[:10])
+             # [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
+
+             # Filter by specific offset
+             df_offset_0 = df_with_offset.filter(
+                 pl.col("resample_offset") == 0
+             )
+             ```
+         """
+         self._validate_base(df)
+
+         return df.with_columns(
+             (pl.col(self.ts_col).dt.minute() % pl.lit(self.offset_window)).cast(pl.Int64).alias(self.OFFSET_COL)
+         )
+
+     def get_last_offset(self, df: pl.DataFrame) -> int:
+         """Get the offset value of the last timestamp in the DataFrame.
+
+         Useful for:
+             - Determining the current position in the resampling window
+             - Synchronizing multiple dataframes
+             - Tracking resampling state
+
+         Args:
+             df (pl.DataFrame): Input DataFrame with a timestamp column.
+
+         Returns:
+             int: Offset value (0 to offset_window - 1).
+
+         Raises:
+             ValueError: If the DataFrame is empty or validation fails.
+
+         Example:
+             ```python
+             agg = RollingAggregator(offset_window=5)
+
+             # Check current offset
+             last_offset = agg.get_last_offset(spot_df)
+             print(f"Current offset: {last_offset}")  # 0-4
+
+             # Wait for window completion
+             if last_offset == 4:
+                 print("Window complete, ready to resample")
+             ```
+         """
+         self._validate_base(df)
+         if df.is_empty():
+             raise ValueError("Empty dataframe")
+
+         last_ts = df.select(pl.col(self.ts_col).max()).item()
+         return int(last_ts.minute % self.offset_window)
+
+     def _spot_validate(self, cols: list[str]) -> None:
+         """Validate SPOT data requirements.
+
+         Args:
+             cols (list[str]): Column names in the DataFrame.
+
+         Raises:
+             ValueError: If required OHLC columns are missing.
+         """
+         required = {"open", "high", "low", "close"}
+         missing = required - set(cols)
+         if missing:
+             raise ValueError(f"spot resample requires columns {sorted(required)}; missing {sorted(missing)}")
+
+     def resample(self, df: pl.DataFrame) -> pl.DataFrame:
+         """Resample the DataFrame using rolling-window aggregation.
+
+         Aggregation rules for SPOT data:
+             - open: value from (k-1) bars ago (shifted)
+             - high: maximum over the window
+             - low: minimum over the window
+             - close: current value (no aggregation)
+             - volume: sum over the window (if present)
+             - trades: sum over the window (if present)
+
+         Processing:
+             1. Sort by (pair, timestamp)
+             2. Add the offset column if needed
+             3. Apply rolling aggregations per pair
+             4. Return a length-preserving result
+
+         Args:
+             df (pl.DataFrame): Input DataFrame with OHLCV data.
+
+         Returns:
+             pl.DataFrame: Resampled DataFrame. Length equals input length.
+
+         Raises:
+             NotImplementedError: If raw_data_type is not SPOT.
+             ValueError: If required columns are missing or the output length mismatches.
+
+         Example:
+             ```python
+             import polars as pl
+             from datetime import datetime
+
+             # Create 1-minute bars
+             df = pl.DataFrame({
+                 "pair": ["BTCUSDT"] * 10,
+                 "timestamp": [
+                     datetime(2024, 1, 1, 10, i)
+                     for i in range(10)
+                 ],
+                 "open": [45000 + i*10 for i in range(10)],
+                 "high": [45100 + i*10 for i in range(10)],
+                 "low": [44900 + i*10 for i in range(10)],
+                 "close": [45050 + i*10 for i in range(10)],
+                 "volume": [100.0] * 10
+             })
+
+             # Create 5-minute bars (add mode)
+             agg = RollingAggregator(offset_window=5, mode="add", prefix="5m_")
+             df_resampled = agg.resample(df)
+
+             # First 4 rows have nulls for 5m columns
+             print(df_resampled.filter(pl.col("5m_open").is_null()).height)  # 4
+
+             # From row 5 onwards, 5m data is available
+             print(df_resampled[5:])
+             # 5m_open = open from (k-1)=4 bars ago (first bar of the 5-bar window)
+             # 5m_high = max(high) over last 5 bars
+             # 5m_low = min(low) over last 5 bars
+             # 5m_close = current close
+             # 5m_volume = sum(volume) over last 5 bars
+
+             # Replace mode: output only resampled columns
+             agg_replace = RollingAggregator(offset_window=5, mode="replace")
+             df_5m = agg_replace.resample(df)
+             print(df_5m.columns)
+             # ['pair', 'timestamp', 'open', 'high', 'low', 'close', 'volume', 'resample_offset']
+             # Same names as the input, but the values are 5-minute aggregates
+             ```
+
+         Note:
+             The first (k-1) rows per pair have null values (min_periods=k).
+             The input DataFrame is sorted by (pair, timestamp) automatically.
+             Volume and trades columns are optional but recommended.
+         """
+         if self.raw_data_type != RawDataType.SPOT:
+             raise NotImplementedError("Currently resample() implemented for data_type='spot' only")
+
+         self._validate_base(df)
+         self._spot_validate(df.columns)
+
+         df0 = df.sort([self.pair_col, self.ts_col])
+
+         if self.OFFSET_COL not in df0.columns:
+             df0 = self.add_offset_column(df0)
+
+         k = int(self.offset_window)
+         pfx = self.out_prefix
+         over = [self.pair_col]
+
+         rs_open = pl.col("open").shift(k - 1).over(over)
+         rs_high = pl.col("high").rolling_max(window_size=k, min_periods=k).over(over)
+         rs_low = pl.col("low").rolling_min(window_size=k, min_periods=k).over(over)
+         rs_close = pl.col("close")
+
+         has_volume = "volume" in df0.columns
+         has_trades = "trades" in df0.columns
+
+         if self.mode == "add":
+             exprs: list[pl.Expr] = [
+                 rs_open.alias(f"{pfx}open"),
+                 rs_high.alias(f"{pfx}high"),
+                 rs_low.alias(f"{pfx}low"),
+                 rs_close.alias(f"{pfx}close"),
+             ]
+             if has_volume:
+                 exprs.append(
+                     pl.col("volume")
+                     .rolling_sum(window_size=k, min_periods=k)
+                     .over(over)
+                     .alias(f"{pfx}volume")
+                 )
+             if has_trades:
+                 exprs.append(
+                     pl.col("trades")
+                     .rolling_sum(window_size=k, min_periods=k)
+                     .over(over)
+                     .alias(f"{pfx}trades")
+                 )
+             out = df0.with_columns(exprs)
+
+         elif self.mode == "replace":
+             exprs2: list[pl.Expr] = [
+                 rs_open.alias("open"),
+                 rs_high.alias("high"),
+                 rs_low.alias("low"),
+                 rs_close.alias("close"),
+             ]
+             if has_volume:
+                 exprs2.append(
+                     pl.col("volume").rolling_sum(window_size=k, min_periods=k).over(over).alias("volume")
+                 )
+             if has_trades:
+                 exprs2.append(
+                     pl.col("trades").rolling_sum(window_size=k, min_periods=k).over(over).alias("trades")
+                 )
+             out = df0.with_columns(exprs2)
+
+         else:
+             raise ValueError(f"Unknown mode: {self.mode}")
+
+         if out.height != df.height:
+             raise ValueError(f"resample(pl): len(out)={out.height} != len(in)={df.height}")
+
+         return out
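The windowing semantics documented above can be sanity-checked with plain polars, independent of the package. A minimal sketch (the window size `k`, the toy frame, and the `rs_3m_` column names are illustrative; `min_periods` mirrors the diff, which newer polars versions spell `min_samples`):

```python
import polars as pl
from datetime import datetime

k = 3  # offset_window: aggregate over the last 3 one-minute bars
df = pl.DataFrame({
    "pair": ["BTCUSDT"] * 5,
    "timestamp": [datetime(2024, 1, 1, 10, m) for m in range(5)],
    "open":  [1.0, 2.0, 3.0, 4.0, 5.0],
    "high":  [1.5, 2.5, 3.5, 4.5, 5.5],
    "low":   [0.5, 1.5, 2.5, 3.5, 4.5],
    "close": [1.2, 2.2, 3.2, 4.2, 5.2],
})

out = df.sort(["pair", "timestamp"]).with_columns(
    # open of the oldest bar in the window: shift by k-1, per pair
    pl.col("open").shift(k - 1).over("pair").alias("rs_3m_open"),
    # high/low aggregated over the full window; min_periods=k yields
    # nulls for the first k-1 rows of each pair
    pl.col("high").rolling_max(window_size=k, min_periods=k).over("pair").alias("rs_3m_high"),
    pl.col("low").rolling_min(window_size=k, min_periods=k).over("pair").alias("rs_3m_low"),
    # close needs no aggregation: it is the current bar's close
    pl.col("close").alias("rs_3m_close"),
)
assert out.height == df.height      # length-preserving
assert out["rs_3m_open"][2] == 1.0  # row 2's window covers rows 0..2
assert out["rs_3m_high"][4] == 5.5  # max(high) over rows 2..4
```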
signalflow/core/signal_transforms/__init__.py
@@ -0,0 +1,5 @@
+ from .base_signal_transform import SignalsTransform
+
+ __all__ = [
+     "SignalsTransform",
+ ]
signalflow/core/signal_transforms/base_signal_transform.py
@@ -0,0 +1,186 @@
+ from typing import Protocol
+ import polars as pl
+ from signalflow.core.enums import SfComponentType
+
+
+ class SignalsTransform(Protocol):
+     """Protocol for signal transformations.
+
+     Defines the interface for callables that transform signal DataFrames.
+     Transformations can filter, modify, or augment signals while preserving
+     the core schema (pair, timestamp, signal_type, signal, probability).
+
+     Protocol-based design allows:
+         - Any callable with a matching signature
+         - Functional composition via Signals.pipe()
+         - Type checking without inheritance
+
+     Common use cases:
+         - Filter signals by probability threshold
+         - Normalize probability values
+         - Add metadata columns
+         - Remove duplicate signals
+         - Apply time-based filters (cooldown periods)
+
+     Attributes:
+         name (str): Descriptive name for the transformation.
+         component_type (SfComponentType): Always SIGNALS_TRANSFORM for the registry.
+
+     Example:
+         ```python
+         import polars as pl
+         from signalflow.core import Signals, SignalsTransform
+
+         # Simple function-based transform
+         def filter_high_probability(df: pl.DataFrame) -> pl.DataFrame:
+             '''Keep only signals with probability > 0.7'''
+             return df.filter(pl.col("probability") > 0.7)
+
+         filter_high_probability.name = "filter_high_prob"
+         filter_high_probability.component_type = SfComponentType.SIGNALS_TRANSFORM
+
+         # Class-based transform
+         class NormalizeProbability:
+             name = "normalize_prob"
+             component_type = SfComponentType.SIGNALS_TRANSFORM
+
+             def __call__(self, df: pl.DataFrame) -> pl.DataFrame:
+                 '''Normalize probabilities to [0, 1] range'''
+                 return df.with_columns(
+                     (pl.col("probability") / pl.col("probability").max())
+                     .alias("probability")
+                 )
+
+         # Use in a signal pipeline
+         signals = detector.detect(data)
+
+         # Single transform
+         filtered = signals.apply(filter_high_probability)
+
+         # Chained transforms
+         processed = signals.pipe(
+             filter_high_probability,
+             NormalizeProbability(),
+             add_metadata_transform
+         )
+         ```
+
+     Example:
+         ```python
+         # Register a transform in the registry
+         from signalflow.core import sf_component
+
+         @sf_component(name="cooldown_filter")
+         class CooldownFilter:
+             component_type = SfComponentType.SIGNALS_TRANSFORM
+             name = "cooldown_filter"
+
+             def __init__(self, cooldown_minutes: int = 60):
+                 self.cooldown_minutes = cooldown_minutes
+
+             def __call__(self, df: pl.DataFrame) -> pl.DataFrame:
+                 '''Filter out signals within the cooldown period'''
+                 return (
+                     df.sort(["pair", "timestamp"])
+                     .with_columns(
+                         pl.col("timestamp")
+                         .diff()
+                         .over("pair")
+                         .dt.total_minutes()
+                         .alias("minutes_since_last")
+                     )
+                     .filter(
+                         (pl.col("minutes_since_last").is_null()) |
+                         (pl.col("minutes_since_last") >= self.cooldown_minutes)
+                     )
+                     .drop("minutes_since_last")
+                 )
+
+         # Use the registered transform
+         from signalflow.core.registry import default_registry
+
+         cooldown = default_registry.create(
+             SfComponentType.SIGNALS_TRANSFORM,
+             "cooldown_filter",
+             cooldown_minutes=120
+         )
+
+         filtered_signals = signals.apply(cooldown)
+         ```
+
+     Note:
+         Transformations should be pure functions (no side effects).
+         The input DataFrame schema should be preserved where possible.
+         Return a DataFrame with the same or a compatible schema for chaining.
+
+     See Also:
+         Signals: Container class with apply() and pipe() methods.
+         sf_component: Decorator for registering transforms.
+     """
+
+     name: str
+     component_type: SfComponentType = SfComponentType.SIGNALS_TRANSFORM
+
+     def __call__(self, value: pl.DataFrame) -> pl.DataFrame:
+         """Apply the transformation to a signals DataFrame.
+
+         Core method that performs the actual transformation logic.
+         Must accept and return a Polars DataFrame with the signals schema.
+
+         Expected input schema:
+             - pair (str): Trading pair
+             - timestamp (datetime): Signal timestamp
+             - signal_type (int): SignalType enum value
+             - signal (int|float): Signal value
+             - probability (float): Signal probability (optional but common)
+
+         Args:
+             value (pl.DataFrame): Input signals DataFrame with the standard schema.
+
+         Returns:
+             pl.DataFrame: Transformed signals DataFrame. Should maintain a
+                 compatible schema for chaining with other transforms.
+
+         Example:
+             ```python
+             # Function-based transform
+             def remove_none_signals(df: pl.DataFrame) -> pl.DataFrame:
+                 from signalflow.core.enums import SignalType
+                 return df.filter(
+                     pl.col("signal_type") != SignalType.NONE.value
+                 )
+
+             # Apply the transform
+             filtered = signals.apply(remove_none_signals)
+
+             # Class-based transform with state
+             class ThresholdFilter:
+                 def __init__(self, threshold: float = 0.5):
+                     self.threshold = threshold
+
+                 def __call__(self, df: pl.DataFrame) -> pl.DataFrame:
+                     return df.filter(
+                         pl.col("probability") >= self.threshold
+                     )
+
+             # Use with different thresholds
+             filter_50 = ThresholdFilter(threshold=0.5)
+             filter_70 = ThresholdFilter(threshold=0.7)
+
+             signals_50 = signals.apply(filter_50)
+             signals_70 = signals.apply(filter_70)
+
+             # Combine multiple transforms
+             processed = signals.pipe(
+                 remove_none_signals,
+                 filter_50,
+                 lambda df: df.sort(["pair", "timestamp"])
+             )
+             ```
+
+         Note:
+             Should be deterministic - same input produces same output.
+             Avoid modifying the input DataFrame (return a new DataFrame).
+             Consider performance for large datasets (vectorized operations).
+         """
+         ...
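Since `SignalsTransform` is a `Protocol`, any callable exposing `name`, `component_type`, and the `__call__` signature conforms without inheriting from it. A minimal conforming transform as a sketch; the `DedupSignals` class is hypothetical, and `signals.apply(...)` follows the usage pattern shown in the docstring:

```python
import polars as pl
from signalflow.core.enums import SfComponentType

class DedupSignals:
    """Keep the first signal per (pair, timestamp, signal_type)."""
    name = "dedup_signals"
    component_type = SfComponentType.SIGNALS_TRANSFORM

    def __call__(self, df: pl.DataFrame) -> pl.DataFrame:
        # Pure: returns a new frame, never mutates the input
        return df.unique(
            subset=["pair", "timestamp", "signal_type"],
            keep="first",
            maintain_order=True,
        )

# Usage, following the docstring's pipeline pattern:
#   deduped = signals.apply(DedupSignals())
```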
signalflow/data/__init__.py
@@ -0,0 +1,11 @@
+ import signalflow.data.source as source
+ import signalflow.data.raw_store as raw_store
+ import signalflow.data.strategy_store as strategy_store
+ from signalflow.data.raw_data_factory import RawDataFactory
+
+ __all__ = [
+     "source",
+     "raw_store",
+     "strategy_store",
+     "RawDataFactory",
+ ]
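The re-exports above define the public import surface of the data layer; a minimal sketch of what a consumer would import, assuming the wheel is installed:

```python
from signalflow.data import RawDataFactory, raw_store, source, strategy_store
```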