signalflow-trading 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signalflow/__init__.py +21 -0
- signalflow/analytics/__init__.py +0 -0
- signalflow/core/__init__.py +46 -0
- signalflow/core/base_mixin.py +232 -0
- signalflow/core/containers/__init__.py +21 -0
- signalflow/core/containers/order.py +216 -0
- signalflow/core/containers/portfolio.py +211 -0
- signalflow/core/containers/position.py +296 -0
- signalflow/core/containers/raw_data.py +167 -0
- signalflow/core/containers/raw_data_view.py +169 -0
- signalflow/core/containers/signals.py +198 -0
- signalflow/core/containers/strategy_state.py +147 -0
- signalflow/core/containers/trade.py +112 -0
- signalflow/core/decorators.py +103 -0
- signalflow/core/enums.py +270 -0
- signalflow/core/registry.py +322 -0
- signalflow/core/rolling_aggregator.py +362 -0
- signalflow/core/signal_transforms/__init__.py +5 -0
- signalflow/core/signal_transforms/base_signal_transform.py +186 -0
- signalflow/data/__init__.py +11 -0
- signalflow/data/raw_data_factory.py +225 -0
- signalflow/data/raw_store/__init__.py +7 -0
- signalflow/data/raw_store/base.py +271 -0
- signalflow/data/raw_store/duckdb_stores.py +696 -0
- signalflow/data/source/__init__.py +10 -0
- signalflow/data/source/base.py +300 -0
- signalflow/data/source/binance.py +442 -0
- signalflow/data/strategy_store/__init__.py +8 -0
- signalflow/data/strategy_store/base.py +278 -0
- signalflow/data/strategy_store/duckdb.py +409 -0
- signalflow/data/strategy_store/schema.py +36 -0
- signalflow/detector/__init__.py +7 -0
- signalflow/detector/adapter/__init__.py +5 -0
- signalflow/detector/adapter/pandas_detector.py +46 -0
- signalflow/detector/base.py +390 -0
- signalflow/detector/sma_cross.py +105 -0
- signalflow/feature/__init__.py +16 -0
- signalflow/feature/adapter/__init__.py +5 -0
- signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
- signalflow/feature/base.py +330 -0
- signalflow/feature/feature_set.py +286 -0
- signalflow/feature/oscillator/__init__.py +5 -0
- signalflow/feature/oscillator/rsi_extractor.py +42 -0
- signalflow/feature/pandasta/__init__.py +10 -0
- signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
- signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
- signalflow/feature/smoother/__init__.py +5 -0
- signalflow/feature/smoother/sma_extractor.py +46 -0
- signalflow/strategy/__init__.py +9 -0
- signalflow/strategy/broker/__init__.py +15 -0
- signalflow/strategy/broker/backtest.py +172 -0
- signalflow/strategy/broker/base.py +186 -0
- signalflow/strategy/broker/executor/__init__.py +9 -0
- signalflow/strategy/broker/executor/base.py +35 -0
- signalflow/strategy/broker/executor/binance_spot.py +12 -0
- signalflow/strategy/broker/executor/virtual_spot.py +81 -0
- signalflow/strategy/broker/realtime_spot.py +12 -0
- signalflow/strategy/component/__init__.py +9 -0
- signalflow/strategy/component/base.py +65 -0
- signalflow/strategy/component/entry/__init__.py +7 -0
- signalflow/strategy/component/entry/fixed_size.py +57 -0
- signalflow/strategy/component/entry/signal.py +127 -0
- signalflow/strategy/component/exit/__init__.py +5 -0
- signalflow/strategy/component/exit/time_based.py +47 -0
- signalflow/strategy/component/exit/tp_sl.py +80 -0
- signalflow/strategy/component/metric/__init__.py +8 -0
- signalflow/strategy/component/metric/main_metrics.py +181 -0
- signalflow/strategy/runner/__init__.py +8 -0
- signalflow/strategy/runner/backtest_runner.py +208 -0
- signalflow/strategy/runner/base.py +19 -0
- signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
- signalflow/strategy/runner/realtime_runner.py +0 -0
- signalflow/target/__init__.py +14 -0
- signalflow/target/adapter/__init__.py +5 -0
- signalflow/target/adapter/pandas_labeler.py +45 -0
- signalflow/target/base.py +409 -0
- signalflow/target/fixed_horizon_labeler.py +93 -0
- signalflow/target/static_triple_barrier.py +162 -0
- signalflow/target/triple_barrier.py +188 -0
- signalflow/utils/__init__.py +7 -0
- signalflow/utils/import_utils.py +11 -0
- signalflow/utils/tune_utils.py +19 -0
- signalflow/validator/__init__.py +6 -0
- signalflow/validator/base.py +139 -0
- signalflow/validator/sklearn_validator.py +527 -0
- signalflow_trading-0.2.1.dist-info/METADATA +149 -0
- signalflow_trading-0.2.1.dist-info/RECORD +90 -0
- signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
- signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
- signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import polars as pl
|
|
5
|
+
|
|
6
|
+
from signalflow.core import RawData
|
|
7
|
+
from signalflow.data.raw_store import DuckDbSpotStore
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RawDataFactory:
    """Factory for creating RawData instances from various sources.

    Provides static constructors that load historical market data from a
    storage backend (currently DuckDB) into a validated, schema-normalized
    RawData container.

    Key features:
        - Automatic schema validation (required columns present)
        - Duplicate (pair, timestamp) detection
        - Timezone normalization (timezone-naive microsecond timestamps)
        - Column cleanup (drops storage-only columns such as ``timeframe``)
        - Deterministic sorting by (pair, timestamp)

    Example:
        ```python
        from pathlib import Path
        from datetime import datetime
        from signalflow.data import RawDataFactory

        raw_data = RawDataFactory.from_duckdb_spot_store(
            spot_store_path=Path("data/binance_spot.duckdb"),
            pairs=["BTCUSDT", "ETHUSDT"],
            start=datetime(2024, 1, 1),
            end=datetime(2024, 12, 31),
            data_types=["spot"],
        )

        spot_df = raw_data["spot"]
        print(f"Loaded {len(spot_df)} bars")
        print(f"Pairs: {raw_data.pairs}")
        print(f"Date range: {raw_data.datetime_start} to {raw_data.datetime_end}")
        ```

    See Also:
        RawData: Immutable container for raw market data.
        DuckDbSpotStore: DuckDB storage backend for spot data.
    """

    @staticmethod
    def from_duckdb_spot_store(
        spot_store_path: Path,
        pairs: list[str],
        start: datetime,
        end: datetime,
        data_types: list[str] | None = None,
    ) -> RawData:
        """Create RawData from a DuckDB spot store.

        Loads spot trading data from DuckDB storage with validation,
        deduplication checks, and schema normalization.

        Processing steps:
            1. Load data from DuckDB for the requested pairs and date range
            2. Validate required columns (pair, timestamp)
            3. Drop storage-only columns (timeframe)
            4. Normalize timestamps (microseconds, timezone-naive)
            5. Reject duplicate (pair, timestamp) rows
            6. Sort by (pair, timestamp)
            7. Package into an immutable RawData container

        Args:
            spot_store_path (Path): Path to the DuckDB file.
            pairs (list[str]): Trading pairs to load (e.g., ["BTCUSDT", "ETHUSDT"]).
            start (datetime): Start datetime (inclusive).
            end (datetime): End datetime (inclusive).
            data_types (list[str] | None): Data types to load. Defaults to
                ``["spot"]`` when None. Currently only "spot" is supported.

        Returns:
            RawData: Immutable container with loaded and validated data.

        Raises:
            ValueError: If required columns (pair, timestamp) are missing.
            ValueError: If duplicate (pair, timestamp) combinations are detected.

        Example:
            ```python
            raw_data = RawDataFactory.from_duckdb_spot_store(
                spot_store_path=Path("data/binance.duckdb"),
                pairs=["BTCUSDT"],
                start=datetime(2024, 1, 1),
                end=datetime(2024, 1, 31),
            )
            spot_df = raw_data["spot"]
            assert spot_df["timestamp"].dtype == pl.Datetime("us")
            ```

        Note:
            The store connection is always closed via the ``finally`` block.
            Duplicate detection reports up to 10 offending rows in the error.
            All data is sorted by (pair, timestamp) for consistent ordering.
        """
        # Bug fix: the documented default (None) previously crashed with
        # "argument of type 'NoneType' is not iterable" on the membership
        # test below. Fall back to the only supported data type.
        if data_types is None:
            data_types = ["spot"]

        data: dict[str, pl.DataFrame] = {}
        store = DuckDbSpotStore(spot_store_path)
        try:
            if "spot" in data_types:
                spot = store.load_many(pairs=pairs, start=start, end=end)

                required = {"pair", "timestamp"}
                missing = required - set(spot.columns)
                if missing:
                    raise ValueError(f"Spot df missing columns: {sorted(missing)}")

                # "timeframe" is storage metadata, not market data.
                if "timeframe" in spot.columns:
                    spot = spot.drop("timeframe")

                # Normalize to timezone-naive microsecond precision so every
                # downstream consumer sees a uniform timestamp dtype.
                spot = spot.with_columns(
                    pl.col("timestamp").cast(pl.Datetime("us")).dt.replace_time_zone(None)
                )

                # Any (pair, timestamp) appearing more than once is a data
                # quality error; surface a few examples rather than silently
                # keeping ambiguous rows.
                dup_count = (
                    spot.group_by(["pair", "timestamp"]).len()
                    .filter(pl.col("len") > 1)
                )
                if dup_count.height > 0:
                    dups = (
                        spot.join(
                            dup_count.select(["pair", "timestamp"]),
                            on=["pair", "timestamp"],
                        )
                        .select(["pair", "timestamp"])
                        .head(10)
                    )
                    raise ValueError(
                        f"Duplicate (pair, timestamp) detected. Examples:\n{dups}"
                    )

                spot = spot.sort(["pair", "timestamp"])
                data["spot"] = spot

            return RawData(
                datetime_start=start,
                datetime_end=end,
                pairs=pairs,
                data=data,
            )
        finally:
            # Always release the DuckDB connection, even when validation fails.
            store.close()
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Optional, ClassVar
|
|
6
|
+
from signalflow.core import SfComponentType
|
|
7
|
+
import polars as pl
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
@dataclass
class RawDataStore(ABC):
    """Abstract base class for raw data storage backends.

    Defines the interface for loading historical market data from storage.
    Implementations provide specific storage backends (DuckDB, Parquet, etc.)
    while maintaining a consistent API.

    Key features:
        - Single and batch loading (load, load_many)
        - Flexible time filtering (hours, start/end)
        - Multi-format output (Polars, Pandas)
        - Resource management (close, context manager)

    Attributes:
        component_type (ClassVar[SfComponentType]): Always RAW_DATA_STORE for registry.

    Example:
        ```python
        from signalflow.data.raw_store import DuckDbSpotStore
        from datetime import datetime
        from pathlib import Path

        store = DuckDbSpotStore(Path("data/binance_spot.duckdb"))
        try:
            btc_df = store.load(
                pair="BTCUSDT",
                start=datetime(2024, 1, 1),
                end=datetime(2024, 1, 31),
            )
            multi_df = store.load_many(
                pairs=["BTCUSDT", "ETHUSDT"],
                start=datetime(2024, 1, 1),
                end=datetime(2024, 1, 31),
            )
        finally:
            store.close()

        # Or with a context manager (provided by this base class):
        with DuckDbSpotStore(Path("data/binance_spot.duckdb")) as store:
            df = store.load("BTCUSDT", hours=24)
        ```

    Note:
        Subclasses must implement all abstract methods.
        Always call close() or use the context manager to cleanup resources.
        Time filtering supports both relative (hours) and absolute (start/end).

    See Also:
        DuckDbSpotStore: DuckDB implementation for spot data.
        RawDataFactory: Factory for creating RawData from stores.
    """
    # Registry discriminator identifying this component family.
    component_type: ClassVar[SfComponentType] = SfComponentType.RAW_DATA_STORE

    def __enter__(self) -> "RawDataStore":
        """Enter a ``with`` block; returns the store itself.

        Provided here so every backend gets the context-manager usage the
        docstrings advertise without re-implementing it.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Close the store when leaving a ``with`` block, even on error."""
        self.close()

    @abstractmethod
    def load(self, pair: str, hours: Optional[int] = None, start: Optional[datetime] = None, end: Optional[datetime] = None) -> pl.DataFrame:
        """Load data for a single trading pair.

        Loads historical market data with flexible time filtering.
        Use either relative (hours) or absolute (start/end) time filtering.

        Args:
            pair (str): Trading pair (e.g., "BTCUSDT").
            hours (int | None): Load last N hours of data. Mutually exclusive with start/end.
            start (datetime | None): Start datetime (inclusive). Requires end parameter.
            end (datetime | None): End datetime (inclusive). Requires start parameter.

        Returns:
            pl.DataFrame: Market data as Polars DataFrame.
                Typically includes columns: pair, timestamp, open, high, low, close, volume.

        Raises:
            ValueError: If both hours and start/end are provided.
            ValueError: If start provided without end or vice versa.
            FileNotFoundError: If storage file/database doesn't exist.

        Example:
            ```python
            recent_df = store.load("BTCUSDT", hours=24)
            historical_df = store.load(
                "BTCUSDT",
                start=datetime(2024, 1, 1),
                end=datetime(2024, 1, 31),
            )
            ```

        Note:
            Implementation should handle timezone normalization.
            Returned DataFrame should be sorted by timestamp.
        """
        pass

    @abstractmethod
    def load_many(self, pairs: list[str], hours: Optional[int] = None, start: Optional[datetime] = None, end: Optional[datetime] = None) -> pl.DataFrame:
        """Load data for multiple trading pairs efficiently.

        Batch loading is more efficient than calling load() repeatedly.
        Returns combined DataFrame with all pairs.

        Args:
            pairs (list[str]): List of trading pairs (e.g., ["BTCUSDT", "ETHUSDT"]).
            hours (int | None): Load last N hours of data. Mutually exclusive with start/end.
            start (datetime | None): Start datetime (inclusive). Requires end parameter.
            end (datetime | None): End datetime (inclusive). Requires start parameter.

        Returns:
            pl.DataFrame: Combined market data for all pairs as Polars DataFrame.
                Includes pair column to distinguish between pairs.

        Raises:
            ValueError: If both hours and start/end are provided.
            ValueError: If start provided without end or vice versa.
            ValueError: If pairs list is empty.

        Example:
            ```python
            multi_df = store.load_many(
                pairs=["BTCUSDT", "ETHUSDT", "BNBUSDT"],
                start=datetime(2024, 1, 1),
                end=datetime(2024, 1, 31),
            )
            for pair in multi_df["pair"].unique():
                pair_df = multi_df.filter(pl.col("pair") == pair)
                print(f"{pair}: {len(pair_df)} bars")
            ```

        Note:
            Returned DataFrame sorted by (pair, timestamp).
            More efficient than multiple load() calls due to batch query.
        """
        pass

    @abstractmethod
    def load_many_pandas(self, pairs: list[str], start: Optional[datetime] = None, end: Optional[datetime] = None) -> pd.DataFrame:
        """Load data for multiple pairs as Pandas DataFrame.

        Convenience method for legacy code or libraries requiring Pandas.
        Typically converts from Polars internally.

        Args:
            pairs (list[str]): List of trading pairs (e.g., ["BTCUSDT", "ETHUSDT"]).
            start (datetime | None): Start datetime (inclusive). Requires end parameter.
            end (datetime | None): End datetime (inclusive). Requires start parameter.

        Returns:
            pd.DataFrame: Combined market data as Pandas DataFrame.

        Raises:
            ValueError: If start provided without end or vice versa.
            ValueError: If pairs list is empty.

        Example:
            ```python
            df = store.load_many_pandas(
                pairs=["BTCUSDT", "ETHUSDT"],
                start=datetime(2024, 1, 1),
                end=datetime(2024, 1, 31),
            )
            ```

        Note:
            Prefer load_many() with Polars for better performance.
            Use this only when Pandas is required.
            Timestamps normalized to timezone-naive datetime64[ns].
        """
        pass

    @abstractmethod
    def close(self) -> None:
        """Close storage connection and cleanup resources.

        Releases database connections, file handles, or other resources.
        Should be called when store is no longer needed.

        Always call close() in a finally block — or rely on the context
        manager provided by this base class — to ensure cleanup on errors.

        Example:
            ```python
            store = DuckDbSpotStore(Path("data/binance.duckdb"))
            try:
                df = store.load("BTCUSDT", hours=24)
            finally:
                store.close()
            ```

        Note:
            Idempotent - safe to call multiple times.
            After close(), store should not be used for loading.
        """
        pass
|