signalflow-trading 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. signalflow/__init__.py +21 -0
  2. signalflow/analytics/__init__.py +0 -0
  3. signalflow/core/__init__.py +46 -0
  4. signalflow/core/base_mixin.py +232 -0
  5. signalflow/core/containers/__init__.py +21 -0
  6. signalflow/core/containers/order.py +216 -0
  7. signalflow/core/containers/portfolio.py +211 -0
  8. signalflow/core/containers/position.py +296 -0
  9. signalflow/core/containers/raw_data.py +167 -0
  10. signalflow/core/containers/raw_data_view.py +169 -0
  11. signalflow/core/containers/signals.py +198 -0
  12. signalflow/core/containers/strategy_state.py +147 -0
  13. signalflow/core/containers/trade.py +112 -0
  14. signalflow/core/decorators.py +103 -0
  15. signalflow/core/enums.py +270 -0
  16. signalflow/core/registry.py +322 -0
  17. signalflow/core/rolling_aggregator.py +362 -0
  18. signalflow/core/signal_transforms/__init__.py +5 -0
  19. signalflow/core/signal_transforms/base_signal_transform.py +186 -0
  20. signalflow/data/__init__.py +11 -0
  21. signalflow/data/raw_data_factory.py +225 -0
  22. signalflow/data/raw_store/__init__.py +7 -0
  23. signalflow/data/raw_store/base.py +271 -0
  24. signalflow/data/raw_store/duckdb_stores.py +696 -0
  25. signalflow/data/source/__init__.py +10 -0
  26. signalflow/data/source/base.py +300 -0
  27. signalflow/data/source/binance.py +442 -0
  28. signalflow/data/strategy_store/__init__.py +8 -0
  29. signalflow/data/strategy_store/base.py +278 -0
  30. signalflow/data/strategy_store/duckdb.py +409 -0
  31. signalflow/data/strategy_store/schema.py +36 -0
  32. signalflow/detector/__init__.py +7 -0
  33. signalflow/detector/adapter/__init__.py +5 -0
  34. signalflow/detector/adapter/pandas_detector.py +46 -0
  35. signalflow/detector/base.py +390 -0
  36. signalflow/detector/sma_cross.py +105 -0
  37. signalflow/feature/__init__.py +16 -0
  38. signalflow/feature/adapter/__init__.py +5 -0
  39. signalflow/feature/adapter/pandas_feature_extractor.py +54 -0
  40. signalflow/feature/base.py +330 -0
  41. signalflow/feature/feature_set.py +286 -0
  42. signalflow/feature/oscillator/__init__.py +5 -0
  43. signalflow/feature/oscillator/rsi_extractor.py +42 -0
  44. signalflow/feature/pandasta/__init__.py +10 -0
  45. signalflow/feature/pandasta/pandas_ta_extractor.py +141 -0
  46. signalflow/feature/pandasta/top_pandasta_extractors.py +64 -0
  47. signalflow/feature/smoother/__init__.py +5 -0
  48. signalflow/feature/smoother/sma_extractor.py +46 -0
  49. signalflow/strategy/__init__.py +9 -0
  50. signalflow/strategy/broker/__init__.py +15 -0
  51. signalflow/strategy/broker/backtest.py +172 -0
  52. signalflow/strategy/broker/base.py +186 -0
  53. signalflow/strategy/broker/executor/__init__.py +9 -0
  54. signalflow/strategy/broker/executor/base.py +35 -0
  55. signalflow/strategy/broker/executor/binance_spot.py +12 -0
  56. signalflow/strategy/broker/executor/virtual_spot.py +81 -0
  57. signalflow/strategy/broker/realtime_spot.py +12 -0
  58. signalflow/strategy/component/__init__.py +9 -0
  59. signalflow/strategy/component/base.py +65 -0
  60. signalflow/strategy/component/entry/__init__.py +7 -0
  61. signalflow/strategy/component/entry/fixed_size.py +57 -0
  62. signalflow/strategy/component/entry/signal.py +127 -0
  63. signalflow/strategy/component/exit/__init__.py +5 -0
  64. signalflow/strategy/component/exit/time_based.py +47 -0
  65. signalflow/strategy/component/exit/tp_sl.py +80 -0
  66. signalflow/strategy/component/metric/__init__.py +8 -0
  67. signalflow/strategy/component/metric/main_metrics.py +181 -0
  68. signalflow/strategy/runner/__init__.py +8 -0
  69. signalflow/strategy/runner/backtest_runner.py +208 -0
  70. signalflow/strategy/runner/base.py +19 -0
  71. signalflow/strategy/runner/optimized_backtest_runner.py +178 -0
  72. signalflow/strategy/runner/realtime_runner.py +0 -0
  73. signalflow/target/__init__.py +14 -0
  74. signalflow/target/adapter/__init__.py +5 -0
  75. signalflow/target/adapter/pandas_labeler.py +45 -0
  76. signalflow/target/base.py +409 -0
  77. signalflow/target/fixed_horizon_labeler.py +93 -0
  78. signalflow/target/static_triple_barrier.py +162 -0
  79. signalflow/target/triple_barrier.py +188 -0
  80. signalflow/utils/__init__.py +7 -0
  81. signalflow/utils/import_utils.py +11 -0
  82. signalflow/utils/tune_utils.py +19 -0
  83. signalflow/validator/__init__.py +6 -0
  84. signalflow/validator/base.py +139 -0
  85. signalflow/validator/sklearn_validator.py +527 -0
  86. signalflow_trading-0.2.1.dist-info/METADATA +149 -0
  87. signalflow_trading-0.2.1.dist-info/RECORD +90 -0
  88. signalflow_trading-0.2.1.dist-info/WHEEL +5 -0
  89. signalflow_trading-0.2.1.dist-info/licenses/LICENSE +21 -0
  90. signalflow_trading-0.2.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,696 @@
1
+ # IMPORTANT
2
+ import duckdb
3
+ import polars as pl
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timedelta
7
+ from pathlib import Path
8
+ from typing import Optional, Iterable
9
+ from loguru import logger
10
+ import pandas as pd
11
+
12
+ from signalflow.core import sf_component
13
+ from signalflow.data.raw_store.base import RawDataStore
14
+
15
+ @dataclass
16
+ @sf_component(name="duckdb/spot")
17
+ class DuckDbSpotStore(RawDataStore):
18
+ """DuckDB storage backend for OHLCV spot data.
19
+
20
+ Provides efficient storage and retrieval of candlestick (OHLCV) data
21
+ using DuckDB as the backend. Designed for fixed-timeframe storage
22
+ (timeframe not stored per-row, configured at database level).
23
+
24
+ Key features:
25
+ - Automatic schema migration from legacy formats
26
+ - Efficient batch inserts with upsert (INSERT OR REPLACE)
27
+ - Gap detection for data continuity checks
28
+ - Multi-pair batch loading
29
+ - Indexed queries for fast retrieval
30
+
31
+ Schema:
32
+ - pair (VARCHAR): Trading pair
33
+ - timestamp (TIMESTAMP): Bar open time (timezone-naive)
34
+ - open, high, low, close (DOUBLE): OHLC prices
35
+ - volume (DOUBLE): Trading volume
36
+ - trades (INTEGER): Number of trades
37
+
38
+ Attributes:
39
+ db_path (Path): Path to DuckDB file.
40
+ timeframe (str): Fixed timeframe for all data (e.g., "1m", "5m"). Default: "1m".
41
+ _con (duckdb.DuckDBPyConnection): Database connection (initialized in __post_init__).
42
+
43
+ Example:
44
+ ```python
45
+ from signalflow.data.raw_store import DuckDbSpotStore
46
+ from pathlib import Path
47
+ from datetime import datetime
48
+
49
+ # Create store
50
+ store = DuckDbSpotStore(
51
+ db_path=Path("data/binance_spot.duckdb"),
52
+ timeframe="1m"
53
+ )
54
+
55
+ try:
56
+ # Insert data
57
+ klines = [
58
+ {
59
+ "timestamp": datetime(2024, 1, 1, 10, 0),
60
+ "open": 45000.0,
61
+ "high": 45100.0,
62
+ "low": 44900.0,
63
+ "close": 45050.0,
64
+ "volume": 100.5,
65
+ "trades": 150
66
+ }
67
+ ]
68
+ store.insert_klines("BTCUSDT", klines)
69
+
70
+ # Load data
71
+ df = store.load("BTCUSDT", hours=24)
72
+
73
+ # Check data bounds
74
+ min_ts, max_ts = store.get_time_bounds("BTCUSDT")
75
+ print(f"Data range: {min_ts} to {max_ts}")
76
+
77
+ # Get statistics
78
+ stats = store.get_stats()
79
+ print(stats)
80
+
81
+ finally:
82
+ store.close()
83
+ ```
84
+
85
+ Note:
86
+ Timeframe is fixed per database, not per row.
87
+ Automatically migrates from legacy schema (open_time, timeframe columns).
88
+ Always call close() to cleanup database connection.
89
+
90
+ See Also:
91
+ RawDataStore: Base class with interface definition.
92
+ RawDataFactory: Factory for creating RawData from stores.
93
+ """
94
+
95
+ db_path: Path
96
+ timeframe: str = "1m"
97
+ _con: duckdb.DuckDBPyConnection = field(init=False)
98
+
99
def __post_init__(self) -> None:
    """Open the DuckDB connection and make sure the schema exists.

    Connects to the database file at ``db_path`` and then runs
    ``_ensure_tables()``. If schema setup raises, the connection opened
    here is closed before the exception propagates, so a failed
    construction does not leave an open handle on the database file.

    Raises:
        Exception: Anything raised by ``duckdb.connect`` or
            ``_ensure_tables`` is propagated unchanged.
    """
    self._con = duckdb.connect(str(self.db_path))
    try:
        self._ensure_tables()
    except Exception:
        # When __post_init__ fails the caller never receives the instance,
        # so nobody else would be able to call close() on this connection.
        self._con.close()
        raise
103
+
104
def _ensure_tables(self) -> None:
    """Create the ``ohlcv`` and ``meta`` tables, migrating a legacy ``ohlcv`` first.

    Migration is triggered only when an existing ``ohlcv`` table has a
    ``timeframe`` column or an ``open_time`` column:

    - With ``open_time``: rows are copied using ``open_time`` as
      ``timestamp`` and ``quote_volume`` as ``volume``.
    - Otherwise (``timeframe`` only): rows are copied using the existing
      ``timestamp`` and ``volume`` columns; the ``timeframe`` column is
      dropped.

    Rows are copied with INSERT OR REPLACE into a table keyed on
    (pair, timestamp), so legacy rows that share a pair and timestamp
    collapse into one.

    After the optional migration this method always ensures:

    - ``ohlcv`` table with PRIMARY KEY (pair, timestamp)
    - index ``idx_ohlcv_pair_ts`` on (pair, timestamp DESC)
    - ``meta`` table, with the ``timeframe`` key set to ``self.timeframe``

    Note:
        The stored ``timeframe`` value is overwritten on every call; it is
        not compared with a previously stored value.
    """
    # Discover which columns an existing 'ohlcv' table has (empty if none).
    existing = self._con.execute("""
        SELECT column_name
        FROM information_schema.columns
        WHERE table_name = 'ohlcv'
    """).fetchall()
    existing_cols = {row[0] for row in existing}

    if existing_cols and ("timeframe" in existing_cols or "open_time" in existing_cols):
        logger.info("Migrating schema -> fixed-timeframe table (no timeframe column)...")

        # Build the replacement table next to the legacy one.
        self._con.execute("""
            CREATE TABLE IF NOT EXISTS ohlcv_new (
                pair VARCHAR NOT NULL,
                timestamp TIMESTAMP NOT NULL,
                open DOUBLE NOT NULL,
                high DOUBLE NOT NULL,
                low DOUBLE NOT NULL,
                close DOUBLE NOT NULL,
                volume DOUBLE NOT NULL,
                trades INTEGER,
                PRIMARY KEY (pair, timestamp)
            )
        """)

        if "open_time" in existing_cols:
            # Legacy layout with open_time: volume is taken from quote_volume.
            self._con.execute("""
                INSERT OR REPLACE INTO ohlcv_new
                SELECT
                    pair,
                    open_time AS timestamp,
                    open, high, low, close,
                    quote_volume AS volume,
                    trades
                FROM ohlcv
            """)
        else:
            # Legacy layout that only carries the extra 'timeframe' column.
            self._con.execute("""
                INSERT OR REPLACE INTO ohlcv_new
                SELECT
                    pair,
                    timestamp,
                    open, high, low, close,
                    volume,
                    trades
                FROM ohlcv
            """)

        # Swap the migrated table into place.
        self._con.execute("DROP TABLE ohlcv")
        self._con.execute("ALTER TABLE ohlcv_new RENAME TO ohlcv")
        logger.info("Migration complete")

    # No-op when the table already exists (including right after migration).
    self._con.execute("""
        CREATE TABLE IF NOT EXISTS ohlcv (
            pair VARCHAR NOT NULL,
            timestamp TIMESTAMP NOT NULL,
            open DOUBLE NOT NULL,
            high DOUBLE NOT NULL,
            low DOUBLE NOT NULL,
            close DOUBLE NOT NULL,
            volume DOUBLE NOT NULL,
            trades INTEGER,
            PRIMARY KEY (pair, timestamp)
        )
    """)

    self._con.execute("""
        CREATE INDEX IF NOT EXISTS idx_ohlcv_pair_ts
        ON ohlcv(pair, timestamp DESC)
    """)

    # Key/value table; currently only the 'timeframe' key is written here.
    self._con.execute("""
        CREATE TABLE IF NOT EXISTS meta (
            key VARCHAR PRIMARY KEY,
            value VARCHAR NOT NULL
        )
    """)
    self._con.execute("""
        INSERT OR REPLACE INTO meta(key, value) VALUES ('timeframe', ?)
    """, [self.timeframe])

    logger.info(f"Database initialized: {self.db_path} (timeframe={self.timeframe})")
199
+
200
def insert_klines(self, pair: str, klines: list[dict]) -> None:
    """Upsert klines for one pair (INSERT OR REPLACE on (pair, timestamp)).

    Batches of up to 10 rows go through ``executemany``; larger batches are
    converted to a Polars frame and inserted in one statement through a
    temporarily registered Arrow view.

    Timestamp normalization (applied identically on both insert paths):
        - Timezone info is dropped (the wall-clock value is kept as-is,
          no conversion is performed).
        - If seconds or microseconds are non-zero, the value is rounded
          UP to the next whole minute.

    Args:
        pair (str): Trading pair (e.g., "BTCUSDT").
        klines (list[dict]): Kline dictionaries. Each must contain:
            - timestamp (datetime): Bar open time
            - open (float): Open price
            - high (float): High price
            - low (float): Low price
            - close (float): Close price
            - volume (float): Trading volume
            - trades (int, optional): Number of trades (NULL if absent)

    Raises:
        KeyError: If a kline lacks one of the required keys.

    Example:
        ```python
        from datetime import datetime

        store.insert_klines("BTCUSDT", [
            {
                "timestamp": datetime(2024, 1, 1, 10, 0),
                "open": 45000.0,
                "high": 45100.0,
                "low": 44900.0,
                "close": 45050.0,
                "volume": 100.5,
                "trades": 150,
            }
        ])

        # Re-inserting the same (pair, timestamp) replaces the stored row.
        ```

    Note:
        An empty ``klines`` list is silently ignored.
        The inserted row count is logged at debug level.
    """
    if not klines:
        return

    def _normalize(ts: datetime) -> datetime:
        """Return ``ts`` timezone-naive and aligned to a whole minute (rounding up)."""
        naive = ts.replace(tzinfo=None)
        if naive.second == 0 and naive.microsecond == 0:
            return naive
        return naive.replace(second=0, microsecond=0) + timedelta(minutes=1)

    # Normalize up front so a kline is stored under the same primary key
    # regardless of which insert path its batch size selects.
    timestamps = [_normalize(k["timestamp"]) for k in klines]

    if len(klines) <= 10:
        self._con.executemany(
            "INSERT OR REPLACE INTO ohlcv VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            [
                (
                    pair,
                    ts,
                    k["open"],
                    k["high"],
                    k["low"],
                    k["close"],
                    k["volume"],
                    k.get("trades"),
                )
                for ts, k in zip(timestamps, klines)
            ],
        )
    else:
        # Column order must match the ohlcv table: the insert uses SELECT *.
        df = pl.DataFrame(
            {
                "pair": [pair] * len(klines),
                "timestamp": timestamps,
                "open": [k["open"] for k in klines],
                "high": [k["high"] for k in klines],
                "low": [k["low"] for k in klines],
                "close": [k["close"] for k in klines],
                "volume": [k["volume"] for k in klines],
                "trades": [k.get("trades") for k in klines],
            }
        )
        self._con.register("temp_klines", df.to_arrow())
        try:
            self._con.execute("INSERT OR REPLACE INTO ohlcv SELECT * FROM temp_klines")
        finally:
            # Drop the temporary view even when the insert fails so it does
            # not linger on the connection.
            self._con.unregister("temp_klines")

    logger.debug(f"Inserted {len(klines):,} rows for {pair}")
319
+
320
def get_time_bounds(self, pair: str) -> tuple[Optional[datetime], Optional[datetime]]:
    """Return the earliest and latest stored timestamps for ``pair``.

    Handy for checking data availability or deciding where an incremental
    update should resume.

    Args:
        pair (str): Trading pair (e.g., "BTCUSDT").

    Returns:
        tuple[datetime | None, datetime | None]: ``(min_timestamp,
        max_timestamp)``, or ``(None, None)`` when the pair has no rows.

    Example:
        ```python
        min_ts, max_ts = store.get_time_bounds("BTCUSDT")
        if min_ts and max_ts:
            print(f"Data available: {min_ts} to {max_ts}")
        else:
            print("No data available")
        ```
    """
    bounds_sql = """
        SELECT MIN(timestamp), MAX(timestamp)
        FROM ohlcv
        WHERE pair = ?
    """
    row = self._con.execute(bounds_sql, [pair]).fetchone()

    # The aggregates yield NULLs when no rows match the pair.
    if not row or not row[0]:
        return (None, None)
    return (row[0], row[1])
360
+
361
def find_gaps(
    self,
    pair: str,
    start: datetime,
    end: datetime,
    tf_minutes: int,
) -> list[tuple[datetime, datetime]]:
    """Find missing bars for ``pair`` between ``start`` and ``end``.

    Walks the expected bar grid ``start, start + tf, start + 2*tf, ...``
    up to and including ``end`` and reports every maximal run of grid
    points that has no stored row.

    Args:
        pair (str): Trading pair (e.g., "BTCUSDT").
        start (datetime): Start of the expected range (first grid point).
        end (datetime): End of the expected range (inclusive).
        tf_minutes (int): Timeframe in minutes (e.g., 1 for 1m, 5 for 5m).
            Must be positive.

    Returns:
        list[tuple[datetime, datetime]]: ``(gap_start, gap_end)`` tuples.
        For a gap followed by an existing bar, ``gap_end`` is the last
        missing grid point. For a gap that runs to the end of the range,
        ``gap_end`` is ``end`` itself. Empty list if nothing is missing.

    Raises:
        ValueError: If ``tf_minutes`` is not positive (the scan could
            otherwise never advance).

    Example:
        ```python
        from datetime import datetime

        gaps = store.find_gaps(
            pair="BTCUSDT",
            start=datetime(2024, 1, 1),
            end=datetime(2024, 1, 31),
            tf_minutes=1,
        )
        for gap_start, gap_end in gaps:
            print(f"{gap_start} to {gap_end} ({gap_end - gap_start})")
        ```

    Note:
        Returns the full range ``[(start, end)]`` if no data exists.
        The scan visits every expected bar in Python, so it is expensive
        for large date ranges.
        Stored timestamps are matched by exact equality against the grid,
        so ``start`` should be aligned to the bar grid.
    """
    if tf_minutes <= 0:
        raise ValueError(f"tf_minutes must be a positive integer, got {tf_minutes!r}")

    existing = self._con.execute("""
        SELECT timestamp
        FROM ohlcv
        WHERE pair = ? AND timestamp BETWEEN ? AND ?
        ORDER BY timestamp
    """, [pair, start, end]).fetchall()

    if not existing:
        return [(start, end)]

    existing_times = {row[0] for row in existing}
    step = timedelta(minutes=tf_minutes)
    gaps: list[tuple[datetime, datetime]] = []

    gap_start: Optional[datetime] = None
    current = start

    while current <= end:
        if current not in existing_times:
            # Open a gap at the first missing bar of a run.
            if gap_start is None:
                gap_start = current
        elif gap_start is not None:
            # A present bar closes the run; the gap ended one step earlier.
            gaps.append((gap_start, current - step))
            gap_start = None
        current += step

    # A run still open at the end of the range extends to `end`.
    if gap_start is not None:
        gaps.append((gap_start, end))

    return gaps
448
+
449
def load(
    self,
    pair: str,
    hours: Optional[int] = None,
    start: Optional[datetime] = None,
    end: Optional[datetime] = None,
) -> pl.DataFrame:
    """Load OHLCV rows for a single trading pair.

    Output columns: pair, timestamp, open, high, low, close, volume, trades

    Exactly one time filter is applied, chosen in this order:
    ``hours`` (if not None), then ``start`` and ``end`` together, then
    ``start`` alone, then ``end`` alone. With no filter all rows for the
    pair are returned.

    Args:
        pair (str): Trading pair (e.g., "BTCUSDT").
        hours (int | None): Keep rows newer than the database's ``NOW()``
            minus this many hours. When given, ``start``/``end`` are ignored.
        start (datetime | None): Lower bound (inclusive).
        end (datetime | None): Upper bound (inclusive).

    Returns:
        pl.DataFrame: Rows sorted by timestamp, with any time zone removed
        from the ``timestamp`` column.

    Example:
        ```python
        df = store.load("BTCUSDT", hours=24)

        df = store.load(
            "BTCUSDT",
            start=datetime(2024, 1, 1),
            end=datetime(2024, 1, 31),
        )
        ```
    """
    sql_parts = ["""
        SELECT
            ? AS pair,
            timestamp, open, high, low, close, volume, trades
        FROM ohlcv
        WHERE pair = ?
    """]
    # The pair is bound twice: once for the projected column, once for the filter.
    bind_values: list[object] = [pair, pair]

    if hours is not None:
        sql_parts.append(f" AND timestamp > NOW() - INTERVAL '{int(hours)}' HOUR")
    elif start and end:
        sql_parts.append(" AND timestamp BETWEEN ? AND ?")
        bind_values += [start, end]
    elif start:
        sql_parts.append(" AND timestamp >= ?")
        bind_values.append(start)
    elif end:
        sql_parts.append(" AND timestamp <= ?")
        bind_values.append(end)

    sql_parts.append(" ORDER BY timestamp")
    frame = self._con.execute("".join(sql_parts), bind_values).pl()

    if 'timestamp' not in frame.columns:
        return frame

    # Strip any time zone so callers always receive naive timestamps.
    return frame.with_columns(
        pl.col('timestamp').dt.replace_time_zone(None)
    )
515
+
516
+ def load_many_pandas(
517
+ self,
518
+ pairs: list[str],
519
+ start: datetime | None = None,
520
+ end: datetime | None = None,
521
+ ) -> pd.DataFrame:
522
+ """Load data for multiple pairs as Pandas DataFrame.
523
+
524
+ Convenience wrapper around load_many() for Pandas compatibility.
525
+
526
+ Args:
527
+ pairs (list[str]): List of trading pairs.
528
+ start (datetime | None): Start datetime (inclusive).
529
+ end (datetime | None): End datetime (inclusive).
530
+
531
+ Returns:
532
+ pd.DataFrame: Combined OHLCV data as Pandas DataFrame.
533
+
534
+ Example:
535
+ ```python
536
+ df = store.load_many_pandas(
537
+ pairs=["BTCUSDT", "ETHUSDT"],
538
+ start=datetime(2024, 1, 1),
539
+ end=datetime(2024, 1, 31)
540
+ )
541
+
542
+ # Use with pandas
543
+ df["returns"] = df.groupby("pair")["close"].pct_change()
544
+ ```
545
+ """
546
+ df_pl = self.load_many(pairs=pairs, start=start, end=end)
547
+ return df_pl.to_pandas()
548
+
549
def load_many(
    self,
    pairs: Iterable[str],
    hours: Optional[int] = None,
    start: Optional[datetime] = None,
    end: Optional[datetime] = None,
) -> pl.DataFrame:
    """Load OHLCV rows for several pairs with a single query.

    Output columns: pair, timestamp, open, high, low, close, volume, trades

    Exactly one time filter is applied, chosen in this order:
    ``hours`` (if not None), then ``start`` and ``end`` together, then
    ``start`` alone, then ``end`` alone.

    Args:
        pairs (Iterable[str]): Trading pairs to load.
        hours (int | None): Keep rows newer than the database's ``NOW()``
            minus this many hours. When given, ``start``/``end`` are ignored.
        start (datetime | None): Lower bound (inclusive).
        end (datetime | None): Upper bound (inclusive).

    Returns:
        pl.DataFrame: Rows sorted by (pair, timestamp), with any time zone
        removed from the ``timestamp`` column. When ``pairs`` is empty, an
        empty frame with the expected columns is returned without
        querying the database.

    Example:
        ```python
        df = store.load_many(
            pairs=["BTCUSDT", "ETHUSDT", "BNBUSDT"],
            start=datetime(2024, 1, 1),
            end=datetime(2024, 1, 31),
        )
        for pair in df["pair"].unique():
            print(f"{pair}: {len(df.filter(pl.col('pair') == pair))} bars")
        ```
    """
    # Materialize once: the iterable is needed for both counting and binding.
    pairs = list(pairs)

    if not pairs:
        empty_schema = {
            "pair": pl.Utf8,
            "timestamp": pl.Datetime,
            "open": pl.Float64,
            "high": pl.Float64,
            "low": pl.Float64,
            "close": pl.Float64,
            "volume": pl.Float64,
            "trades": pl.Int64,
        }
        return pl.DataFrame(schema=empty_schema)

    # One "?" per pair keeps the IN (...) list fully parameterized.
    placeholders = ",".join("?" for _ in pairs)
    sql_parts = [f"""
        SELECT
            pair,
            timestamp, open, high, low, close, volume, trades
        FROM ohlcv
        WHERE pair IN ({placeholders})
    """]
    bind_values: list[object] = list(pairs)

    if hours is not None:
        sql_parts.append(f" AND timestamp > NOW() - INTERVAL '{int(hours)}' HOUR")
    elif start and end:
        sql_parts.append(" AND timestamp BETWEEN ? AND ?")
        bind_values += [start, end]
    elif start:
        sql_parts.append(" AND timestamp >= ?")
        bind_values.append(start)
    elif end:
        sql_parts.append(" AND timestamp <= ?")
        bind_values.append(end)

    sql_parts.append(" ORDER BY pair, timestamp")
    frame = self._con.execute("".join(sql_parts), bind_values).pl()

    if 'timestamp' not in frame.columns:
        return frame

    # Strip any time zone so callers always receive naive timestamps.
    return frame.with_columns(
        pl.col('timestamp').dt.replace_time_zone(None)
    )
634
+
635
def get_stats(self) -> pl.DataFrame:
    """Summarize the stored data, one row per pair.

    Returns:
        pl.DataFrame: One row per pair, ordered by pair, with columns:
            - pair: Trading pair
            - rows: Number of stored bars
            - first_candle: Earliest timestamp
            - last_candle: Latest timestamp
            - total_volume: Sum of volume, rounded to 2 decimals

    Example:
        ```python
        stats = store.get_stats()
        for row in stats.iter_rows(named=True):
            days = (row["last_candle"] - row["first_candle"]).days
            print(f"{row['pair']}: {row['rows']:,} bars over {days} days")
        ```

    Note:
        The timeframe is not part of the output; it is kept in the
        ``meta`` table.
    """
    stats_sql = """
        SELECT
            pair,
            COUNT(*) as rows,
            MIN(timestamp) as first_candle,
            MAX(timestamp) as last_candle,
            ROUND(SUM(volume), 2) as total_volume
        FROM ohlcv
        GROUP BY pair
        ORDER BY pair
    """
    summary = self._con.execute(stats_sql)
    return summary.pl()
681
+
682
def close(self) -> None:
    """Close the underlying DuckDB connection.

    Call this once the store is no longer needed, typically from a
    ``finally`` block, so the connection is released even on errors.

    Example:
        ```python
        store = DuckDbSpotStore(Path("data/binance.duckdb"))
        try:
            df = store.load("BTCUSDT", hours=24)
        finally:
            store.close()
        ```
    """
    connection = self._con
    connection.close()
@@ -0,0 +1,10 @@
1
+ from signalflow.data.source.base import RawDataSource, RawDataLoader
2
+ from signalflow.data.source.binance import BinanceClient, BinanceSpotLoader
3
+
4
+
5
+ __all__ = [
6
+ "RawDataSource",
7
+ "RawDataLoader",
8
+ "BinanceSpotLoader",
9
+ "BinanceClient",
10
+ ]