dccd 2.1.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {dccd-2.1.0 → dccd-2.2.0}/CHANGELOG.md +25 -0
  2. {dccd-2.1.0 → dccd-2.2.0}/PKG-INFO +108 -10
  3. {dccd-2.1.0 → dccd-2.2.0}/README.rst +98 -9
  4. {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/binance.py +12 -25
  5. {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/bitfinex.py +11 -15
  6. {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/bitmex.py +10 -20
  7. {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/bybit.py +15 -30
  8. {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/exchange.py +146 -30
  9. {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/kraken.py +12 -25
  10. {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/okx.py +12 -25
  11. dccd-2.2.0/dccd/daemon/__init__.py +36 -0
  12. dccd-2.2.0/dccd/daemon/cli.py +260 -0
  13. dccd-2.2.0/dccd/daemon/config.py +258 -0
  14. dccd-2.2.0/dccd/daemon/health.py +245 -0
  15. dccd-2.2.0/dccd/daemon/scheduler.py +153 -0
  16. dccd-2.2.0/dccd/daemon/storage.py +118 -0
  17. dccd-2.2.0/dccd/daemon/stream_manager.py +364 -0
  18. {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/binance.py +54 -5
  19. {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/bybit.py +58 -2
  20. {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/coinbase.py +67 -4
  21. {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/exchange.py +263 -1
  22. {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/kraken.py +67 -12
  23. {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/okx.py +51 -2
  24. {dccd-2.1.0 → dccd-2.2.0}/dccd/models.py +16 -11
  25. dccd-2.2.0/dccd/tests/conftest.py +222 -0
  26. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_binance.py +33 -0
  27. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_binance_ws.py +72 -5
  28. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_bitfinex.py +3 -3
  29. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_bitmex.py +5 -5
  30. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_bybit.py +33 -0
  31. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_bybit_ws.py +6 -5
  32. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_coinbase.py +33 -0
  33. dccd-2.2.0/dccd/tests/test_daemon_cli.py +108 -0
  34. dccd-2.2.0/dccd/tests/test_daemon_config.py +175 -0
  35. dccd-2.2.0/dccd/tests/test_daemon_health.py +85 -0
  36. dccd-2.2.0/dccd/tests/test_daemon_scheduler.py +152 -0
  37. dccd-2.2.0/dccd/tests/test_daemon_storage.py +204 -0
  38. dccd-2.2.0/dccd/tests/test_daemon_stream_manager.py +368 -0
  39. dccd-2.2.0/dccd/tests/test_kraken.py +80 -0
  40. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_kraken_ws.py +7 -6
  41. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_models.py +7 -1
  42. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_okx.py +33 -0
  43. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_okx_ws.py +6 -5
  44. {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/PKG-INFO +108 -10
  45. {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/SOURCES.txt +14 -0
  46. dccd-2.2.0/dccd.egg-info/entry_points.txt +2 -0
  47. {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/requires.txt +10 -0
  48. {dccd-2.1.0 → dccd-2.2.0}/pyproject.toml +7 -3
  49. dccd-2.1.0/dccd/tests/conftest.py +0 -104
  50. dccd-2.1.0/dccd/tests/test_kraken.py +0 -40
  51. {dccd-2.1.0 → dccd-2.2.0}/CONTRIBUTING.md +0 -0
  52. {dccd-2.1.0 → dccd-2.2.0}/LICENSE.txt +0 -0
  53. {dccd-2.1.0 → dccd-2.2.0}/MANIFEST.in +0 -0
  54. {dccd-2.1.0 → dccd-2.2.0}/dccd/__init__.py +0 -0
  55. {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/__init__.py +0 -0
  56. {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/__init__.py +0 -0
  57. {dccd-2.1.0 → dccd-2.2.0}/dccd/process_data.py +0 -0
  58. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/__init__.py +0 -0
  59. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_date_time.py +0 -0
  60. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_histo_dl.py +0 -0
  61. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_io.py +0 -0
  62. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_process_data.py +0 -0
  63. {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_websocket.py +0 -0
  64. {dccd-2.1.0 → dccd-2.2.0}/dccd/tools/__init__.py +0 -0
  65. {dccd-2.1.0 → dccd-2.2.0}/dccd/tools/date_time.py +0 -0
  66. {dccd-2.1.0 → dccd-2.2.0}/dccd/tools/io.py +0 -0
  67. {dccd-2.1.0 → dccd-2.2.0}/dccd/tools/websocket.py +0 -0
  68. {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/dependency_links.txt +0 -0
  69. {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/top_level.txt +0 -0
  70. {dccd-2.1.0 → dccd-2.2.0}/setup.cfg +0 -0
@@ -6,6 +6,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [2.2.0] - 2026-05-17
10
+
11
+ ### Added
12
+
13
+ - `dccd/histo_dl/exchange.py` — `import_trades(start, end)` and `import_orderbook(depth)` public methods on `ImportDataCryptoCurrencies`; `_sort_trades` / `_sort_orderbook` helpers validate via Pydantic, sort and deduplicate; `trades_df` / `orderbook_df` attributes; `save_trades` / `save_orderbook` save helpers (#31)
14
+ - `dccd/histo_dl/{binance,kraken,bybit,okx,coinbase}.py` — `_import_trades(start, end)` and `_import_orderbook(depth)` implemented for all five exchanges; Binance and Kraken support full history via paginated endpoints; Bybit (≤ 1 000) and Coinbase (≤ 100) return recent-only snapshots (#31)
15
+ - `dccd/models.py` — `Trade.tid` made optional (`int | None`); `OrderBookEntry` gains required `side` field (`'bid'` or `'ask'`) and `count` made optional (`int | None`) (#31)
16
+ - `dccd/daemon/health.py` — `HealthMonitor`: rotating log handler (10 MB × 5 files), per-job metrics JSON, and optional Slack/Discord webhook alerts on consecutive failures; `JobMetrics` dataclass (#30)
17
+ - `dccd/daemon/cli.py` — `dccd` CLI (`validate`, `run`, `start`, `status`, `add` commands) via typer; `[project.scripts]` entrypoint; `typer>=0.12` added to the `daemon` extra (#30)
18
+ - `dccd/daemon/stream_manager.py` — `StreamManager` (one thread per `(exchange, pair)`, auto-restart on crash) and `SyncService` (periodic rclone push to all remotes, decoupled from collection) (#26)
19
+ - `dccd/daemon/config.py` — declarative YAML config with Pydantic v2: `CollectorConfig`, `HistoJob`, `StreamJob`, `StorageConfig`, `AlertConfig`, `RemoteConfig`, `load_config()` (#25)
20
+ - `dccd/daemon/storage.py` — `RemoteStorage.push()` via rclone; supports multiple remotes and root-path sync (#25, #26)
21
+ - `dccd/daemon/scheduler.py` — `build_histo_scheduler()` (APScheduler 3.x), `run_histo_job()`, `run_once()` (#25)
22
+ - `examples/config.example.yml` — annotated reference config for the daemon (#25)
23
+ - `examples/daemon_example.py` — programmatic daemon example in 6 steps (#30)
24
+ - `pyproject.toml` — `[daemon]` optional extra (`pyyaml`, `apscheduler`, `typer`) (#25, #30)
25
+
26
+ ### Changed
27
+
28
+ - `dccd/daemon/scheduler.py` — `run_histo_job`, `build_histo_scheduler`, `run_once` accept an optional `health: HealthMonitor` parameter (#30)
29
+ - `dccd/daemon/stream_manager.py` — `StreamManager.__init__` accepts optional `health: HealthMonitor`; `_run_forever` records success/failure on each iteration (#30)
30
+ - `dccd/daemon/config.py` — `StorageConfig.remote` replaced by `remotes: list[RemoteConfig]` and `sync_interval: int` (#26)
31
+ - `dccd/histo_dl/{binance,coinbase,bybit,okx,kraken}.py` — `format_pair(crypto, fiat)` extracted as a static method, independently testable (#29)
32
+ - `dccd/continuous_dl/exchange.py` — unified `__call__`, `_push_trades`, `_push_book_updates`, `_get_book_state`, `_restore_book_state` in base class; separate `set_trades_saver` / `set_book_saver`; crash-recovery checkpoint; `snapshot_ts` injected into every snapshot payload (#28, #29)
33
+
9
34
  ## [2.1.0] - 2026-05-15
10
35
 
11
36
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dccd
3
- Version: 2.1.0
3
+ Version: 2.2.0
4
4
  Summary: Download Crypto Currency Data from different exchanges.
5
5
  Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
6
6
  License: MIT
@@ -33,6 +33,10 @@ Requires-Dist: pydantic>=2.0
33
33
  Provides-Extra: io
34
34
  Requires-Dist: pyarrow>=13; extra == "io"
35
35
  Requires-Dist: polars>=0.20; extra == "io"
36
+ Provides-Extra: daemon
37
+ Requires-Dist: pyyaml>=6.0; extra == "daemon"
38
+ Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
39
+ Requires-Dist: typer>=0.12; extra == "daemon"
36
40
  Provides-Extra: dev
37
41
  Requires-Dist: pytest>=7.4; extra == "dev"
38
42
  Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
@@ -41,12 +45,17 @@ Requires-Dist: ruff>=0.4; extra == "dev"
41
45
  Requires-Dist: interrogate>=1.5; extra == "dev"
42
46
  Requires-Dist: mypy>=1.0; extra == "dev"
43
47
  Requires-Dist: pandas-stubs>=2.0; extra == "dev"
48
+ Requires-Dist: pyyaml>=6.0; extra == "dev"
49
+ Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
50
+ Requires-Dist: typer>=0.12; extra == "dev"
44
51
  Provides-Extra: doc
45
52
  Requires-Dist: sphinx>=7.0; extra == "doc"
46
53
  Requires-Dist: furo; extra == "doc"
47
54
  Requires-Dist: numpydoc; extra == "doc"
48
55
  Requires-Dist: sphinx-design; extra == "doc"
49
56
  Requires-Dist: sphinx-copybutton; extra == "doc"
57
+ Requires-Dist: pyyaml>=6.0; extra == "doc"
58
+ Requires-Dist: apscheduler<4,>=3.10; extra == "doc"
50
59
  Dynamic: license-file
51
60
 
52
61
  =============================
@@ -103,6 +112,10 @@ With optional Parquet / Polars support::
103
112
 
104
113
  $ pip install "dccd[io]"
105
114
 
115
+ With autonomous daemon support (APScheduler + PyYAML + Typer)::
116
+
117
+ $ pip install "dccd[daemon]"
118
+
106
119
  From source::
107
120
 
108
121
  $ git clone https://github.com/ArthurBernard/Download_Crypto_Currencies_Data
@@ -115,15 +128,15 @@ Supported exchanges
115
128
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
116
129
  | Exchange | REST OHLCV | REST Trades | REST Order Book | WS OHLCV | WS Trades | WS Order Book |
117
130
  +==================+============+=============+=================+==========+===========+================+
118
- | Binance | ✓ | | | | ✓ | ✓ |
131
+ | Binance | ✓ | ✓ | ✓ | | ✓ | ✓ |
119
132
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
120
- | Coinbase | ✓ | | | | | |
133
+ | Coinbase | ✓ | ✓† | ✓ | | | |
121
134
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
122
- | Kraken | ✓ | | | ✓ | ✓ | ✓ |
135
+ | Kraken | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
123
136
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
124
- | Bybit | ✓ | | | | ✓ | ✓ |
137
+ | Bybit | ✓ | ✓† | ✓ | | ✓ | ✓ |
125
138
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
126
- | OKX | ✓ | | | ✓ | ✓ | ✓ |
139
+ | OKX | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
127
140
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
128
141
  | Bitfinex | | | | ✓\* | ✓ | ✓ |
129
142
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
@@ -132,6 +145,8 @@ Supported exchanges
132
145
 
133
146
  \* Bitfinex WS OHLCV is aggregated from the trades stream via ``get_ohlc_bitfinex``.
134
147
 
148
+ † Recent trades only (Bybit ≤ 1 000, Coinbase ≤ 100) — no deep historical pagination via the public REST API.
149
+
135
150
  Presentation
136
151
  ============
137
152
 
@@ -144,6 +159,14 @@ Presentation
144
159
  Stream real-time data (order book, trades) via WebSocket with automatic
145
160
  reconnection and configurable processing/saving callbacks.
146
161
 
162
+ **Daemon** ``dccd.daemon``
163
+ Autonomous, server-side collector driven by a YAML config. Runs REST
164
+ jobs on a schedule (APScheduler), opens WebSocket streams for real-time
165
+ collection, and periodically syncs all local data to one or more remote
166
+ destinations (NAS, S3, SFTP, …) via rclone. Multiple remotes and a
167
+ configurable sync interval are supported; collection is never blocked by
168
+ remote availability.
169
+
147
170
  Output formats
148
171
  --------------
149
172
 
@@ -155,7 +178,9 @@ Parquet files can be read back as either a ``pandas.DataFrame`` or a
155
178
  Quick start
156
179
  ===========
157
180
 
158
- Historical data (pandas)::
181
+ Historical data (pandas):
182
+
183
+ .. code-block:: python
159
184
 
160
185
  from dccd.histo_dl import FromBinance
161
186
 
@@ -164,15 +189,21 @@ Historical data (pandas)::
164
189
  obj.save(form='parquet')
165
190
  df = obj.get_data() # pandas DataFrame
166
191
 
167
- Polars output::
192
+ Polars output:
193
+
194
+ .. code-block:: python
168
195
 
169
196
  df_pl = obj.get_data(format='polars')
170
197
 
171
- Incremental update (resume from last saved point)::
198
+ Incremental update (resume from last saved point):
199
+
200
+ .. code-block:: python
172
201
 
173
202
  obj.import_data(start='last', end='now').save(form='parquet')
174
203
 
175
- Other exchanges::
204
+ Other exchanges:
205
+
206
+ .. code-block:: python
176
207
 
177
208
  from dccd.histo_dl import FromKraken, FromBybit, FromOKX
178
209
 
@@ -180,6 +211,73 @@ Other exchanges::
180
211
  FromBybit('/path/', 'BTC', 86400).import_data(start='2024-01-01', end='now').save()
181
212
  FromOKX('/path/', 'BTC', 3600).import_data(start='2024-01-01', end='now').save()
182
213
 
214
+ Trades (historical or recent):
215
+
216
+ .. code-block:: python
217
+
218
+ from dccd.histo_dl import FromBinance, FromKraken
219
+
220
+ obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
221
+ obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
222
+ obj.save_trades(form='csv')
223
+ df = obj.trades_df # pandas DataFrame — columns: timestamp, price, amount, type, tid
224
+
225
+ # Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
226
+ FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
227
+
228
+ Order book snapshot:
229
+
230
+ .. code-block:: python
231
+
232
+ from dccd.histo_dl import FromOKX
233
+
234
+ obj = FromOKX('/path/', 'BTC', 3600)
235
+ obj.import_orderbook(depth=50)
236
+ obj.save_orderbook(form='csv')
237
+ df = obj.orderbook_df # columns: side, price, amount, count
238
+
239
+ Daemon (autonomous collector) — ``config.yml``:
240
+
241
+ .. code-block:: yaml
242
+
243
+ storage:
244
+ local_path: /data/crypto/
245
+ remotes:
246
+ - provider: rclone
247
+ remote: "mynas:crypto/"
248
+ sync_interval: 3600
249
+
250
+ histo_jobs:
251
+ - exchange: binance
252
+ pairs: [BTC/USDT, ETH/USDT]
253
+ span: 3600
254
+ format: parquet
255
+ by_period: Y
256
+
257
+ stream_jobs:
258
+ - exchange: binance
259
+ pairs: [BTC/USDT]
260
+ channels: [trades, book]
261
+ time_step: 60
262
+
263
+ .. code-block:: python
264
+
265
+ from dccd.daemon.config import load_config
266
+ from dccd.daemon.scheduler import run_once, build_histo_scheduler
267
+ from dccd.daemon.stream_manager import StreamManager
268
+
269
+ config = load_config('config.yml')
270
+
271
+ # One-shot: download all histo jobs once, then exit
272
+ run_once(config)
273
+
274
+ # Daemon mode: periodic REST + live WebSocket streams
275
+ scheduler = build_histo_scheduler(config)
276
+ scheduler.start()
277
+
278
+ mgr = StreamManager(config)
279
+ mgr.start() # runs until mgr.stop() is called
280
+
183
281
  Links
184
282
  =====
185
283
 
@@ -52,6 +52,10 @@ With optional Parquet / Polars support::
52
52
 
53
53
  $ pip install "dccd[io]"
54
54
 
55
+ With autonomous daemon support (APScheduler + PyYAML + Typer)::
56
+
57
+ $ pip install "dccd[daemon]"
58
+
55
59
  From source::
56
60
 
57
61
  $ git clone https://github.com/ArthurBernard/Download_Crypto_Currencies_Data
@@ -64,15 +68,15 @@ Supported exchanges
64
68
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
65
69
  | Exchange | REST OHLCV | REST Trades | REST Order Book | WS OHLCV | WS Trades | WS Order Book |
66
70
  +==================+============+=============+=================+==========+===========+================+
67
- | Binance | ✓ | | | | ✓ | ✓ |
71
+ | Binance | ✓ | ✓ | ✓ | | ✓ | ✓ |
68
72
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
69
- | Coinbase | ✓ | | | | | |
73
+ | Coinbase | ✓ | ✓† | ✓ | | | |
70
74
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
71
- | Kraken | ✓ | | | ✓ | ✓ | ✓ |
75
+ | Kraken | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
72
76
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
73
- | Bybit | ✓ | | | | ✓ | ✓ |
77
+ | Bybit | ✓ | ✓† | ✓ | | ✓ | ✓ |
74
78
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
75
- | OKX | ✓ | | | ✓ | ✓ | ✓ |
79
+ | OKX | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
76
80
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
77
81
  | Bitfinex | | | | ✓\* | ✓ | ✓ |
78
82
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
@@ -81,6 +85,8 @@ Supported exchanges
81
85
 
82
86
  \* Bitfinex WS OHLCV is aggregated from the trades stream via ``get_ohlc_bitfinex``.
83
87
 
88
+ † Recent trades only (Bybit ≤ 1 000, Coinbase ≤ 100) — no deep historical pagination via the public REST API.
89
+
84
90
  Presentation
85
91
  ============
86
92
 
@@ -93,6 +99,14 @@ Presentation
93
99
  Stream real-time data (order book, trades) via WebSocket with automatic
94
100
  reconnection and configurable processing/saving callbacks.
95
101
 
102
+ **Daemon** ``dccd.daemon``
103
+ Autonomous, server-side collector driven by a YAML config. Runs REST
104
+ jobs on a schedule (APScheduler), opens WebSocket streams for real-time
105
+ collection, and periodically syncs all local data to one or more remote
106
+ destinations (NAS, S3, SFTP, …) via rclone. Multiple remotes and a
107
+ configurable sync interval are supported; collection is never blocked by
108
+ remote availability.
109
+
96
110
  Output formats
97
111
  --------------
98
112
 
@@ -104,7 +118,9 @@ Parquet files can be read back as either a ``pandas.DataFrame`` or a
104
118
  Quick start
105
119
  ===========
106
120
 
107
- Historical data (pandas)::
121
+ Historical data (pandas):
122
+
123
+ .. code-block:: python
108
124
 
109
125
  from dccd.histo_dl import FromBinance
110
126
 
@@ -113,15 +129,21 @@ Historical data (pandas)::
113
129
  obj.save(form='parquet')
114
130
  df = obj.get_data() # pandas DataFrame
115
131
 
116
- Polars output::
132
+ Polars output:
133
+
134
+ .. code-block:: python
117
135
 
118
136
  df_pl = obj.get_data(format='polars')
119
137
 
120
- Incremental update (resume from last saved point)::
138
+ Incremental update (resume from last saved point):
139
+
140
+ .. code-block:: python
121
141
 
122
142
  obj.import_data(start='last', end='now').save(form='parquet')
123
143
 
124
- Other exchanges::
144
+ Other exchanges:
145
+
146
+ .. code-block:: python
125
147
 
126
148
  from dccd.histo_dl import FromKraken, FromBybit, FromOKX
127
149
 
@@ -129,6 +151,73 @@ Other exchanges::
129
151
  FromBybit('/path/', 'BTC', 86400).import_data(start='2024-01-01', end='now').save()
130
152
  FromOKX('/path/', 'BTC', 3600).import_data(start='2024-01-01', end='now').save()
131
153
 
154
+ Trades (historical or recent):
155
+
156
+ .. code-block:: python
157
+
158
+ from dccd.histo_dl import FromBinance, FromKraken
159
+
160
+ obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
161
+ obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
162
+ obj.save_trades(form='csv')
163
+ df = obj.trades_df # pandas DataFrame — columns: timestamp, price, amount, type, tid
164
+
165
+ # Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
166
+ FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
167
+
168
+ Order book snapshot:
169
+
170
+ .. code-block:: python
171
+
172
+ from dccd.histo_dl import FromOKX
173
+
174
+ obj = FromOKX('/path/', 'BTC', 3600)
175
+ obj.import_orderbook(depth=50)
176
+ obj.save_orderbook(form='csv')
177
+ df = obj.orderbook_df # columns: side, price, amount, count
178
+
179
+ Daemon (autonomous collector) — ``config.yml``:
180
+
181
+ .. code-block:: yaml
182
+
183
+ storage:
184
+ local_path: /data/crypto/
185
+ remotes:
186
+ - provider: rclone
187
+ remote: "mynas:crypto/"
188
+ sync_interval: 3600
189
+
190
+ histo_jobs:
191
+ - exchange: binance
192
+ pairs: [BTC/USDT, ETH/USDT]
193
+ span: 3600
194
+ format: parquet
195
+ by_period: Y
196
+
197
+ stream_jobs:
198
+ - exchange: binance
199
+ pairs: [BTC/USDT]
200
+ channels: [trades, book]
201
+ time_step: 60
202
+
203
+ .. code-block:: python
204
+
205
+ from dccd.daemon.config import load_config
206
+ from dccd.daemon.scheduler import run_once, build_histo_scheduler
207
+ from dccd.daemon.stream_manager import StreamManager
208
+
209
+ config = load_config('config.yml')
210
+
211
+ # One-shot: download all histo jobs once, then exit
212
+ run_once(config)
213
+
214
+ # Daemon mode: periodic REST + live WebSocket streams
215
+ scheduler = build_histo_scheduler(config)
216
+ scheduler.start()
217
+
218
+ mgr = StreamManager(config)
219
+ mgr.start() # runs until mgr.stop() is called
220
+
132
221
  Links
133
222
  =====
134
223
 
@@ -29,7 +29,6 @@ import time
29
29
  # Third party packages
30
30
  # Local packages
31
31
  from dccd.continuous_dl.exchange import ContinuousDownloader
32
- from dccd.process_data import set_marketdepth, set_orders, set_trades
33
32
  from dccd.tools.io import IODataBase
34
33
 
35
34
  __all__ = [
@@ -119,7 +118,7 @@ class DownloadBinanceData(ContinuousDownloader):
119
118
  """
120
119
 
121
120
  def __init__(self, pair: str = 'BTCUSDT', time_step: int = 60,
122
- until: int | None = 3600) -> None:
121
+ until: int | None = 3600, checkpoint_dir: str | None = None) -> None:
123
122
  """ Initialize object. """
124
123
  if until is None:
125
124
  until = 0
@@ -128,13 +127,14 @@ class DownloadBinanceData(ContinuousDownloader):
128
127
 
129
128
  self.pair = pair
130
129
  url = _BINANCE_WS_URL.format(sym=pair.lower())
131
- ContinuousDownloader.__init__(self, url, time_step=time_step, STOP=until)
130
+ ContinuousDownloader.__init__(self, url, time_step=time_step, STOP=until,
131
+ checkpoint_dir=checkpoint_dir)
132
132
  self._parser_data = {
133
133
  'trades': self.parser_trades,
134
134
  'book': self.parser_book,
135
135
  }
136
136
  self.logger = logging.getLogger(__name__)
137
- self.d: dict[str, float] = {}
137
+ self._load_checkpoint()
138
138
 
139
139
  async def _subscribe(self, **kwargs: object) -> None:
140
140
  """ Wait for connection; Binance streams are declared in the URL. """
@@ -165,8 +165,7 @@ class DownloadBinanceData(ContinuousDownloader):
165
165
  The ``data`` field from the combined-stream trade envelope.
166
166
 
167
167
  """
168
- for trade in _parser_trades(data):
169
- self._raw_parser(trade)
168
+ self._push_trades(_parser_trades(data))
170
169
 
171
170
  def parser_book(self, data: dict) -> None:
172
171
  """ Parse and update the order book from a depth message.
@@ -177,18 +176,7 @@ class DownloadBinanceData(ContinuousDownloader):
177
176
  The ``data`` field from the combined-stream depth envelope.
178
177
 
179
178
  """
180
- updates = _parser_book(data)
181
- for price, qty in updates.items():
182
- if qty == 0:
183
- self.d.pop(price, None)
184
- else:
185
- self.d[price] = qty
186
- self._data[self.t] = dict(self.d)
187
-
188
- def _raw_parser(self, data: object) -> None:
189
- if self.t not in self._data:
190
- self._data[self.t] = []
191
- self._data[self.t].append(data) # type: ignore[union-attr]
179
+ self._push_book_updates(_parser_book(data))
192
180
 
193
181
 
194
182
  def get_trades_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
@@ -210,8 +198,7 @@ def get_trades_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
210
198
 
211
199
  """
212
200
  downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
213
- downloader.set_process_data(set_trades)
214
- downloader.set_saver(IODataBase(path, method=form))
201
+ downloader.set_trades_saver(IODataBase(path, method=form))
215
202
  downloader(pair=pair)
216
203
 
217
204
 
@@ -234,8 +221,7 @@ def get_orderbook_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
234
221
 
235
222
  """
236
223
  downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
237
- downloader.set_process_data(set_marketdepth)
238
- downloader.set_saver(IODataBase(path, method=form))
224
+ downloader.set_book_saver(IODataBase(path, method=form))
239
225
  downloader(pair=pair)
240
226
 
241
227
 
@@ -246,7 +232,8 @@ def get_data_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
246
232
  Parameters
247
233
  ----------
248
234
  path : str
249
- Path to save data.
235
+ Root path; trades saved under ``<path>/trades/``, book under
236
+ ``<path>/book/``.
250
237
  pair : str, optional
251
238
  Trading pair in Binance format (e.g. 'BTCUSDT'), default is 'BTCUSDT'.
252
239
  time_step : int, optional
@@ -258,6 +245,6 @@ def get_data_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
258
245
 
259
246
  """
260
247
  downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
261
- downloader.set_process_data(set_orders)
262
- downloader.set_saver(IODataBase(path, method=form))
248
+ downloader.set_trades_saver(IODataBase(f'{path}/trades', method=form))
249
+ downloader.set_book_saver(IODataBase(f'{path}/book', method=form))
263
250
  downloader(pair=pair)
@@ -31,7 +31,6 @@ Low level API
31
31
  """
32
32
 
33
33
  # Built-in packages
34
- import asyncio
35
34
  import logging
36
35
  import time
37
36
  from typing import Any
@@ -117,7 +116,8 @@ class DownloadBitfinexData(ContinuousDownloader):
117
116
 
118
117
  """
119
118
 
120
- def __init__(self, time_step: int = 60, until: int | None = 3600) -> None:
119
+ def __init__(self, time_step: int = 60, until: int | None = 3600,
120
+ checkpoint_dir: str | None = None) -> None:
121
121
  """ Initialize object.
122
122
 
123
123
  Parameters
@@ -127,6 +127,9 @@ class DownloadBitfinexData(ContinuousDownloader):
127
127
  until : int or None, optional
128
128
  Seconds to run, or a future Unix timestamp to stop at.
129
129
  Default is ``3600``.
130
+ checkpoint_dir : str or None, optional
131
+ Directory to write the order-book crash-recovery checkpoint.
132
+ Disabled when ``None`` (default).
130
133
 
131
134
  """
132
135
  if until is None:
@@ -135,7 +138,7 @@ class DownloadBitfinexData(ContinuousDownloader):
135
138
  until -= int(time.time())
136
139
 
137
140
  ContinuousDownloader.__init__(self, 'bitfinex', time_step=time_step,
138
- STOP=until)
141
+ STOP=until, checkpoint_dir=checkpoint_dir)
139
142
 
140
143
  self._parser_data: dict[str, Any] = {
141
144
  'book': self.parser_book,
@@ -144,7 +147,7 @@ class DownloadBitfinexData(ContinuousDownloader):
144
147
  'trades_raw': self.parser_raw_trades,
145
148
  }
146
149
  self.logger = logging.getLogger(__name__)
147
- self.d: dict[str, Any] = {}
150
+ self._load_checkpoint()
148
151
 
149
152
  def parser_raw_book(self, data: list[Any]) -> None:
150
153
  """ Parse raw order book, each timestep set in a list all orders.
@@ -177,7 +180,9 @@ class DownloadBitfinexData(ContinuousDownloader):
177
180
  else:
178
181
  self.d.pop(parsed['price'])
179
182
 
180
- self._data[self.t] = {v['price']: v['amount'] for v in self.d.values()} # type: ignore[assignment]
183
+ self._data.setdefault(self.t, {'trades': [], 'book': {}})['book'] = {
184
+ v['price']: v['amount'] for v in self.d.values()
185
+ }
181
186
 
182
187
  def parser_raw_trades(self, data: list[Any]) -> None:
183
188
  """ Parse raw trade data tick-by-tick.
@@ -248,18 +253,9 @@ class DownloadBitfinexData(ContinuousDownloader):
248
253
 
249
254
  """
250
255
  self.parser = self.get_parser(channel)
251
-
252
256
  channel = channel[:-4] if channel[-4:] == '_raw' else channel
253
-
254
257
  self.logger.info('Try connect WS and set {} stream.'.format(channel))
255
-
256
- self.loop = asyncio.get_event_loop()
257
- self.loop.run_until_complete(asyncio.gather(
258
- self._connect(channel=channel, **kwargs),
259
- self._loop()
260
- ))
261
-
262
- return self
258
+ return super().__call__(channel=channel, **kwargs) # type: ignore[return-value]
263
259
 
264
260
 
265
261
  # =========================================================================== #
@@ -31,7 +31,6 @@ Low level API
31
31
  """
32
32
 
33
33
  # Built-in packages
34
- import asyncio
35
34
  import time
36
35
  from datetime import datetime as dt
37
36
  from typing import Any
@@ -180,8 +179,8 @@ class DownloadBitmexData(ContinuousDownloader):
180
179
  'orderBookL2_25': self.parser_book,
181
180
  'trade': self.parser_trades,
182
181
  }
183
- self.d: dict[int, Any] = {}
184
182
  self.start = False
183
+ self._load_checkpoint()
185
184
 
186
185
  def parser_book(self, data: dict[str, Any]) -> None:
187
186
  """ Parse and maintain a local copy of the order book.
@@ -214,7 +213,9 @@ class DownloadBitmexData(ContinuousDownloader):
214
213
  else:
215
214
  self.logger.error('Unknown action {}: {}'.format(action, data))
216
215
 
217
- self._data[self.t] = {v['price']: v['amount'] for v in self.d.values()} # type: ignore[assignment]
216
+ self._data.setdefault(self.t, {'trades': [], 'book': {}})['book'] = {
217
+ v['price']: v['amount'] for v in self.d.values()
218
+ }
218
219
 
219
220
  def parser_trades(self, data: dict[str, Any]) -> None:
220
221
  """ Parse trade data and accumulate records for the current timestep.
@@ -226,15 +227,12 @@ class DownloadBitmexData(ContinuousDownloader):
226
227
  key with a list of trade records.
227
228
 
228
229
  """
229
- i, _data = 0, []
230
- for d in data['data']:
231
- _data += [_parser_trades(d, i)]
232
- i += 1
230
+ slot = self._data.setdefault(self.t, {'trades': [], 'book': {}})
231
+ for i, d in enumerate(data['data']):
232
+ slot['trades'].append(_parser_trades(d, i))
233
233
 
234
- if self.t in self._data.keys():
235
- self._data[self.t] += _data
236
- else:
237
- self._data[self.t] = _data
234
+ def _restore_book_state(self, state: dict[int, Any]) -> None: # type: ignore[override]
235
+ self.d = {int(k): v for k, v in state.items()}
238
236
 
239
237
  async def on_message(self, data: dict[str, Any] | list[Any]) -> None:
240
238
  """ Route an incoming websocket message to the appropriate parser. """
@@ -270,16 +268,8 @@ class DownloadBitmexData(ContinuousDownloader):
270
268
 
271
269
  """
272
270
  self.parser = self.get_parser(args[0])
273
-
274
271
  self.logger.info('Try connect WS and set {} stream.'.format(args[0]))
275
-
276
- self.loop = asyncio.get_event_loop()
277
- self.loop.run_until_complete(asyncio.gather(
278
- self._connect(args=':'.join(args)),
279
- self._loop()
280
- ))
281
-
282
- return self
272
+ return super().__call__(args=':'.join(args)) # type: ignore[return-value]
283
273
 
284
274
 
285
275
  # =========================================================================== #