dccd 2.1.0__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dccd-2.1.0 → dccd-2.2.0}/CHANGELOG.md +25 -0
- {dccd-2.1.0 → dccd-2.2.0}/PKG-INFO +108 -10
- {dccd-2.1.0 → dccd-2.2.0}/README.rst +98 -9
- {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/binance.py +12 -25
- {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/bitfinex.py +11 -15
- {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/bitmex.py +10 -20
- {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/bybit.py +15 -30
- {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/exchange.py +146 -30
- {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/kraken.py +12 -25
- {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/okx.py +12 -25
- dccd-2.2.0/dccd/daemon/__init__.py +36 -0
- dccd-2.2.0/dccd/daemon/cli.py +260 -0
- dccd-2.2.0/dccd/daemon/config.py +258 -0
- dccd-2.2.0/dccd/daemon/health.py +245 -0
- dccd-2.2.0/dccd/daemon/scheduler.py +153 -0
- dccd-2.2.0/dccd/daemon/storage.py +118 -0
- dccd-2.2.0/dccd/daemon/stream_manager.py +364 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/binance.py +54 -5
- {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/bybit.py +58 -2
- {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/coinbase.py +67 -4
- {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/exchange.py +263 -1
- {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/kraken.py +67 -12
- {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/okx.py +51 -2
- {dccd-2.1.0 → dccd-2.2.0}/dccd/models.py +16 -11
- dccd-2.2.0/dccd/tests/conftest.py +222 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_binance.py +33 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_binance_ws.py +72 -5
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_bitfinex.py +3 -3
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_bitmex.py +5 -5
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_bybit.py +33 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_bybit_ws.py +6 -5
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_coinbase.py +33 -0
- dccd-2.2.0/dccd/tests/test_daemon_cli.py +108 -0
- dccd-2.2.0/dccd/tests/test_daemon_config.py +175 -0
- dccd-2.2.0/dccd/tests/test_daemon_health.py +85 -0
- dccd-2.2.0/dccd/tests/test_daemon_scheduler.py +152 -0
- dccd-2.2.0/dccd/tests/test_daemon_storage.py +204 -0
- dccd-2.2.0/dccd/tests/test_daemon_stream_manager.py +368 -0
- dccd-2.2.0/dccd/tests/test_kraken.py +80 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_kraken_ws.py +7 -6
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_models.py +7 -1
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_okx.py +33 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_okx_ws.py +6 -5
- {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/PKG-INFO +108 -10
- {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/SOURCES.txt +14 -0
- dccd-2.2.0/dccd.egg-info/entry_points.txt +2 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/requires.txt +10 -0
- {dccd-2.1.0 → dccd-2.2.0}/pyproject.toml +7 -3
- dccd-2.1.0/dccd/tests/conftest.py +0 -104
- dccd-2.1.0/dccd/tests/test_kraken.py +0 -40
- {dccd-2.1.0 → dccd-2.2.0}/CONTRIBUTING.md +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/LICENSE.txt +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/MANIFEST.in +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/__init__.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/continuous_dl/__init__.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/histo_dl/__init__.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/process_data.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/__init__.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_date_time.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_histo_dl.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_io.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_process_data.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tests/test_websocket.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tools/__init__.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tools/date_time.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tools/io.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd/tools/websocket.py +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/dependency_links.txt +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/dccd.egg-info/top_level.txt +0 -0
- {dccd-2.1.0 → dccd-2.2.0}/setup.cfg +0 -0
|
@@ -6,6 +6,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [2.2.0] - 2026-05-17
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- `dccd/histo_dl/exchange.py` — `import_trades(start, end)` and `import_orderbook(depth)` public methods on `ImportDataCryptoCurrencies`; `_sort_trades` / `_sort_orderbook` helpers validate via Pydantic, sort and deduplicate; `trades_df` / `orderbook_df` attributes; `save_trades` / `save_orderbook` save helpers (#31)
|
|
14
|
+
- `dccd/histo_dl/{binance,kraken,bybit,okx,coinbase}.py` — `_import_trades(start, end)` and `_import_orderbook(depth)` implemented for all five exchanges; Binance and Kraken support full history via paginated endpoints; Bybit (≤ 1 000) and Coinbase (≤ 100) return recent-only snapshots (#31)
|
|
15
|
+
- `dccd/models.py` — `Trade.tid` made optional (`int | None`); `OrderBookEntry` gains required `side` field (`'bid'` or `'ask'`) and `count` made optional (`int | None`) (#31)
|
|
16
|
+
- `dccd/daemon/health.py` — `HealthMonitor`: rotating log handler (10 MB × 5 files), per-job metrics JSON, and optional Slack/Discord webhook alerts on consecutive failures; `JobMetrics` dataclass (#30)
|
|
17
|
+
- `dccd/daemon/cli.py` — `dccd` CLI (`validate`, `run`, `start`, `status`, `add` commands) via typer; `[project.scripts]` entrypoint; `typer>=0.12` added to the `daemon` extra (#30)
|
|
18
|
+
- `dccd/daemon/stream_manager.py` — `StreamManager` (one thread per `(exchange, pair)`, auto-restart on crash) and `SyncService` (periodic rclone push to all remotes, decoupled from collection) (#26)
|
|
19
|
+
- `dccd/daemon/config.py` — declarative YAML config with Pydantic v2: `CollectorConfig`, `HistoJob`, `StreamJob`, `StorageConfig`, `AlertConfig`, `RemoteConfig`, `load_config()` (#25)
|
|
20
|
+
- `dccd/daemon/storage.py` — `RemoteStorage.push()` via rclone; supports multiple remotes and root-path sync (#25, #26)
|
|
21
|
+
- `dccd/daemon/scheduler.py` — `build_histo_scheduler()` (APScheduler 3.x), `run_histo_job()`, `run_once()` (#25)
|
|
22
|
+
- `examples/config.example.yml` — annotated reference config for the daemon (#25)
|
|
23
|
+
- `examples/daemon_example.py` — programmatic daemon example in 6 steps (#30)
|
|
24
|
+
- `pyproject.toml` — `[daemon]` optional extra (`pyyaml`, `apscheduler`, `typer`) (#25, #30)
|
|
25
|
+
|
|
26
|
+
### Changed
|
|
27
|
+
|
|
28
|
+
- `dccd/daemon/scheduler.py` — `run_histo_job`, `build_histo_scheduler`, `run_once` accept an optional `health: HealthMonitor` parameter (#30)
|
|
29
|
+
- `dccd/daemon/stream_manager.py` — `StreamManager.__init__` accepts optional `health: HealthMonitor`; `_run_forever` records success/failure on each iteration (#30)
|
|
30
|
+
- `dccd/daemon/config.py` — `StorageConfig.remote` replaced by `remotes: list[RemoteConfig]` and `sync_interval: int` (#26)
|
|
31
|
+
- `dccd/histo_dl/{binance,coinbase,bybit,okx,kraken}.py` — `format_pair(crypto, fiat)` extracted as a static method, independently testable (#29)
|
|
32
|
+
- `dccd/continuous_dl/exchange.py` — unified `__call__`, `_push_trades`, `_push_book_updates`, `_get_book_state`, `_restore_book_state` in base class; separate `set_trades_saver` / `set_book_saver`; crash-recovery checkpoint; `snapshot_ts` injected into every snapshot payload (#28, #29)
|
|
33
|
+
|
|
9
34
|
## [2.1.0] - 2026-05-15
|
|
10
35
|
|
|
11
36
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dccd
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: Download Crypto Currency Data from different exchanges.
|
|
5
5
|
Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -33,6 +33,10 @@ Requires-Dist: pydantic>=2.0
|
|
|
33
33
|
Provides-Extra: io
|
|
34
34
|
Requires-Dist: pyarrow>=13; extra == "io"
|
|
35
35
|
Requires-Dist: polars>=0.20; extra == "io"
|
|
36
|
+
Provides-Extra: daemon
|
|
37
|
+
Requires-Dist: pyyaml>=6.0; extra == "daemon"
|
|
38
|
+
Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
|
|
39
|
+
Requires-Dist: typer>=0.12; extra == "daemon"
|
|
36
40
|
Provides-Extra: dev
|
|
37
41
|
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
38
42
|
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
@@ -41,12 +45,17 @@ Requires-Dist: ruff>=0.4; extra == "dev"
|
|
|
41
45
|
Requires-Dist: interrogate>=1.5; extra == "dev"
|
|
42
46
|
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
43
47
|
Requires-Dist: pandas-stubs>=2.0; extra == "dev"
|
|
48
|
+
Requires-Dist: pyyaml>=6.0; extra == "dev"
|
|
49
|
+
Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
|
|
50
|
+
Requires-Dist: typer>=0.12; extra == "dev"
|
|
44
51
|
Provides-Extra: doc
|
|
45
52
|
Requires-Dist: sphinx>=7.0; extra == "doc"
|
|
46
53
|
Requires-Dist: furo; extra == "doc"
|
|
47
54
|
Requires-Dist: numpydoc; extra == "doc"
|
|
48
55
|
Requires-Dist: sphinx-design; extra == "doc"
|
|
49
56
|
Requires-Dist: sphinx-copybutton; extra == "doc"
|
|
57
|
+
Requires-Dist: pyyaml>=6.0; extra == "doc"
|
|
58
|
+
Requires-Dist: apscheduler<4,>=3.10; extra == "doc"
|
|
50
59
|
Dynamic: license-file
|
|
51
60
|
|
|
52
61
|
=============================
|
|
@@ -103,6 +112,10 @@ With optional Parquet / Polars support::
|
|
|
103
112
|
|
|
104
113
|
$ pip install "dccd[io]"
|
|
105
114
|
|
|
115
|
+
With autonomous daemon support (APScheduler + PyYAML + Typer)::
|
|
116
|
+
|
|
117
|
+
$ pip install "dccd[daemon]"
|
|
118
|
+
|
|
106
119
|
From source::
|
|
107
120
|
|
|
108
121
|
$ git clone https://github.com/ArthurBernard/Download_Crypto_Currencies_Data
|
|
@@ -115,15 +128,15 @@ Supported exchanges
|
|
|
115
128
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
116
129
|
| Exchange | REST OHLCV | REST Trades | REST Order Book | WS OHLCV | WS Trades | WS Order Book |
|
|
117
130
|
+==================+============+=============+=================+==========+===========+================+
|
|
118
|
-
| Binance | ✓ |
|
|
131
|
+
| Binance | ✓ | ✓ | ✓ | | ✓ | ✓ |
|
|
119
132
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
120
|
-
| Coinbase | ✓ |
|
|
133
|
+
| Coinbase | ✓ | ✓† | ✓ | | | |
|
|
121
134
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
122
|
-
| Kraken | ✓ |
|
|
135
|
+
| Kraken | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
123
136
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
124
|
-
| Bybit | ✓ |
|
|
137
|
+
| Bybit | ✓ | ✓† | ✓ | | ✓ | ✓ |
|
|
125
138
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
126
|
-
| OKX | ✓ |
|
|
139
|
+
| OKX | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
127
140
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
128
141
|
| Bitfinex | | | | ✓\* | ✓ | ✓ |
|
|
129
142
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
@@ -132,6 +145,8 @@ Supported exchanges
|
|
|
132
145
|
|
|
133
146
|
\* Bitfinex WS OHLCV is aggregated from the trades stream via ``get_ohlc_bitfinex``.
|
|
134
147
|
|
|
148
|
+
† Recent trades only (Bybit ≤ 1 000, Coinbase ≤ 100) — no deep historical pagination via the public REST API.
|
|
149
|
+
|
|
135
150
|
Presentation
|
|
136
151
|
============
|
|
137
152
|
|
|
@@ -144,6 +159,14 @@ Presentation
|
|
|
144
159
|
Stream real-time data (order book, trades) via WebSocket with automatic
|
|
145
160
|
reconnection and configurable processing/saving callbacks.
|
|
146
161
|
|
|
162
|
+
**Daemon** ``dccd.daemon``
|
|
163
|
+
Autonomous, server-side collector driven by a YAML config. Runs REST
|
|
164
|
+
jobs on a schedule (APScheduler), opens WebSocket streams for real-time
|
|
165
|
+
collection, and periodically syncs all local data to one or more remote
|
|
166
|
+
destinations (NAS, S3, SFTP, …) via rclone. Multiple remotes and a
|
|
167
|
+
configurable sync interval are supported; collection is never blocked by
|
|
168
|
+
remote availability.
|
|
169
|
+
|
|
147
170
|
Output formats
|
|
148
171
|
--------------
|
|
149
172
|
|
|
@@ -155,7 +178,9 @@ Parquet files can be read back as either a ``pandas.DataFrame`` or a
|
|
|
155
178
|
Quick start
|
|
156
179
|
===========
|
|
157
180
|
|
|
158
|
-
Historical data (pandas)::
|
|
181
|
+
Historical data (pandas):
|
|
182
|
+
|
|
183
|
+
.. code-block:: python
|
|
159
184
|
|
|
160
185
|
from dccd.histo_dl import FromBinance
|
|
161
186
|
|
|
@@ -164,15 +189,21 @@ Historical data (pandas)::
|
|
|
164
189
|
obj.save(form='parquet')
|
|
165
190
|
df = obj.get_data() # pandas DataFrame
|
|
166
191
|
|
|
167
|
-
Polars output
|
|
192
|
+
Polars output:
|
|
193
|
+
|
|
194
|
+
.. code-block:: python
|
|
168
195
|
|
|
169
196
|
df_pl = obj.get_data(format='polars')
|
|
170
197
|
|
|
171
|
-
Incremental update (resume from last saved point)
|
|
198
|
+
Incremental update (resume from last saved point):
|
|
199
|
+
|
|
200
|
+
.. code-block:: python
|
|
172
201
|
|
|
173
202
|
obj.import_data(start='last', end='now').save(form='parquet')
|
|
174
203
|
|
|
175
|
-
Other exchanges::
|
|
204
|
+
Other exchanges:
|
|
205
|
+
|
|
206
|
+
.. code-block:: python
|
|
176
207
|
|
|
177
208
|
from dccd.histo_dl import FromKraken, FromBybit, FromOKX
|
|
178
209
|
|
|
@@ -180,6 +211,73 @@ Other exchanges::
|
|
|
180
211
|
FromBybit('/path/', 'BTC', 86400).import_data(start='2024-01-01', end='now').save()
|
|
181
212
|
FromOKX('/path/', 'BTC', 3600).import_data(start='2024-01-01', end='now').save()
|
|
182
213
|
|
|
214
|
+
Trades (historical or recent):
|
|
215
|
+
|
|
216
|
+
.. code-block:: python
|
|
217
|
+
|
|
218
|
+
from dccd.histo_dl import FromBinance, FromKraken
|
|
219
|
+
|
|
220
|
+
obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
|
|
221
|
+
obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
|
|
222
|
+
obj.save_trades(form='csv')
|
|
223
|
+
df = obj.trades_df # pandas DataFrame — columns: timestamp, price, amount, type, tid
|
|
224
|
+
|
|
225
|
+
# Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
|
|
226
|
+
FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
|
|
227
|
+
|
|
228
|
+
Order book snapshot:
|
|
229
|
+
|
|
230
|
+
.. code-block:: python
|
|
231
|
+
|
|
232
|
+
from dccd.histo_dl import FromOKX
|
|
233
|
+
|
|
234
|
+
obj = FromOKX('/path/', 'BTC', 3600)
|
|
235
|
+
obj.import_orderbook(depth=50)
|
|
236
|
+
obj.save_orderbook(form='csv')
|
|
237
|
+
df = obj.orderbook_df # columns: side, price, amount, count
|
|
238
|
+
|
|
239
|
+
Daemon (autonomous collector) — ``config.yml``:
|
|
240
|
+
|
|
241
|
+
.. code-block:: yaml
|
|
242
|
+
|
|
243
|
+
storage:
|
|
244
|
+
local_path: /data/crypto/
|
|
245
|
+
remotes:
|
|
246
|
+
- provider: rclone
|
|
247
|
+
remote: "mynas:crypto/"
|
|
248
|
+
sync_interval: 3600
|
|
249
|
+
|
|
250
|
+
histo_jobs:
|
|
251
|
+
- exchange: binance
|
|
252
|
+
pairs: [BTC/USDT, ETH/USDT]
|
|
253
|
+
span: 3600
|
|
254
|
+
format: parquet
|
|
255
|
+
by_period: Y
|
|
256
|
+
|
|
257
|
+
stream_jobs:
|
|
258
|
+
- exchange: binance
|
|
259
|
+
pairs: [BTC/USDT]
|
|
260
|
+
channels: [trades, book]
|
|
261
|
+
time_step: 60
|
|
262
|
+
|
|
263
|
+
.. code-block:: python
|
|
264
|
+
|
|
265
|
+
from dccd.daemon.config import load_config
|
|
266
|
+
from dccd.daemon.scheduler import run_once, build_histo_scheduler
|
|
267
|
+
from dccd.daemon.stream_manager import StreamManager
|
|
268
|
+
|
|
269
|
+
config = load_config('config.yml')
|
|
270
|
+
|
|
271
|
+
# One-shot: download all histo jobs once, then exit
|
|
272
|
+
run_once(config)
|
|
273
|
+
|
|
274
|
+
# Daemon mode: periodic REST + live WebSocket streams
|
|
275
|
+
scheduler = build_histo_scheduler(config)
|
|
276
|
+
scheduler.start()
|
|
277
|
+
|
|
278
|
+
mgr = StreamManager(config)
|
|
279
|
+
mgr.start() # runs until mgr.stop() is called
|
|
280
|
+
|
|
183
281
|
Links
|
|
184
282
|
=====
|
|
185
283
|
|
|
@@ -52,6 +52,10 @@ With optional Parquet / Polars support::
|
|
|
52
52
|
|
|
53
53
|
$ pip install "dccd[io]"
|
|
54
54
|
|
|
55
|
+
With autonomous daemon support (APScheduler + PyYAML + Typer)::
|
|
56
|
+
|
|
57
|
+
$ pip install "dccd[daemon]"
|
|
58
|
+
|
|
55
59
|
From source::
|
|
56
60
|
|
|
57
61
|
$ git clone https://github.com/ArthurBernard/Download_Crypto_Currencies_Data
|
|
@@ -64,15 +68,15 @@ Supported exchanges
|
|
|
64
68
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
65
69
|
| Exchange | REST OHLCV | REST Trades | REST Order Book | WS OHLCV | WS Trades | WS Order Book |
|
|
66
70
|
+==================+============+=============+=================+==========+===========+================+
|
|
67
|
-
| Binance | ✓ |
|
|
71
|
+
| Binance | ✓ | ✓ | ✓ | | ✓ | ✓ |
|
|
68
72
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
69
|
-
| Coinbase | ✓ |
|
|
73
|
+
| Coinbase | ✓ | ✓† | ✓ | | | |
|
|
70
74
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
71
|
-
| Kraken | ✓ |
|
|
75
|
+
| Kraken | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
72
76
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
73
|
-
| Bybit | ✓ |
|
|
77
|
+
| Bybit | ✓ | ✓† | ✓ | | ✓ | ✓ |
|
|
74
78
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
75
|
-
| OKX | ✓ |
|
|
79
|
+
| OKX | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
76
80
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
77
81
|
| Bitfinex | | | | ✓\* | ✓ | ✓ |
|
|
78
82
|
+------------------+------------+-------------+-----------------+----------+-----------+----------------+
|
|
@@ -81,6 +85,8 @@ Supported exchanges
|
|
|
81
85
|
|
|
82
86
|
\* Bitfinex WS OHLCV is aggregated from the trades stream via ``get_ohlc_bitfinex``.
|
|
83
87
|
|
|
88
|
+
† Recent trades only (Bybit ≤ 1 000, Coinbase ≤ 100) — no deep historical pagination via the public REST API.
|
|
89
|
+
|
|
84
90
|
Presentation
|
|
85
91
|
============
|
|
86
92
|
|
|
@@ -93,6 +99,14 @@ Presentation
|
|
|
93
99
|
Stream real-time data (order book, trades) via WebSocket with automatic
|
|
94
100
|
reconnection and configurable processing/saving callbacks.
|
|
95
101
|
|
|
102
|
+
**Daemon** ``dccd.daemon``
|
|
103
|
+
Autonomous, server-side collector driven by a YAML config. Runs REST
|
|
104
|
+
jobs on a schedule (APScheduler), opens WebSocket streams for real-time
|
|
105
|
+
collection, and periodically syncs all local data to one or more remote
|
|
106
|
+
destinations (NAS, S3, SFTP, …) via rclone. Multiple remotes and a
|
|
107
|
+
configurable sync interval are supported; collection is never blocked by
|
|
108
|
+
remote availability.
|
|
109
|
+
|
|
96
110
|
Output formats
|
|
97
111
|
--------------
|
|
98
112
|
|
|
@@ -104,7 +118,9 @@ Parquet files can be read back as either a ``pandas.DataFrame`` or a
|
|
|
104
118
|
Quick start
|
|
105
119
|
===========
|
|
106
120
|
|
|
107
|
-
Historical data (pandas)::
|
|
121
|
+
Historical data (pandas):
|
|
122
|
+
|
|
123
|
+
.. code-block:: python
|
|
108
124
|
|
|
109
125
|
from dccd.histo_dl import FromBinance
|
|
110
126
|
|
|
@@ -113,15 +129,21 @@ Historical data (pandas)::
|
|
|
113
129
|
obj.save(form='parquet')
|
|
114
130
|
df = obj.get_data() # pandas DataFrame
|
|
115
131
|
|
|
116
|
-
Polars output
|
|
132
|
+
Polars output:
|
|
133
|
+
|
|
134
|
+
.. code-block:: python
|
|
117
135
|
|
|
118
136
|
df_pl = obj.get_data(format='polars')
|
|
119
137
|
|
|
120
|
-
Incremental update (resume from last saved point)
|
|
138
|
+
Incremental update (resume from last saved point):
|
|
139
|
+
|
|
140
|
+
.. code-block:: python
|
|
121
141
|
|
|
122
142
|
obj.import_data(start='last', end='now').save(form='parquet')
|
|
123
143
|
|
|
124
|
-
Other exchanges::
|
|
144
|
+
Other exchanges:
|
|
145
|
+
|
|
146
|
+
.. code-block:: python
|
|
125
147
|
|
|
126
148
|
from dccd.histo_dl import FromKraken, FromBybit, FromOKX
|
|
127
149
|
|
|
@@ -129,6 +151,73 @@ Other exchanges::
|
|
|
129
151
|
FromBybit('/path/', 'BTC', 86400).import_data(start='2024-01-01', end='now').save()
|
|
130
152
|
FromOKX('/path/', 'BTC', 3600).import_data(start='2024-01-01', end='now').save()
|
|
131
153
|
|
|
154
|
+
Trades (historical or recent):
|
|
155
|
+
|
|
156
|
+
.. code-block:: python
|
|
157
|
+
|
|
158
|
+
from dccd.histo_dl import FromBinance, FromKraken
|
|
159
|
+
|
|
160
|
+
obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
|
|
161
|
+
obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
|
|
162
|
+
obj.save_trades(form='csv')
|
|
163
|
+
df = obj.trades_df # pandas DataFrame — columns: timestamp, price, amount, type, tid
|
|
164
|
+
|
|
165
|
+
# Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
|
|
166
|
+
FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
|
|
167
|
+
|
|
168
|
+
Order book snapshot:
|
|
169
|
+
|
|
170
|
+
.. code-block:: python
|
|
171
|
+
|
|
172
|
+
from dccd.histo_dl import FromOKX
|
|
173
|
+
|
|
174
|
+
obj = FromOKX('/path/', 'BTC', 3600)
|
|
175
|
+
obj.import_orderbook(depth=50)
|
|
176
|
+
obj.save_orderbook(form='csv')
|
|
177
|
+
df = obj.orderbook_df # columns: side, price, amount, count
|
|
178
|
+
|
|
179
|
+
Daemon (autonomous collector) — ``config.yml``:
|
|
180
|
+
|
|
181
|
+
.. code-block:: yaml
|
|
182
|
+
|
|
183
|
+
storage:
|
|
184
|
+
local_path: /data/crypto/
|
|
185
|
+
remotes:
|
|
186
|
+
- provider: rclone
|
|
187
|
+
remote: "mynas:crypto/"
|
|
188
|
+
sync_interval: 3600
|
|
189
|
+
|
|
190
|
+
histo_jobs:
|
|
191
|
+
- exchange: binance
|
|
192
|
+
pairs: [BTC/USDT, ETH/USDT]
|
|
193
|
+
span: 3600
|
|
194
|
+
format: parquet
|
|
195
|
+
by_period: Y
|
|
196
|
+
|
|
197
|
+
stream_jobs:
|
|
198
|
+
- exchange: binance
|
|
199
|
+
pairs: [BTC/USDT]
|
|
200
|
+
channels: [trades, book]
|
|
201
|
+
time_step: 60
|
|
202
|
+
|
|
203
|
+
.. code-block:: python
|
|
204
|
+
|
|
205
|
+
from dccd.daemon.config import load_config
|
|
206
|
+
from dccd.daemon.scheduler import run_once, build_histo_scheduler
|
|
207
|
+
from dccd.daemon.stream_manager import StreamManager
|
|
208
|
+
|
|
209
|
+
config = load_config('config.yml')
|
|
210
|
+
|
|
211
|
+
# One-shot: download all histo jobs once, then exit
|
|
212
|
+
run_once(config)
|
|
213
|
+
|
|
214
|
+
# Daemon mode: periodic REST + live WebSocket streams
|
|
215
|
+
scheduler = build_histo_scheduler(config)
|
|
216
|
+
scheduler.start()
|
|
217
|
+
|
|
218
|
+
mgr = StreamManager(config)
|
|
219
|
+
mgr.start() # runs until mgr.stop() is called
|
|
220
|
+
|
|
132
221
|
Links
|
|
133
222
|
=====
|
|
134
223
|
|
|
@@ -29,7 +29,6 @@ import time
|
|
|
29
29
|
# Third party packages
|
|
30
30
|
# Local packages
|
|
31
31
|
from dccd.continuous_dl.exchange import ContinuousDownloader
|
|
32
|
-
from dccd.process_data import set_marketdepth, set_orders, set_trades
|
|
33
32
|
from dccd.tools.io import IODataBase
|
|
34
33
|
|
|
35
34
|
__all__ = [
|
|
@@ -119,7 +118,7 @@ class DownloadBinanceData(ContinuousDownloader):
|
|
|
119
118
|
"""
|
|
120
119
|
|
|
121
120
|
def __init__(self, pair: str = 'BTCUSDT', time_step: int = 60,
|
|
122
|
-
until: int | None = 3600) -> None:
|
|
121
|
+
until: int | None = 3600, checkpoint_dir: str | None = None) -> None:
|
|
123
122
|
""" Initialize object. """
|
|
124
123
|
if until is None:
|
|
125
124
|
until = 0
|
|
@@ -128,13 +127,14 @@ class DownloadBinanceData(ContinuousDownloader):
|
|
|
128
127
|
|
|
129
128
|
self.pair = pair
|
|
130
129
|
url = _BINANCE_WS_URL.format(sym=pair.lower())
|
|
131
|
-
ContinuousDownloader.__init__(self, url, time_step=time_step, STOP=until
|
|
130
|
+
ContinuousDownloader.__init__(self, url, time_step=time_step, STOP=until,
|
|
131
|
+
checkpoint_dir=checkpoint_dir)
|
|
132
132
|
self._parser_data = {
|
|
133
133
|
'trades': self.parser_trades,
|
|
134
134
|
'book': self.parser_book,
|
|
135
135
|
}
|
|
136
136
|
self.logger = logging.getLogger(__name__)
|
|
137
|
-
self.
|
|
137
|
+
self._load_checkpoint()
|
|
138
138
|
|
|
139
139
|
async def _subscribe(self, **kwargs: object) -> None:
|
|
140
140
|
""" Wait for connection; Binance streams are declared in the URL. """
|
|
@@ -165,8 +165,7 @@ class DownloadBinanceData(ContinuousDownloader):
|
|
|
165
165
|
The ``data`` field from the combined-stream trade envelope.
|
|
166
166
|
|
|
167
167
|
"""
|
|
168
|
-
|
|
169
|
-
self._raw_parser(trade)
|
|
168
|
+
self._push_trades(_parser_trades(data))
|
|
170
169
|
|
|
171
170
|
def parser_book(self, data: dict) -> None:
|
|
172
171
|
""" Parse and update the order book from a depth message.
|
|
@@ -177,18 +176,7 @@ class DownloadBinanceData(ContinuousDownloader):
|
|
|
177
176
|
The ``data`` field from the combined-stream depth envelope.
|
|
178
177
|
|
|
179
178
|
"""
|
|
180
|
-
|
|
181
|
-
for price, qty in updates.items():
|
|
182
|
-
if qty == 0:
|
|
183
|
-
self.d.pop(price, None)
|
|
184
|
-
else:
|
|
185
|
-
self.d[price] = qty
|
|
186
|
-
self._data[self.t] = dict(self.d)
|
|
187
|
-
|
|
188
|
-
def _raw_parser(self, data: object) -> None:
|
|
189
|
-
if self.t not in self._data:
|
|
190
|
-
self._data[self.t] = []
|
|
191
|
-
self._data[self.t].append(data) # type: ignore[union-attr]
|
|
179
|
+
self._push_book_updates(_parser_book(data))
|
|
192
180
|
|
|
193
181
|
|
|
194
182
|
def get_trades_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
|
|
@@ -210,8 +198,7 @@ def get_trades_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
|
|
|
210
198
|
|
|
211
199
|
"""
|
|
212
200
|
downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
|
|
213
|
-
downloader.
|
|
214
|
-
downloader.set_saver(IODataBase(path, method=form))
|
|
201
|
+
downloader.set_trades_saver(IODataBase(path, method=form))
|
|
215
202
|
downloader(pair=pair)
|
|
216
203
|
|
|
217
204
|
|
|
@@ -234,8 +221,7 @@ def get_orderbook_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
|
|
|
234
221
|
|
|
235
222
|
"""
|
|
236
223
|
downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
|
|
237
|
-
downloader.
|
|
238
|
-
downloader.set_saver(IODataBase(path, method=form))
|
|
224
|
+
downloader.set_book_saver(IODataBase(path, method=form))
|
|
239
225
|
downloader(pair=pair)
|
|
240
226
|
|
|
241
227
|
|
|
@@ -246,7 +232,8 @@ def get_data_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
|
|
|
246
232
|
Parameters
|
|
247
233
|
----------
|
|
248
234
|
path : str
|
|
249
|
-
|
|
235
|
+
Root path; trades saved under ``<path>/trades/``, book under
|
|
236
|
+
``<path>/book/``.
|
|
250
237
|
pair : str, optional
|
|
251
238
|
Trading pair in Binance format (e.g. 'BTCUSDT'), default is 'BTCUSDT'.
|
|
252
239
|
time_step : int, optional
|
|
@@ -258,6 +245,6 @@ def get_data_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
|
|
|
258
245
|
|
|
259
246
|
"""
|
|
260
247
|
downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
|
|
261
|
-
downloader.
|
|
262
|
-
downloader.
|
|
248
|
+
downloader.set_trades_saver(IODataBase(f'{path}/trades', method=form))
|
|
249
|
+
downloader.set_book_saver(IODataBase(f'{path}/book', method=form))
|
|
263
250
|
downloader(pair=pair)
|
|
@@ -31,7 +31,6 @@ Low level API
|
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
33
|
# Built-in packages
|
|
34
|
-
import asyncio
|
|
35
34
|
import logging
|
|
36
35
|
import time
|
|
37
36
|
from typing import Any
|
|
@@ -117,7 +116,8 @@ class DownloadBitfinexData(ContinuousDownloader):
|
|
|
117
116
|
|
|
118
117
|
"""
|
|
119
118
|
|
|
120
|
-
def __init__(self, time_step: int = 60, until: int | None = 3600
|
|
119
|
+
def __init__(self, time_step: int = 60, until: int | None = 3600,
|
|
120
|
+
checkpoint_dir: str | None = None) -> None:
|
|
121
121
|
""" Initialize object.
|
|
122
122
|
|
|
123
123
|
Parameters
|
|
@@ -127,6 +127,9 @@ class DownloadBitfinexData(ContinuousDownloader):
|
|
|
127
127
|
until : int or None, optional
|
|
128
128
|
Seconds to run, or a future Unix timestamp to stop at.
|
|
129
129
|
Default is ``3600``.
|
|
130
|
+
checkpoint_dir : str or None, optional
|
|
131
|
+
Directory to write the order-book crash-recovery checkpoint.
|
|
132
|
+
Disabled when ``None`` (default).
|
|
130
133
|
|
|
131
134
|
"""
|
|
132
135
|
if until is None:
|
|
@@ -135,7 +138,7 @@ class DownloadBitfinexData(ContinuousDownloader):
|
|
|
135
138
|
until -= int(time.time())
|
|
136
139
|
|
|
137
140
|
ContinuousDownloader.__init__(self, 'bitfinex', time_step=time_step,
|
|
138
|
-
STOP=until)
|
|
141
|
+
STOP=until, checkpoint_dir=checkpoint_dir)
|
|
139
142
|
|
|
140
143
|
self._parser_data: dict[str, Any] = {
|
|
141
144
|
'book': self.parser_book,
|
|
@@ -144,7 +147,7 @@ class DownloadBitfinexData(ContinuousDownloader):
|
|
|
144
147
|
'trades_raw': self.parser_raw_trades,
|
|
145
148
|
}
|
|
146
149
|
self.logger = logging.getLogger(__name__)
|
|
147
|
-
self.
|
|
150
|
+
self._load_checkpoint()
|
|
148
151
|
|
|
149
152
|
def parser_raw_book(self, data: list[Any]) -> None:
|
|
150
153
|
""" Parse raw order book, each timestep set in a list all orders.
|
|
@@ -177,7 +180,9 @@ class DownloadBitfinexData(ContinuousDownloader):
|
|
|
177
180
|
else:
|
|
178
181
|
self.d.pop(parsed['price'])
|
|
179
182
|
|
|
180
|
-
self._data
|
|
183
|
+
self._data.setdefault(self.t, {'trades': [], 'book': {}})['book'] = {
|
|
184
|
+
v['price']: v['amount'] for v in self.d.values()
|
|
185
|
+
}
|
|
181
186
|
|
|
182
187
|
def parser_raw_trades(self, data: list[Any]) -> None:
|
|
183
188
|
""" Parse raw trade data tick-by-tick.
|
|
@@ -248,18 +253,9 @@ class DownloadBitfinexData(ContinuousDownloader):
|
|
|
248
253
|
|
|
249
254
|
"""
|
|
250
255
|
self.parser = self.get_parser(channel)
|
|
251
|
-
|
|
252
256
|
channel = channel[:-4] if channel[-4:] == '_raw' else channel
|
|
253
|
-
|
|
254
257
|
self.logger.info('Try connect WS and set {} stream.'.format(channel))
|
|
255
|
-
|
|
256
|
-
self.loop = asyncio.get_event_loop()
|
|
257
|
-
self.loop.run_until_complete(asyncio.gather(
|
|
258
|
-
self._connect(channel=channel, **kwargs),
|
|
259
|
-
self._loop()
|
|
260
|
-
))
|
|
261
|
-
|
|
262
|
-
return self
|
|
258
|
+
return super().__call__(channel=channel, **kwargs) # type: ignore[return-value]
|
|
263
259
|
|
|
264
260
|
|
|
265
261
|
# =========================================================================== #
|
|
@@ -31,7 +31,6 @@ Low level API
|
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
33
|
# Built-in packages
|
|
34
|
-
import asyncio
|
|
35
34
|
import time
|
|
36
35
|
from datetime import datetime as dt
|
|
37
36
|
from typing import Any
|
|
@@ -180,8 +179,8 @@ class DownloadBitmexData(ContinuousDownloader):
|
|
|
180
179
|
'orderBookL2_25': self.parser_book,
|
|
181
180
|
'trade': self.parser_trades,
|
|
182
181
|
}
|
|
183
|
-
self.d: dict[int, Any] = {}
|
|
184
182
|
self.start = False
|
|
183
|
+
self._load_checkpoint()
|
|
185
184
|
|
|
186
185
|
def parser_book(self, data: dict[str, Any]) -> None:
|
|
187
186
|
""" Parse and maintain a local copy of the order book.
|
|
@@ -214,7 +213,9 @@ class DownloadBitmexData(ContinuousDownloader):
|
|
|
214
213
|
else:
|
|
215
214
|
self.logger.error('Unknown action {}: {}'.format(action, data))
|
|
216
215
|
|
|
217
|
-
self._data
|
|
216
|
+
self._data.setdefault(self.t, {'trades': [], 'book': {}})['book'] = {
|
|
217
|
+
v['price']: v['amount'] for v in self.d.values()
|
|
218
|
+
}
|
|
218
219
|
|
|
219
220
|
def parser_trades(self, data: dict[str, Any]) -> None:
|
|
220
221
|
""" Parse trade data and accumulate records for the current timestep.
|
|
@@ -226,15 +227,12 @@ class DownloadBitmexData(ContinuousDownloader):
|
|
|
226
227
|
key with a list of trade records.
|
|
227
228
|
|
|
228
229
|
"""
|
|
229
|
-
|
|
230
|
-
for d in data['data']:
|
|
231
|
-
|
|
232
|
-
i += 1
|
|
230
|
+
slot = self._data.setdefault(self.t, {'trades': [], 'book': {}})
|
|
231
|
+
for i, d in enumerate(data['data']):
|
|
232
|
+
slot['trades'].append(_parser_trades(d, i))
|
|
233
233
|
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
else:
|
|
237
|
-
self._data[self.t] = _data
|
|
234
|
+
def _restore_book_state(self, state: dict[int, Any]) -> None: # type: ignore[override]
|
|
235
|
+
self.d = {int(k): v for k, v in state.items()}
|
|
238
236
|
|
|
239
237
|
async def on_message(self, data: dict[str, Any] | list[Any]) -> None:
|
|
240
238
|
""" Route an incoming websocket message to the appropriate parser. """
|
|
@@ -270,16 +268,8 @@ class DownloadBitmexData(ContinuousDownloader):
|
|
|
270
268
|
|
|
271
269
|
"""
|
|
272
270
|
self.parser = self.get_parser(args[0])
|
|
273
|
-
|
|
274
271
|
self.logger.info('Try connect WS and set {} stream.'.format(args[0]))
|
|
275
|
-
|
|
276
|
-
self.loop = asyncio.get_event_loop()
|
|
277
|
-
self.loop.run_until_complete(asyncio.gather(
|
|
278
|
-
self._connect(args=':'.join(args)),
|
|
279
|
-
self._loop()
|
|
280
|
-
))
|
|
281
|
-
|
|
282
|
-
return self
|
|
272
|
+
return super().__call__(args=':'.join(args)) # type: ignore[return-value]
|
|
283
273
|
|
|
284
274
|
|
|
285
275
|
# =========================================================================== #
|