dccd 2.1.0__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {dccd-2.1.0 → dccd-2.3.0}/CHANGELOG.md +65 -0
  2. {dccd-2.1.0 → dccd-2.3.0}/PKG-INFO +136 -10
  3. {dccd-2.1.0 → dccd-2.3.0}/README.rst +124 -9
  4. {dccd-2.1.0 → dccd-2.3.0}/dccd/continuous_dl/binance.py +12 -25
  5. {dccd-2.1.0 → dccd-2.3.0}/dccd/continuous_dl/bitfinex.py +11 -15
  6. {dccd-2.1.0 → dccd-2.3.0}/dccd/continuous_dl/bitmex.py +10 -20
  7. {dccd-2.1.0 → dccd-2.3.0}/dccd/continuous_dl/bybit.py +15 -30
  8. {dccd-2.1.0 → dccd-2.3.0}/dccd/continuous_dl/exchange.py +146 -30
  9. {dccd-2.1.0 → dccd-2.3.0}/dccd/continuous_dl/kraken.py +12 -25
  10. {dccd-2.1.0 → dccd-2.3.0}/dccd/continuous_dl/okx.py +12 -25
  11. dccd-2.3.0/dccd/daemon/__init__.py +42 -0
  12. dccd-2.3.0/dccd/daemon/backfill.py +619 -0
  13. dccd-2.3.0/dccd/daemon/cli.py +319 -0
  14. dccd-2.3.0/dccd/daemon/config.py +295 -0
  15. dccd-2.3.0/dccd/daemon/health.py +245 -0
  16. dccd-2.3.0/dccd/daemon/scheduler.py +158 -0
  17. dccd-2.3.0/dccd/daemon/storage.py +118 -0
  18. dccd-2.3.0/dccd/daemon/stream_manager.py +375 -0
  19. dccd-2.3.0/dccd/histo_dl/__init__.py +64 -0
  20. {dccd-2.1.0 → dccd-2.3.0}/dccd/histo_dl/binance.py +59 -12
  21. {dccd-2.1.0 → dccd-2.3.0}/dccd/histo_dl/bybit.py +64 -7
  22. {dccd-2.1.0 → dccd-2.3.0}/dccd/histo_dl/coinbase.py +70 -10
  23. dccd-2.3.0/dccd/histo_dl/exchange.py +542 -0
  24. {dccd-2.1.0 → dccd-2.3.0}/dccd/histo_dl/kraken.py +71 -17
  25. {dccd-2.1.0 → dccd-2.3.0}/dccd/histo_dl/okx.py +56 -6
  26. {dccd-2.1.0 → dccd-2.3.0}/dccd/models.py +16 -11
  27. dccd-2.3.0/dccd/storage.py +338 -0
  28. dccd-2.3.0/dccd/tests/conftest.py +222 -0
  29. dccd-2.3.0/dccd/tests/test_backfill.py +285 -0
  30. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_binance.py +33 -0
  31. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_binance_ws.py +72 -5
  32. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_bitfinex.py +3 -3
  33. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_bitmex.py +5 -5
  34. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_bybit.py +33 -0
  35. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_bybit_ws.py +6 -5
  36. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_coinbase.py +33 -0
  37. dccd-2.3.0/dccd/tests/test_daemon_cli.py +108 -0
  38. dccd-2.3.0/dccd/tests/test_daemon_config.py +168 -0
  39. dccd-2.3.0/dccd/tests/test_daemon_health.py +85 -0
  40. dccd-2.3.0/dccd/tests/test_daemon_scheduler.py +152 -0
  41. dccd-2.3.0/dccd/tests/test_daemon_storage.py +204 -0
  42. dccd-2.3.0/dccd/tests/test_daemon_stream_manager.py +368 -0
  43. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_date_time.py +6 -1
  44. dccd-2.3.0/dccd/tests/test_histo_dl.py +76 -0
  45. dccd-2.3.0/dccd/tests/test_kraken.py +80 -0
  46. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_kraken_ws.py +7 -6
  47. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_models.py +7 -1
  48. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_okx.py +33 -0
  49. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_okx_ws.py +6 -5
  50. dccd-2.3.0/dccd/tests/test_storage.py +345 -0
  51. {dccd-2.1.0 → dccd-2.3.0}/dccd/tools/date_time.py +77 -24
  52. {dccd-2.1.0 → dccd-2.3.0}/dccd.egg-info/PKG-INFO +136 -10
  53. {dccd-2.1.0 → dccd-2.3.0}/dccd.egg-info/SOURCES.txt +18 -0
  54. dccd-2.3.0/dccd.egg-info/entry_points.txt +2 -0
  55. {dccd-2.1.0 → dccd-2.3.0}/dccd.egg-info/requires.txt +12 -0
  56. {dccd-2.1.0 → dccd-2.3.0}/pyproject.toml +7 -3
  57. dccd-2.1.0/dccd/histo_dl/__init__.py +0 -72
  58. dccd-2.1.0/dccd/histo_dl/exchange.py +0 -404
  59. dccd-2.1.0/dccd/tests/conftest.py +0 -104
  60. dccd-2.1.0/dccd/tests/test_histo_dl.py +0 -60
  61. dccd-2.1.0/dccd/tests/test_kraken.py +0 -40
  62. {dccd-2.1.0 → dccd-2.3.0}/CONTRIBUTING.md +0 -0
  63. {dccd-2.1.0 → dccd-2.3.0}/LICENSE.txt +0 -0
  64. {dccd-2.1.0 → dccd-2.3.0}/MANIFEST.in +0 -0
  65. {dccd-2.1.0 → dccd-2.3.0}/dccd/__init__.py +0 -0
  66. {dccd-2.1.0 → dccd-2.3.0}/dccd/continuous_dl/__init__.py +0 -0
  67. {dccd-2.1.0 → dccd-2.3.0}/dccd/process_data.py +0 -0
  68. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/__init__.py +0 -0
  69. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_io.py +0 -0
  70. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_process_data.py +0 -0
  71. {dccd-2.1.0 → dccd-2.3.0}/dccd/tests/test_websocket.py +0 -0
  72. {dccd-2.1.0 → dccd-2.3.0}/dccd/tools/__init__.py +0 -0
  73. {dccd-2.1.0 → dccd-2.3.0}/dccd/tools/io.py +0 -0
  74. {dccd-2.1.0 → dccd-2.3.0}/dccd/tools/websocket.py +0 -0
  75. {dccd-2.1.0 → dccd-2.3.0}/dccd.egg-info/dependency_links.txt +0 -0
  76. {dccd-2.1.0 → dccd-2.3.0}/dccd.egg-info/top_level.txt +0 -0
  77. {dccd-2.1.0 → dccd-2.3.0}/setup.cfg +0 -0
@@ -6,6 +6,71 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [2.3.0] - 2026-05-22
10
+
11
+ ### Added
12
+
13
+ - `dccd/storage.py` — `DataStore.is_period_complete(year)`: checks whether an annual parquet file contains all expected candles; `DataStore.missing_intervals(start, end)`: gap-detection — complete past years are skipped, incomplete years resume from the last saved row (#41)
14
+ - `dccd/daemon/backfill.py` — `_BackfillBase.run()` now iterates over `DataStore.missing_intervals()` instead of a single sliding window from `last_saved`; complete years are never re-downloaded (#41)
15
+ - `dccd/storage.py` — new `DataStore` class: unified read/write interface for OHLC, trades, and orderbook; `save(df)` (merge-on-TS, annual OHLC / daily trades+orderbook), `load(start, end)`, `existing_periods()`, `last_timestamp()` (#39)
16
+ - `dccd/tools/date_time.py` — `span_label(span)` converts seconds to short directory labels (``'1m'``, ``'1h'``, ``'1d'``…); `_SPAN_LABEL` mapping exported (#39)
17
+ - `doc/source/storage.rst` — Sphinx page for `DataStore` with directory layout examples (#39)
18
+
19
+ ### Changed
20
+
21
+ - `dccd collect` (formerly `dccd run`) — renamed to clarify the distinction: `collect` = one incremental batch, `backfill` = full historical download with gap detection, `start` = continuous daemon (#41)
22
+ - New storage directory layout: ``{data_path}/{exchange}/ohlc/{pair}/{span}/YYYY.parquet``, ``…/trades/{pair}/YYYY-MM-DD.parquet``, ``…/orderbook/{pair}/YYYY-MM-DD.parquet`` — replaces the old ``{Exchange}/Data/Clean_Data/{per}/{pair}/`` layout (#39)
23
+ - `dccd/histo_dl/exchange.py` — `save()`, `_get_last_date()`, `save_trades()`, `save_orderbook()` now delegate to `DataStore`; removed `last_df`, `_set_by_period`, `_name_file`, `_excel_format`; removed unused `set_hierarchy()` (#39, #41)
24
+ - `dccd/histo_dl/{binance,bybit,coinbase,okx}.py` — removed `full_path` overrides (base class sets the correct path via `DataStore`) (#39)
25
+ - `dccd/daemon/backfill.py`, `scheduler.py` — removed `by_period` parameter; `save()` call simplified (#39)
26
+ - `dccd/daemon/stream_manager.py` — WebSocket save path now built from `DataStore.directory` (#39)
27
+ - `dccd/daemon/config.py` — `HistoJob.by_period` field removed; granularity is automatic (#39)
28
+
29
+ - `dccd/histo_dl/exchange.py` — `save()` now supports `form='parquet'`; previously only `'xlsx'` and `'csv'` were handled (#35)
30
+ - `config.yml` — ready-to-use daemon config for minutely OHLC + real-time orderbook/trades on Binance, Kraken, and Bybit (#35)
31
+ - `dccd/daemon/backfill.py` — `OHLCBackfill` and `KrakenBackfill` strategy classes with shared retry/progress/save loop; `make_job()` factory; `run_backfill()` orchestrator; tqdm progress bars and optional `--parallel` execution (#38)
32
+ - `dccd/daemon/cli.py` — `dccd backfill` command: reads all job definitions from config, supports `--exchange` / `--pairs` filters, `--start`, `--parallel`, and `--dry-run` flags (#38)
33
+ - `dccd/daemon/config.py` — `SettingsConfig` with `data_path` and `timezone` fields; `CollectorConfig.settings` propagates `data_path` to `StorageConfig.local_path` when not set explicitly (#38)
34
+
35
+ ### Removed
36
+
37
+ - `scripts/backfill.py` — replaced by `dccd backfill` CLI command and `dccd.daemon.backfill` module (#38)
38
+
39
+ ### Fixed
40
+
41
+ - `dccd/histo_dl/exchange.py` — `save(form='parquet')` was silently ignored (logged a warning instead of writing the file) (#35)
42
+ - `dccd/histo_dl/exchange.py` — `_sort_data()` crashed with a ValueError when the API returned fewer candles than the expected window size; index is now derived from actual data (#36)
43
+ - `dccd/histo_dl/exchange.py` — `by_period='M'` produced minute-level file names (strftime `%M`) instead of year-month; added `_PERIOD_FMT` mapping so `'M'` → `'%Y-%m'` (#36)
44
+ - `dccd/histo_dl/exchange.py` — `self.end` now reflects the last candle timestamp so window-loop callers advance correctly (was stuck at `now` for Kraken) (#36)
45
+ - `dccd/histo_dl/binance.py` — missing `limit=1000` parameter caused Binance to return only 500 candles per request (#36)
46
+ - `dccd/histo_dl/bybit.py` — `limit` was 200; raised to 1 000 to match the API maximum (#36)
47
+ - `dccd/histo_dl/exchange.py` — `_sort_data()` dropped the minute just before a window boundary when the last trade arrived ≥2 spans early; grid now uses `self.end` directly as the exclusive stop (#36)
48
+
49
+ ## [2.2.0] - 2026-05-17
50
+
51
+ ### Added
52
+
53
+ - `dccd/histo_dl/exchange.py` — `import_trades(start, end)` and `import_orderbook(depth)` public methods on `ImportDataCryptoCurrencies`; `_sort_trades` / `_sort_orderbook` helpers validate via Pydantic, sort and deduplicate; `trades_df` / `orderbook_df` attributes; `save_trades` / `save_orderbook` save helpers (#31)
54
+ - `dccd/histo_dl/{binance,kraken,bybit,okx,coinbase}.py` — `_import_trades(start, end)` and `_import_orderbook(depth)` implemented for all five exchanges; Binance and Kraken support full history via paginated endpoints; Bybit (≤ 1 000) and Coinbase (≤ 100) return recent-only snapshots (#31)
55
+ - `dccd/models.py` — `Trade.tid` made optional (`int | None`); `OrderBookEntry` gains required `side` field (`'bid'` or `'ask'`) and `count` made optional (`int | None`) (#31)
56
+ - `dccd/daemon/health.py` — `HealthMonitor`: rotating log handler (10 MB × 5 files), per-job metrics JSON, and optional Slack/Discord webhook alerts on consecutive failures; `JobMetrics` dataclass (#30)
57
+ - `dccd/daemon/cli.py` — `dccd` CLI (`validate`, `run`, `start`, `status`, `add` commands) via typer; `[project.scripts]` entrypoint; `typer>=0.12` added to the `daemon` extra (#30)
58
+ - `dccd/daemon/stream_manager.py` — `StreamManager` (one thread per `(exchange, pair)`, auto-restart on crash) and `SyncService` (periodic rclone push to all remotes, decoupled from collection) (#26)
59
+ - `dccd/daemon/config.py` — declarative YAML config with Pydantic v2: `CollectorConfig`, `HistoJob`, `StreamJob`, `StorageConfig`, `AlertConfig`, `RemoteConfig`, `load_config()` (#25)
60
+ - `dccd/daemon/storage.py` — `RemoteStorage.push()` via rclone; supports multiple remotes and root-path sync (#25, #26)
61
+ - `dccd/daemon/scheduler.py` — `build_histo_scheduler()` (APScheduler 3.x), `run_histo_job()`, `run_once()` (#25)
62
+ - `examples/config.example.yml` — annotated reference config for the daemon (#25)
63
+ - `examples/daemon_example.py` — programmatic daemon example in 6 steps (#30)
64
+ - `pyproject.toml` — `[daemon]` optional extra (`pyyaml`, `apscheduler`, `typer`) (#25, #30)
65
+
66
+ ### Changed
67
+
68
+ - `dccd/daemon/scheduler.py` — `run_histo_job`, `build_histo_scheduler`, `run_once` accept an optional `health: HealthMonitor` parameter (#30)
69
+ - `dccd/daemon/stream_manager.py` — `StreamManager.__init__` accepts optional `health: HealthMonitor`; `_run_forever` records success/failure on each iteration (#30)
70
+ - `dccd/daemon/config.py` — `StorageConfig.remote` replaced by `remotes: list[RemoteConfig]` and `sync_interval: int` (#26)
71
+ - `dccd/histo_dl/{binance,coinbase,bybit,okx,kraken}.py` — `format_pair(crypto, fiat)` extracted as a static method, independently testable (#29)
72
+ - `dccd/continuous_dl/exchange.py` — unified `__call__`, `_push_trades`, `_push_book_updates`, `_get_book_state`, `_restore_book_state` in base class; separate `set_trades_saver` / `set_book_saver`; crash-recovery checkpoint; `snapshot_ts` injected into every snapshot payload (#28, #29)
73
+
9
74
  ## [2.1.0] - 2026-05-15
10
75
 
11
76
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dccd
3
- Version: 2.1.0
3
+ Version: 2.3.0
4
4
  Summary: Download Crypto Currency Data from different exchanges.
5
5
  Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
6
6
  License: MIT
@@ -33,6 +33,11 @@ Requires-Dist: pydantic>=2.0
33
33
  Provides-Extra: io
34
34
  Requires-Dist: pyarrow>=13; extra == "io"
35
35
  Requires-Dist: polars>=0.20; extra == "io"
36
+ Provides-Extra: daemon
37
+ Requires-Dist: pyyaml>=6.0; extra == "daemon"
38
+ Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
39
+ Requires-Dist: typer>=0.12; extra == "daemon"
40
+ Requires-Dist: tqdm>=4.64; extra == "daemon"
36
41
  Provides-Extra: dev
37
42
  Requires-Dist: pytest>=7.4; extra == "dev"
38
43
  Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
@@ -41,12 +46,18 @@ Requires-Dist: ruff>=0.4; extra == "dev"
41
46
  Requires-Dist: interrogate>=1.5; extra == "dev"
42
47
  Requires-Dist: mypy>=1.0; extra == "dev"
43
48
  Requires-Dist: pandas-stubs>=2.0; extra == "dev"
49
+ Requires-Dist: pyyaml>=6.0; extra == "dev"
50
+ Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
51
+ Requires-Dist: typer>=0.12; extra == "dev"
52
+ Requires-Dist: tqdm>=4.64; extra == "dev"
44
53
  Provides-Extra: doc
45
54
  Requires-Dist: sphinx>=7.0; extra == "doc"
46
55
  Requires-Dist: furo; extra == "doc"
47
56
  Requires-Dist: numpydoc; extra == "doc"
48
57
  Requires-Dist: sphinx-design; extra == "doc"
49
58
  Requires-Dist: sphinx-copybutton; extra == "doc"
59
+ Requires-Dist: pyyaml>=6.0; extra == "doc"
60
+ Requires-Dist: apscheduler<4,>=3.10; extra == "doc"
50
61
  Dynamic: license-file
51
62
 
52
63
  =============================
@@ -103,6 +114,10 @@ With optional Parquet / Polars support::
103
114
 
104
115
  $ pip install "dccd[io]"
105
116
 
117
+ With autonomous daemon support (APScheduler + PyYAML)::
118
+
119
+ $ pip install "dccd[daemon]"
120
+
106
121
  From source::
107
122
 
108
123
  $ git clone https://github.com/ArthurBernard/Download_Crypto_Currencies_Data
@@ -115,15 +130,15 @@ Supported exchanges
115
130
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
116
131
  | Exchange | REST OHLCV | REST Trades | REST Order Book | WS OHLCV | WS Trades | WS Order Book |
117
132
  +==================+============+=============+=================+==========+===========+================+
118
- | Binance | ✓ | | | | ✓ | ✓ |
133
+ | Binance | ✓ | ✓ | ✓ | | ✓ | ✓ |
119
134
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
120
- | Coinbase | ✓ | | | | | |
135
+ | Coinbase | ✓ | ✓† | ✓ | | | |
121
136
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
122
- | Kraken | ✓ | | | ✓ | ✓ | ✓ |
137
+ | Kraken | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
123
138
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
124
- | Bybit | ✓ | | | | ✓ | ✓ |
139
+ | Bybit | ✓ | ✓† | ✓ | | ✓ | ✓ |
125
140
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
126
- | OKX | ✓ | | | ✓ | ✓ | ✓ |
141
+ | OKX | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
127
142
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
128
143
  | Bitfinex | | | | ✓\* | ✓ | ✓ |
129
144
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
@@ -132,6 +147,8 @@ Supported exchanges
132
147
 
133
148
  \* Bitfinex WS OHLCV is aggregated from the trades stream via ``get_ohlc_bitfinex``.
134
149
 
150
+ † Recent trades only (Bybit ≤ 1 000, Coinbase ≤ 100) — no deep historical pagination via the public REST API.
151
+
135
152
  Presentation
136
153
  ============
137
154
 
@@ -144,6 +161,14 @@ Presentation
144
161
  Stream real-time data (order book, trades) via WebSocket with automatic
145
162
  reconnection and configurable processing/saving callbacks.
146
163
 
164
+ **Daemon** ``dccd.daemon``
165
+ Autonomous, server-side collector driven by a YAML config. Runs REST
166
+ jobs on a schedule (APScheduler), opens WebSocket streams for real-time
167
+ collection, and periodically syncs all local data to one or more remote
168
+ destinations (NAS, S3, SFTP, …) via rclone. Multiple remotes and a
169
+ configurable sync interval are supported; collection is never blocked by
170
+ remote availability.
171
+
147
172
  Output formats
148
173
  --------------
149
174
 
@@ -155,7 +180,9 @@ Parquet files can be read back as either a ``pandas.DataFrame`` or a
155
180
  Quick start
156
181
  ===========
157
182
 
158
- Historical data (pandas)::
183
+ Historical data (pandas):
184
+
185
+ .. code-block:: python
159
186
 
160
187
  from dccd.histo_dl import FromBinance
161
188
 
@@ -164,15 +191,21 @@ Historical data (pandas)::
164
191
  obj.save(form='parquet')
165
192
  df = obj.get_data() # pandas DataFrame
166
193
 
167
- Polars output::
194
+ Polars output:
195
+
196
+ .. code-block:: python
168
197
 
169
198
  df_pl = obj.get_data(format='polars')
170
199
 
171
- Incremental update (resume from last saved point)::
200
+ Incremental update (resume from last saved point):
201
+
202
+ .. code-block:: python
172
203
 
173
204
  obj.import_data(start='last', end='now').save(form='parquet')
174
205
 
175
- Other exchanges::
206
+ Other exchanges:
207
+
208
+ .. code-block:: python
176
209
 
177
210
  from dccd.histo_dl import FromKraken, FromBybit, FromOKX
178
211
 
@@ -180,6 +213,99 @@ Other exchanges::
180
213
  FromBybit('/path/', 'BTC', 86400).import_data(start='2024-01-01', end='now').save()
181
214
  FromOKX('/path/', 'BTC', 3600).import_data(start='2024-01-01', end='now').save()
182
215
 
216
+ Trades (historical or recent):
217
+
218
+ .. code-block:: python
219
+
220
+ from dccd.histo_dl import FromBinance, FromKraken
221
+
222
+ obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
223
+ obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
224
+ obj.save_trades(form='csv')
225
+ df = obj.trades_df # pandas DataFrame — columns: timestamp, price, amount, type, tid
226
+
227
+ # Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
228
+ FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
229
+
230
+ Order book snapshot:
231
+
232
+ .. code-block:: python
233
+
234
+ from dccd.histo_dl import FromOKX
235
+
236
+ obj = FromOKX('/path/', 'BTC', 3600)
237
+ obj.import_orderbook(depth=50)
238
+ obj.save_orderbook(form='csv')
239
+ df = obj.orderbook_df # columns: side, price, amount, count
240
+
241
+ Daemon (autonomous collector) — ``config.yml``:
242
+
243
+ .. code-block:: yaml
244
+
245
+ settings:
246
+ data_path: /data/crypto/
247
+ timezone: UTC
248
+
249
+ storage:
250
+ remotes:
251
+ - provider: rclone
252
+ remote: "mynas:crypto/"
253
+ sync_interval: 3600
254
+
255
+ histo_jobs:
256
+ - exchange: binance
257
+ pairs: [BTC/USDT, ETH/USDT]
258
+ span: 3600
259
+ format: parquet
260
+
261
+ stream_jobs:
262
+ - exchange: binance
263
+ pairs: [BTC/USDT]
264
+ channels: [trades, book]
265
+ time_step: 60
266
+
267
+ CLI quick start:
268
+
269
+ .. code-block:: bash
270
+
271
+ # Validate the config
272
+ dccd validate --config config.yml
273
+
274
+ # Backfill all OHLC history defined in config (resumable)
275
+ dccd backfill --config config.yml --start "2020-01-01 00:00:00"
276
+
277
+ # Dry run — estimate windows and time without downloading
278
+ dccd backfill --config config.yml --dry-run
279
+
280
+ # Backfill only one exchange
281
+ dccd backfill --config config.yml --exchange kraken
282
+
283
+ # One incremental batch per job, then exit (for cron)
284
+ dccd collect --config config.yml
285
+
286
+ # Continuous daemon (Ctrl-C to stop)
287
+ dccd start --config config.yml
288
+
289
+ Python API:
290
+
291
+ .. code-block:: python
292
+
293
+ from dccd.daemon.config import load_config
294
+ from dccd.daemon.scheduler import run_once, build_histo_scheduler
295
+ from dccd.daemon.stream_manager import StreamManager
296
+
297
+ config = load_config('config.yml')
298
+
299
+ # One-shot: download all histo jobs once, then exit
300
+ run_once(config)
301
+
302
+ # Daemon mode: periodic REST + live WebSocket streams
303
+ scheduler = build_histo_scheduler(config)
304
+ scheduler.start()
305
+
306
+ mgr = StreamManager(config)
307
+ mgr.start() # runs until mgr.stop() is called
308
+
183
309
  Links
184
310
  =====
185
311
 
@@ -52,6 +52,10 @@ With optional Parquet / Polars support::
52
52
 
53
53
  $ pip install "dccd[io]"
54
54
 
55
+ With autonomous daemon support (APScheduler + PyYAML)::
56
+
57
+ $ pip install "dccd[daemon]"
58
+
55
59
  From source::
56
60
 
57
61
  $ git clone https://github.com/ArthurBernard/Download_Crypto_Currencies_Data
@@ -64,15 +68,15 @@ Supported exchanges
64
68
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
65
69
  | Exchange | REST OHLCV | REST Trades | REST Order Book | WS OHLCV | WS Trades | WS Order Book |
66
70
  +==================+============+=============+=================+==========+===========+================+
67
- | Binance | ✓ | | | | ✓ | ✓ |
71
+ | Binance | ✓ | ✓ | ✓ | | ✓ | ✓ |
68
72
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
69
- | Coinbase | ✓ | | | | | |
73
+ | Coinbase | ✓ | ✓† | ✓ | | | |
70
74
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
71
- | Kraken | ✓ | | | ✓ | ✓ | ✓ |
75
+ | Kraken | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
72
76
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
73
- | Bybit | ✓ | | | | ✓ | ✓ |
77
+ | Bybit | ✓ | ✓† | ✓ | | ✓ | ✓ |
74
78
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
75
- | OKX | ✓ | | | ✓ | ✓ | ✓ |
79
+ | OKX | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
76
80
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
77
81
  | Bitfinex | | | | ✓\* | ✓ | ✓ |
78
82
  +------------------+------------+-------------+-----------------+----------+-----------+----------------+
@@ -81,6 +85,8 @@ Supported exchanges
81
85
 
82
86
  \* Bitfinex WS OHLCV is aggregated from the trades stream via ``get_ohlc_bitfinex``.
83
87
 
88
+ † Recent trades only (Bybit ≤ 1 000, Coinbase ≤ 100) — no deep historical pagination via the public REST API.
89
+
84
90
  Presentation
85
91
  ============
86
92
 
@@ -93,6 +99,14 @@ Presentation
93
99
  Stream real-time data (order book, trades) via WebSocket with automatic
94
100
  reconnection and configurable processing/saving callbacks.
95
101
 
102
+ **Daemon** ``dccd.daemon``
103
+ Autonomous, server-side collector driven by a YAML config. Runs REST
104
+ jobs on a schedule (APScheduler), opens WebSocket streams for real-time
105
+ collection, and periodically syncs all local data to one or more remote
106
+ destinations (NAS, S3, SFTP, …) via rclone. Multiple remotes and a
107
+ configurable sync interval are supported; collection is never blocked by
108
+ remote availability.
109
+
96
110
  Output formats
97
111
  --------------
98
112
 
@@ -104,7 +118,9 @@ Parquet files can be read back as either a ``pandas.DataFrame`` or a
104
118
  Quick start
105
119
  ===========
106
120
 
107
- Historical data (pandas)::
121
+ Historical data (pandas):
122
+
123
+ .. code-block:: python
108
124
 
109
125
  from dccd.histo_dl import FromBinance
110
126
 
@@ -113,15 +129,21 @@ Historical data (pandas)::
113
129
  obj.save(form='parquet')
114
130
  df = obj.get_data() # pandas DataFrame
115
131
 
116
- Polars output::
132
+ Polars output:
133
+
134
+ .. code-block:: python
117
135
 
118
136
  df_pl = obj.get_data(format='polars')
119
137
 
120
- Incremental update (resume from last saved point)::
138
+ Incremental update (resume from last saved point):
139
+
140
+ .. code-block:: python
121
141
 
122
142
  obj.import_data(start='last', end='now').save(form='parquet')
123
143
 
124
- Other exchanges::
144
+ Other exchanges:
145
+
146
+ .. code-block:: python
125
147
 
126
148
  from dccd.histo_dl import FromKraken, FromBybit, FromOKX
127
149
 
@@ -129,6 +151,99 @@ Other exchanges::
129
151
  FromBybit('/path/', 'BTC', 86400).import_data(start='2024-01-01', end='now').save()
130
152
  FromOKX('/path/', 'BTC', 3600).import_data(start='2024-01-01', end='now').save()
131
153
 
154
+ Trades (historical or recent):
155
+
156
+ .. code-block:: python
157
+
158
+ from dccd.histo_dl import FromBinance, FromKraken
159
+
160
+ obj = FromBinance('/path/', 'BTC', 3600, fiat='USDT')
161
+ obj.import_trades(start='2024-01-01 00:00:00', end='2024-01-02 00:00:00')
162
+ obj.save_trades(form='csv')
163
+ df = obj.trades_df # pandas DataFrame — columns: timestamp, price, amount, type, tid
164
+
165
+ # Kraken also supports full history; Bybit/Coinbase return recent-only snapshots
166
+ FromKraken('/path/', 'BTC', 3600).import_trades(start='2024-01-01', end='2024-01-02').save_trades()
167
+
168
+ Order book snapshot:
169
+
170
+ .. code-block:: python
171
+
172
+ from dccd.histo_dl import FromOKX
173
+
174
+ obj = FromOKX('/path/', 'BTC', 3600)
175
+ obj.import_orderbook(depth=50)
176
+ obj.save_orderbook(form='csv')
177
+ df = obj.orderbook_df # columns: side, price, amount, count
178
+
179
+ Daemon (autonomous collector) — ``config.yml``:
180
+
181
+ .. code-block:: yaml
182
+
183
+ settings:
184
+ data_path: /data/crypto/
185
+ timezone: UTC
186
+
187
+ storage:
188
+ remotes:
189
+ - provider: rclone
190
+ remote: "mynas:crypto/"
191
+ sync_interval: 3600
192
+
193
+ histo_jobs:
194
+ - exchange: binance
195
+ pairs: [BTC/USDT, ETH/USDT]
196
+ span: 3600
197
+ format: parquet
198
+
199
+ stream_jobs:
200
+ - exchange: binance
201
+ pairs: [BTC/USDT]
202
+ channels: [trades, book]
203
+ time_step: 60
204
+
205
+ CLI quick start:
206
+
207
+ .. code-block:: bash
208
+
209
+ # Validate the config
210
+ dccd validate --config config.yml
211
+
212
+ # Backfill all OHLC history defined in config (resumable)
213
+ dccd backfill --config config.yml --start "2020-01-01 00:00:00"
214
+
215
+ # Dry run — estimate windows and time without downloading
216
+ dccd backfill --config config.yml --dry-run
217
+
218
+ # Backfill only one exchange
219
+ dccd backfill --config config.yml --exchange kraken
220
+
221
+ # One incremental batch per job, then exit (for cron)
222
+ dccd collect --config config.yml
223
+
224
+ # Continuous daemon (Ctrl-C to stop)
225
+ dccd start --config config.yml
226
+
227
+ Python API:
228
+
229
+ .. code-block:: python
230
+
231
+ from dccd.daemon.config import load_config
232
+ from dccd.daemon.scheduler import run_once, build_histo_scheduler
233
+ from dccd.daemon.stream_manager import StreamManager
234
+
235
+ config = load_config('config.yml')
236
+
237
+ # One-shot: download all histo jobs once, then exit
238
+ run_once(config)
239
+
240
+ # Daemon mode: periodic REST + live WebSocket streams
241
+ scheduler = build_histo_scheduler(config)
242
+ scheduler.start()
243
+
244
+ mgr = StreamManager(config)
245
+ mgr.start() # runs until mgr.stop() is called
246
+
132
247
  Links
133
248
  =====
134
249
 
@@ -29,7 +29,6 @@ import time
29
29
  # Third party packages
30
30
  # Local packages
31
31
  from dccd.continuous_dl.exchange import ContinuousDownloader
32
- from dccd.process_data import set_marketdepth, set_orders, set_trades
33
32
  from dccd.tools.io import IODataBase
34
33
 
35
34
  __all__ = [
@@ -119,7 +118,7 @@ class DownloadBinanceData(ContinuousDownloader):
119
118
  """
120
119
 
121
120
  def __init__(self, pair: str = 'BTCUSDT', time_step: int = 60,
122
- until: int | None = 3600) -> None:
121
+ until: int | None = 3600, checkpoint_dir: str | None = None) -> None:
123
122
  """ Initialize object. """
124
123
  if until is None:
125
124
  until = 0
@@ -128,13 +127,14 @@ class DownloadBinanceData(ContinuousDownloader):
128
127
 
129
128
  self.pair = pair
130
129
  url = _BINANCE_WS_URL.format(sym=pair.lower())
131
- ContinuousDownloader.__init__(self, url, time_step=time_step, STOP=until)
130
+ ContinuousDownloader.__init__(self, url, time_step=time_step, STOP=until,
131
+ checkpoint_dir=checkpoint_dir)
132
132
  self._parser_data = {
133
133
  'trades': self.parser_trades,
134
134
  'book': self.parser_book,
135
135
  }
136
136
  self.logger = logging.getLogger(__name__)
137
- self.d: dict[str, float] = {}
137
+ self._load_checkpoint()
138
138
 
139
139
  async def _subscribe(self, **kwargs: object) -> None:
140
140
  """ Wait for connection; Binance streams are declared in the URL. """
@@ -165,8 +165,7 @@ class DownloadBinanceData(ContinuousDownloader):
165
165
  The ``data`` field from the combined-stream trade envelope.
166
166
 
167
167
  """
168
- for trade in _parser_trades(data):
169
- self._raw_parser(trade)
168
+ self._push_trades(_parser_trades(data))
170
169
 
171
170
  def parser_book(self, data: dict) -> None:
172
171
  """ Parse and update the order book from a depth message.
@@ -177,18 +176,7 @@ class DownloadBinanceData(ContinuousDownloader):
177
176
  The ``data`` field from the combined-stream depth envelope.
178
177
 
179
178
  """
180
- updates = _parser_book(data)
181
- for price, qty in updates.items():
182
- if qty == 0:
183
- self.d.pop(price, None)
184
- else:
185
- self.d[price] = qty
186
- self._data[self.t] = dict(self.d)
187
-
188
- def _raw_parser(self, data: object) -> None:
189
- if self.t not in self._data:
190
- self._data[self.t] = []
191
- self._data[self.t].append(data) # type: ignore[union-attr]
179
+ self._push_book_updates(_parser_book(data))
192
180
 
193
181
 
194
182
  def get_trades_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
@@ -210,8 +198,7 @@ def get_trades_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
210
198
 
211
199
  """
212
200
  downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
213
- downloader.set_process_data(set_trades)
214
- downloader.set_saver(IODataBase(path, method=form))
201
+ downloader.set_trades_saver(IODataBase(path, method=form))
215
202
  downloader(pair=pair)
216
203
 
217
204
 
@@ -234,8 +221,7 @@ def get_orderbook_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
234
221
 
235
222
  """
236
223
  downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
237
- downloader.set_process_data(set_marketdepth)
238
- downloader.set_saver(IODataBase(path, method=form))
224
+ downloader.set_book_saver(IODataBase(path, method=form))
239
225
  downloader(pair=pair)
240
226
 
241
227
 
@@ -246,7 +232,8 @@ def get_data_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
246
232
  Parameters
247
233
  ----------
248
234
  path : str
249
- Path to save data.
235
+ Root path; trades saved under ``<path>/trades/``, book under
236
+ ``<path>/book/``.
250
237
  pair : str, optional
251
238
  Trading pair in Binance format (e.g. 'BTCUSDT'), default is 'BTCUSDT'.
252
239
  time_step : int, optional
@@ -258,6 +245,6 @@ def get_data_binance(path: str, pair: str = 'BTCUSDT', time_step: int = 60,
258
245
 
259
246
  """
260
247
  downloader = DownloadBinanceData(pair=pair, time_step=time_step, until=until)
261
- downloader.set_process_data(set_orders)
262
- downloader.set_saver(IODataBase(path, method=form))
248
+ downloader.set_trades_saver(IODataBase(f'{path}/trades', method=form))
249
+ downloader.set_book_saver(IODataBase(f'{path}/book', method=form))
263
250
  downloader(pair=pair)
@@ -31,7 +31,6 @@ Low level API
31
31
  """
32
32
 
33
33
  # Built-in packages
34
- import asyncio
35
34
  import logging
36
35
  import time
37
36
  from typing import Any
@@ -117,7 +116,8 @@ class DownloadBitfinexData(ContinuousDownloader):
117
116
 
118
117
  """
119
118
 
120
- def __init__(self, time_step: int = 60, until: int | None = 3600) -> None:
119
+ def __init__(self, time_step: int = 60, until: int | None = 3600,
120
+ checkpoint_dir: str | None = None) -> None:
121
121
  """ Initialize object.
122
122
 
123
123
  Parameters
@@ -127,6 +127,9 @@ class DownloadBitfinexData(ContinuousDownloader):
127
127
  until : int or None, optional
128
128
  Seconds to run, or a future Unix timestamp to stop at.
129
129
  Default is ``3600``.
130
+ checkpoint_dir : str or None, optional
131
+ Directory to write the order-book crash-recovery checkpoint.
132
+ Disabled when ``None`` (default).
130
133
 
131
134
  """
132
135
  if until is None:
@@ -135,7 +138,7 @@ class DownloadBitfinexData(ContinuousDownloader):
135
138
  until -= int(time.time())
136
139
 
137
140
  ContinuousDownloader.__init__(self, 'bitfinex', time_step=time_step,
138
- STOP=until)
141
+ STOP=until, checkpoint_dir=checkpoint_dir)
139
142
 
140
143
  self._parser_data: dict[str, Any] = {
141
144
  'book': self.parser_book,
@@ -144,7 +147,7 @@ class DownloadBitfinexData(ContinuousDownloader):
144
147
  'trades_raw': self.parser_raw_trades,
145
148
  }
146
149
  self.logger = logging.getLogger(__name__)
147
- self.d: dict[str, Any] = {}
150
+ self._load_checkpoint()
148
151
 
149
152
  def parser_raw_book(self, data: list[Any]) -> None:
150
153
  """ Parse raw order book, each timestep set in a list all orders.
@@ -177,7 +180,9 @@ class DownloadBitfinexData(ContinuousDownloader):
177
180
  else:
178
181
  self.d.pop(parsed['price'])
179
182
 
180
- self._data[self.t] = {v['price']: v['amount'] for v in self.d.values()} # type: ignore[assignment]
183
+ self._data.setdefault(self.t, {'trades': [], 'book': {}})['book'] = {
184
+ v['price']: v['amount'] for v in self.d.values()
185
+ }
181
186
 
182
187
  def parser_raw_trades(self, data: list[Any]) -> None:
183
188
  """ Parse raw trade data tick-by-tick.
@@ -248,18 +253,9 @@ class DownloadBitfinexData(ContinuousDownloader):
248
253
 
249
254
  """
250
255
  self.parser = self.get_parser(channel)
251
-
252
256
  channel = channel[:-4] if channel[-4:] == '_raw' else channel
253
-
254
257
  self.logger.info('Try connect WS and set {} stream.'.format(channel))
255
-
256
- self.loop = asyncio.get_event_loop()
257
- self.loop.run_until_complete(asyncio.gather(
258
- self._connect(channel=channel, **kwargs),
259
- self._loop()
260
- ))
261
-
262
- return self
258
+ return super().__call__(channel=channel, **kwargs) # type: ignore[return-value]
263
259
 
264
260
 
265
261
  # =========================================================================== #