dccd 2.2.0__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dccd-2.2.0 → dccd-2.3.1}/CHANGELOG.md +52 -0
- {dccd-2.2.0 → dccd-2.3.1}/PKG-INFO +31 -3
- {dccd-2.2.0 → dccd-2.3.1}/README.rst +28 -2
- {dccd-2.2.0 → dccd-2.3.1}/dccd/daemon/__init__.py +6 -0
- dccd-2.3.1/dccd/daemon/backfill.py +619 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/daemon/cli.py +71 -12
- {dccd-2.2.0 → dccd-2.3.1}/dccd/daemon/config.py +56 -19
- {dccd-2.2.0 → dccd-2.3.1}/dccd/daemon/scheduler.py +12 -7
- {dccd-2.2.0 → dccd-2.3.1}/dccd/daemon/stream_manager.py +16 -5
- dccd-2.3.1/dccd/histo_dl/__init__.py +64 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/histo_dl/binance.py +5 -7
- {dccd-2.2.0 → dccd-2.3.1}/dccd/histo_dl/bybit.py +6 -5
- {dccd-2.2.0 → dccd-2.3.1}/dccd/histo_dl/coinbase.py +10 -10
- {dccd-2.2.0 → dccd-2.3.1}/dccd/histo_dl/exchange.py +100 -211
- {dccd-2.2.0 → dccd-2.3.1}/dccd/histo_dl/kraken.py +4 -5
- {dccd-2.2.0 → dccd-2.3.1}/dccd/histo_dl/okx.py +10 -6
- dccd-2.3.1/dccd/storage.py +343 -0
- dccd-2.3.1/dccd/tests/test_backfill.py +285 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_coinbase.py +1 -1
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_daemon_cli.py +2 -2
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_daemon_config.py +0 -7
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_daemon_scheduler.py +3 -3
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_date_time.py +6 -1
- dccd-2.3.1/dccd/tests/test_histo_dl.py +76 -0
- dccd-2.3.1/dccd/tests/test_storage.py +391 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tools/date_time.py +77 -24
- {dccd-2.2.0 → dccd-2.3.1}/dccd.egg-info/PKG-INFO +31 -3
- {dccd-2.2.0 → dccd-2.3.1}/dccd.egg-info/SOURCES.txt +4 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd.egg-info/requires.txt +2 -0
- {dccd-2.2.0 → dccd-2.3.1}/pyproject.toml +3 -3
- dccd-2.2.0/dccd/histo_dl/__init__.py +0 -72
- dccd-2.2.0/dccd/tests/test_histo_dl.py +0 -60
- {dccd-2.2.0 → dccd-2.3.1}/CONTRIBUTING.md +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/LICENSE.txt +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/MANIFEST.in +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/__init__.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/continuous_dl/__init__.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/continuous_dl/binance.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/continuous_dl/bitfinex.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/continuous_dl/bitmex.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/continuous_dl/bybit.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/continuous_dl/exchange.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/continuous_dl/kraken.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/continuous_dl/okx.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/daemon/health.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/daemon/storage.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/models.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/process_data.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/__init__.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/conftest.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_binance.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_binance_ws.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_bitfinex.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_bitmex.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_bybit.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_bybit_ws.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_daemon_health.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_daemon_storage.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_daemon_stream_manager.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_io.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_kraken.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_kraken_ws.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_models.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_okx.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_okx_ws.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_process_data.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tests/test_websocket.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tools/__init__.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tools/io.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd/tools/websocket.py +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd.egg-info/dependency_links.txt +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd.egg-info/entry_points.txt +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/dccd.egg-info/top_level.txt +0 -0
- {dccd-2.2.0 → dccd-2.3.1}/setup.cfg +0 -0
|
@@ -6,6 +6,58 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [2.3.1] - 2026-05-24
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
- `dccd/storage.py` — `DataStore.missing_intervals` now detects the gap **before** the first saved row when the requested `start` predates `file_min`; previously only the trailing gap (after `file_max`) was returned, causing `dccd backfill --start <early-date>` to silently skip all historical data before the first existing candle (#46)
|
|
14
|
+
- `dccd/histo_dl/coinbase.py` — raise `RuntimeError` when Coinbase returns HTTP 200 with a JSON dict (e.g. `{"message": "..."}` for near-future windows) instead of silently iterating dict keys and crashing with `ValueError` (#45)
|
|
15
|
+
- `dccd/histo_dl/coinbase.py` — additional guard: raise `RuntimeError` when Coinbase returns a JSON list whose first element is not itself a list/tuple (e.g. `["message"]`); previously caused `float("m")` `ValueError` (#45)
|
|
16
|
+
- `dccd/histo_dl/exchange.py` — `_sort_data` no longer raises `KeyError: 'TS'` when the API returns empty data; returns early with an empty `self.df` so the backfill skips the window cleanly (#45)
|
|
17
|
+
- `dccd/histo_dl/exchange.py` — `_sort_data` strips any candle at or beyond `self.end` before merging; exchanges with inclusive endpoint semantics (Coinbase) no longer cause `_advance` to overshoot by one span per window, preventing drift that accumulated into near-future requests (#45)
|
|
18
|
+
- `dccd/histo_dl/okx.py` — raise `RuntimeError` when OKX response code is not `"0"`, letting the backfill retry/skip logic handle API-level errors (#45)
|
|
19
|
+
- `dccd/histo_dl/okx.py` — switch `_import_data` from `/market/candles` to `/market/history-candles`; the former only serves the last ~24 h of 1-minute bars and silently returns empty data for older windows (#45)
|
|
20
|
+
|
|
21
|
+
## [2.3.0] - 2026-05-22
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
|
|
25
|
+
- `dccd/storage.py` — `DataStore.is_period_complete(year)`: checks whether an annual parquet file contains all expected candles; `DataStore.missing_intervals(start, end)`: gap-detection — complete past years are skipped, incomplete years resume from the last saved row (#41)
|
|
26
|
+
- `dccd/daemon/backfill.py` — `_BackfillBase.run()` now iterates over `DataStore.missing_intervals()` instead of a single sliding window from `last_saved`; complete years are never re-downloaded (#41)
|
|
27
|
+
- `dccd/storage.py` — new `DataStore` class: unified read/write interface for OHLC, trades, and orderbook; `save(df)` (merge-on-TS, annual OHLC / daily trades+orderbook), `load(start, end)`, `existing_periods()`, `last_timestamp()` (#39)
|
|
28
|
+
- `dccd/tools/date_time.py` — `span_label(span)` converts seconds to short directory labels (``'1m'``, ``'1h'``, ``'1d'``…); `_SPAN_LABEL` mapping exported (#39)
|
|
29
|
+
- `doc/source/storage.rst` — Sphinx page for `DataStore` with directory layout examples (#39)
|
|
30
|
+
|
|
31
|
+
### Changed
|
|
32
|
+
|
|
33
|
+
- `dccd collect` (formerly `dccd run`) — renamed to clarify the distinction: `collect` = one incremental batch, `backfill` = full historical download with gap detection, `start` = continuous daemon (#41)
|
|
34
|
+
- New storage directory layout: ``{data_path}/{exchange}/ohlc/{pair}/{span}/YYYY.parquet``, ``…/trades/{pair}/YYYY-MM-DD.parquet``, ``…/orderbook/{pair}/YYYY-MM-DD.parquet`` — replaces the old ``{Exchange}/Data/Clean_Data/{per}/{pair}/`` layout (#39)
|
|
35
|
+
- `dccd/histo_dl/exchange.py` — `save()`, `_get_last_date()`, `save_trades()`, `save_orderbook()` now delegate to `DataStore`; removed `last_df`, `_set_by_period`, `_name_file`, `_excel_format`; removed unused `set_hierarchy()` (#39, #41)
|
|
36
|
+
- `dccd/histo_dl/{binance,bybit,coinbase,okx}.py` — removed `full_path` overrides (base class sets the correct path via `DataStore`) (#39)
|
|
37
|
+
- `dccd/daemon/backfill.py`, `scheduler.py` — removed `by_period` parameter; `save()` call simplified (#39)
|
|
38
|
+
- `dccd/daemon/stream_manager.py` — WebSocket save path now built from `DataStore.directory` (#39)
|
|
39
|
+
- `dccd/daemon/config.py` — `HistoJob.by_period` field removed; granularity is automatic (#39)
|
|
40
|
+
|
|
41
|
+
- `dccd/histo_dl/exchange.py` — `save()` now supports `form='parquet'`; previously only `'xlsx'` and `'csv'` were handled (#35)
|
|
42
|
+
- `config.yml` — ready-to-use daemon config for minutely OHLC + real-time orderbook/trades on Binance, Kraken, and Bybit (#35)
|
|
43
|
+
- `dccd/daemon/backfill.py` — `OHLCBackfill` and `KrakenBackfill` strategy classes with shared retry/progress/save loop; `make_job()` factory; `run_backfill()` orchestrator; tqdm progress bars and optional `--parallel` execution (#38)
|
|
44
|
+
- `dccd/daemon/cli.py` — `dccd backfill` command: reads all job definitions from config, supports `--exchange` / `--pairs` filters, `--start`, `--parallel`, and `--dry-run` flags (#38)
|
|
45
|
+
- `dccd/daemon/config.py` — `SettingsConfig` with `data_path` and `timezone` fields; `CollectorConfig.settings` propagates `data_path` to `StorageConfig.local_path` when not set explicitly (#38)
|
|
46
|
+
|
|
47
|
+
### Removed
|
|
48
|
+
|
|
49
|
+
- `scripts/backfill.py` — replaced by `dccd backfill` CLI command and `dccd.daemon.backfill` module (#38)
|
|
50
|
+
|
|
51
|
+
### Fixed
|
|
52
|
+
|
|
53
|
+
- `dccd/histo_dl/exchange.py` — `save(form='parquet')` was silently ignored (logged a warning instead of writing the file) (#35)
|
|
54
|
+
- `dccd/histo_dl/exchange.py` — `_sort_data()` crashed with a `ValueError` when the API returned fewer candles than the expected window size; index is now derived from actual data (#36)
|
|
55
|
+
- `dccd/histo_dl/exchange.py` — `by_period='M'` produced minute-level file names (strftime `%M`) instead of year-month; added `_PERIOD_FMT` mapping so `'M'` → `'%Y-%m'` (#36)
|
|
56
|
+
- `dccd/histo_dl/exchange.py` — `self.end` now reflects the last candle timestamp so window-loop callers advance correctly (was stuck at `now` for Kraken) (#36)
|
|
57
|
+
- `dccd/histo_dl/binance.py` — missing `limit=1000` parameter caused Binance to return only 500 candles per request (#36)
|
|
58
|
+
- `dccd/histo_dl/bybit.py` — `limit` was 200; raised to 1000 to match the API maximum (#36)
|
|
59
|
+
- `dccd/histo_dl/exchange.py` — `_sort_data()` dropped the minute just before a window boundary when the last trade arrived ≥2 spans early; grid now uses `self.end` directly as the exclusive stop (#36)
|
|
60
|
+
|
|
9
61
|
## [2.2.0] - 2026-05-17
|
|
10
62
|
|
|
11
63
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dccd
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Summary: Download Crypto Currency Data from different exchanges.
|
|
5
5
|
Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -37,6 +37,7 @@ Provides-Extra: daemon
|
|
|
37
37
|
Requires-Dist: pyyaml>=6.0; extra == "daemon"
|
|
38
38
|
Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
|
|
39
39
|
Requires-Dist: typer>=0.12; extra == "daemon"
|
|
40
|
+
Requires-Dist: tqdm>=4.64; extra == "daemon"
|
|
40
41
|
Provides-Extra: dev
|
|
41
42
|
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
42
43
|
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
@@ -48,6 +49,7 @@ Requires-Dist: pandas-stubs>=2.0; extra == "dev"
|
|
|
48
49
|
Requires-Dist: pyyaml>=6.0; extra == "dev"
|
|
49
50
|
Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
|
|
50
51
|
Requires-Dist: typer>=0.12; extra == "dev"
|
|
52
|
+
Requires-Dist: tqdm>=4.64; extra == "dev"
|
|
51
53
|
Provides-Extra: doc
|
|
52
54
|
Requires-Dist: sphinx>=7.0; extra == "doc"
|
|
53
55
|
Requires-Dist: furo; extra == "doc"
|
|
@@ -240,8 +242,11 @@ Daemon (autonomous collector) — ``config.yml``:
|
|
|
240
242
|
|
|
241
243
|
.. code-block:: yaml
|
|
242
244
|
|
|
245
|
+
settings:
|
|
246
|
+
data_path: /data/crypto/
|
|
247
|
+
timezone: UTC
|
|
248
|
+
|
|
243
249
|
storage:
|
|
244
|
-
local_path: /data/crypto/
|
|
245
250
|
remotes:
|
|
246
251
|
- provider: rclone
|
|
247
252
|
remote: "mynas:crypto/"
|
|
@@ -252,7 +257,6 @@ Daemon (autonomous collector) — ``config.yml``:
|
|
|
252
257
|
pairs: [BTC/USDT, ETH/USDT]
|
|
253
258
|
span: 3600
|
|
254
259
|
format: parquet
|
|
255
|
-
by_period: Y
|
|
256
260
|
|
|
257
261
|
stream_jobs:
|
|
258
262
|
- exchange: binance
|
|
@@ -260,6 +264,30 @@ Daemon (autonomous collector) — ``config.yml``:
|
|
|
260
264
|
channels: [trades, book]
|
|
261
265
|
time_step: 60
|
|
262
266
|
|
|
267
|
+
CLI quick start:
|
|
268
|
+
|
|
269
|
+
.. code-block:: bash
|
|
270
|
+
|
|
271
|
+
# Validate the config
|
|
272
|
+
dccd validate --config config.yml
|
|
273
|
+
|
|
274
|
+
# Backfill all OHLC history defined in config (resumable)
|
|
275
|
+
dccd backfill --config config.yml --start "2020-01-01 00:00:00"
|
|
276
|
+
|
|
277
|
+
# Dry run — estimate windows and time without downloading
|
|
278
|
+
dccd backfill --config config.yml --dry-run
|
|
279
|
+
|
|
280
|
+
# Backfill only one exchange
|
|
281
|
+
dccd backfill --config config.yml --exchange kraken
|
|
282
|
+
|
|
283
|
+
# One incremental batch per job, then exit (for cron)
|
|
284
|
+
dccd collect --config config.yml
|
|
285
|
+
|
|
286
|
+
# Continuous daemon (Ctrl-C to stop)
|
|
287
|
+
dccd start --config config.yml
|
|
288
|
+
|
|
289
|
+
Python API:
|
|
290
|
+
|
|
263
291
|
.. code-block:: python
|
|
264
292
|
|
|
265
293
|
from dccd.daemon.config import load_config
|
|
@@ -180,8 +180,11 @@ Daemon (autonomous collector) — ``config.yml``:
|
|
|
180
180
|
|
|
181
181
|
.. code-block:: yaml
|
|
182
182
|
|
|
183
|
+
settings:
|
|
184
|
+
data_path: /data/crypto/
|
|
185
|
+
timezone: UTC
|
|
186
|
+
|
|
183
187
|
storage:
|
|
184
|
-
local_path: /data/crypto/
|
|
185
188
|
remotes:
|
|
186
189
|
- provider: rclone
|
|
187
190
|
remote: "mynas:crypto/"
|
|
@@ -192,7 +195,6 @@ Daemon (autonomous collector) — ``config.yml``:
|
|
|
192
195
|
pairs: [BTC/USDT, ETH/USDT]
|
|
193
196
|
span: 3600
|
|
194
197
|
format: parquet
|
|
195
|
-
by_period: Y
|
|
196
198
|
|
|
197
199
|
stream_jobs:
|
|
198
200
|
- exchange: binance
|
|
@@ -200,6 +202,30 @@ Daemon (autonomous collector) — ``config.yml``:
|
|
|
200
202
|
channels: [trades, book]
|
|
201
203
|
time_step: 60
|
|
202
204
|
|
|
205
|
+
CLI quick start:
|
|
206
|
+
|
|
207
|
+
.. code-block:: bash
|
|
208
|
+
|
|
209
|
+
# Validate the config
|
|
210
|
+
dccd validate --config config.yml
|
|
211
|
+
|
|
212
|
+
# Backfill all OHLC history defined in config (resumable)
|
|
213
|
+
dccd backfill --config config.yml --start "2020-01-01 00:00:00"
|
|
214
|
+
|
|
215
|
+
# Dry run — estimate windows and time without downloading
|
|
216
|
+
dccd backfill --config config.yml --dry-run
|
|
217
|
+
|
|
218
|
+
# Backfill only one exchange
|
|
219
|
+
dccd backfill --config config.yml --exchange kraken
|
|
220
|
+
|
|
221
|
+
# One incremental batch per job, then exit (for cron)
|
|
222
|
+
dccd collect --config config.yml
|
|
223
|
+
|
|
224
|
+
# Continuous daemon (Ctrl-C to stop)
|
|
225
|
+
dccd start --config config.yml
|
|
226
|
+
|
|
227
|
+
Python API:
|
|
228
|
+
|
|
203
229
|
.. code-block:: python
|
|
204
230
|
|
|
205
231
|
from dccd.daemon.config import load_config
|
|
@@ -10,6 +10,7 @@ Submodules
|
|
|
10
10
|
|
|
11
11
|
.. autosummary::
|
|
12
12
|
|
|
13
|
+
backfill
|
|
13
14
|
config
|
|
14
15
|
health
|
|
15
16
|
storage
|
|
@@ -18,6 +19,7 @@ Submodules
|
|
|
18
19
|
|
|
19
20
|
"""
|
|
20
21
|
|
|
22
|
+
from dccd.daemon.backfill import KrakenBackfill, OHLCBackfill, make_job, run_backfill
|
|
21
23
|
from dccd.daemon.config import CollectorConfig, load_config
|
|
22
24
|
from dccd.daemon.health import HealthMonitor
|
|
23
25
|
from dccd.daemon.scheduler import build_histo_scheduler, run_once
|
|
@@ -27,7 +29,11 @@ from dccd.daemon.stream_manager import StreamManager, SyncService
|
|
|
27
29
|
__all__ = [
|
|
28
30
|
'CollectorConfig',
|
|
29
31
|
'HealthMonitor',
|
|
32
|
+
'KrakenBackfill',
|
|
33
|
+
'OHLCBackfill',
|
|
30
34
|
'load_config',
|
|
35
|
+
'make_job',
|
|
36
|
+
'run_backfill',
|
|
31
37
|
'RemoteStorage',
|
|
32
38
|
'StreamManager',
|
|
33
39
|
'SyncService',
|