dccd 2.2.0__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {dccd-2.2.0 → dccd-2.3.0}/CHANGELOG.md +40 -0
  2. {dccd-2.2.0 → dccd-2.3.0}/PKG-INFO +31 -3
  3. {dccd-2.2.0 → dccd-2.3.0}/README.rst +28 -2
  4. {dccd-2.2.0 → dccd-2.3.0}/dccd/daemon/__init__.py +6 -0
  5. dccd-2.3.0/dccd/daemon/backfill.py +619 -0
  6. {dccd-2.2.0 → dccd-2.3.0}/dccd/daemon/cli.py +71 -12
  7. {dccd-2.2.0 → dccd-2.3.0}/dccd/daemon/config.py +56 -19
  8. {dccd-2.2.0 → dccd-2.3.0}/dccd/daemon/scheduler.py +12 -7
  9. {dccd-2.2.0 → dccd-2.3.0}/dccd/daemon/stream_manager.py +16 -5
  10. dccd-2.3.0/dccd/histo_dl/__init__.py +64 -0
  11. {dccd-2.2.0 → dccd-2.3.0}/dccd/histo_dl/binance.py +5 -7
  12. {dccd-2.2.0 → dccd-2.3.0}/dccd/histo_dl/bybit.py +6 -5
  13. {dccd-2.2.0 → dccd-2.3.0}/dccd/histo_dl/coinbase.py +4 -7
  14. {dccd-2.2.0 → dccd-2.3.0}/dccd/histo_dl/exchange.py +84 -208
  15. {dccd-2.2.0 → dccd-2.3.0}/dccd/histo_dl/kraken.py +4 -5
  16. {dccd-2.2.0 → dccd-2.3.0}/dccd/histo_dl/okx.py +5 -4
  17. dccd-2.3.0/dccd/storage.py +338 -0
  18. dccd-2.3.0/dccd/tests/test_backfill.py +285 -0
  19. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_daemon_cli.py +2 -2
  20. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_daemon_config.py +0 -7
  21. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_daemon_scheduler.py +3 -3
  22. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_date_time.py +6 -1
  23. dccd-2.3.0/dccd/tests/test_histo_dl.py +76 -0
  24. dccd-2.3.0/dccd/tests/test_storage.py +345 -0
  25. {dccd-2.2.0 → dccd-2.3.0}/dccd/tools/date_time.py +77 -24
  26. {dccd-2.2.0 → dccd-2.3.0}/dccd.egg-info/PKG-INFO +31 -3
  27. {dccd-2.2.0 → dccd-2.3.0}/dccd.egg-info/SOURCES.txt +4 -0
  28. {dccd-2.2.0 → dccd-2.3.0}/dccd.egg-info/requires.txt +2 -0
  29. {dccd-2.2.0 → dccd-2.3.0}/pyproject.toml +3 -3
  30. dccd-2.2.0/dccd/histo_dl/__init__.py +0 -72
  31. dccd-2.2.0/dccd/tests/test_histo_dl.py +0 -60
  32. {dccd-2.2.0 → dccd-2.3.0}/CONTRIBUTING.md +0 -0
  33. {dccd-2.2.0 → dccd-2.3.0}/LICENSE.txt +0 -0
  34. {dccd-2.2.0 → dccd-2.3.0}/MANIFEST.in +0 -0
  35. {dccd-2.2.0 → dccd-2.3.0}/dccd/__init__.py +0 -0
  36. {dccd-2.2.0 → dccd-2.3.0}/dccd/continuous_dl/__init__.py +0 -0
  37. {dccd-2.2.0 → dccd-2.3.0}/dccd/continuous_dl/binance.py +0 -0
  38. {dccd-2.2.0 → dccd-2.3.0}/dccd/continuous_dl/bitfinex.py +0 -0
  39. {dccd-2.2.0 → dccd-2.3.0}/dccd/continuous_dl/bitmex.py +0 -0
  40. {dccd-2.2.0 → dccd-2.3.0}/dccd/continuous_dl/bybit.py +0 -0
  41. {dccd-2.2.0 → dccd-2.3.0}/dccd/continuous_dl/exchange.py +0 -0
  42. {dccd-2.2.0 → dccd-2.3.0}/dccd/continuous_dl/kraken.py +0 -0
  43. {dccd-2.2.0 → dccd-2.3.0}/dccd/continuous_dl/okx.py +0 -0
  44. {dccd-2.2.0 → dccd-2.3.0}/dccd/daemon/health.py +0 -0
  45. {dccd-2.2.0 → dccd-2.3.0}/dccd/daemon/storage.py +0 -0
  46. {dccd-2.2.0 → dccd-2.3.0}/dccd/models.py +0 -0
  47. {dccd-2.2.0 → dccd-2.3.0}/dccd/process_data.py +0 -0
  48. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/__init__.py +0 -0
  49. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/conftest.py +0 -0
  50. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_binance.py +0 -0
  51. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_binance_ws.py +0 -0
  52. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_bitfinex.py +0 -0
  53. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_bitmex.py +0 -0
  54. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_bybit.py +0 -0
  55. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_bybit_ws.py +0 -0
  56. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_coinbase.py +0 -0
  57. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_daemon_health.py +0 -0
  58. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_daemon_storage.py +0 -0
  59. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_daemon_stream_manager.py +0 -0
  60. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_io.py +0 -0
  61. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_kraken.py +0 -0
  62. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_kraken_ws.py +0 -0
  63. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_models.py +0 -0
  64. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_okx.py +0 -0
  65. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_okx_ws.py +0 -0
  66. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_process_data.py +0 -0
  67. {dccd-2.2.0 → dccd-2.3.0}/dccd/tests/test_websocket.py +0 -0
  68. {dccd-2.2.0 → dccd-2.3.0}/dccd/tools/__init__.py +0 -0
  69. {dccd-2.2.0 → dccd-2.3.0}/dccd/tools/io.py +0 -0
  70. {dccd-2.2.0 → dccd-2.3.0}/dccd/tools/websocket.py +0 -0
  71. {dccd-2.2.0 → dccd-2.3.0}/dccd.egg-info/dependency_links.txt +0 -0
  72. {dccd-2.2.0 → dccd-2.3.0}/dccd.egg-info/entry_points.txt +0 -0
  73. {dccd-2.2.0 → dccd-2.3.0}/dccd.egg-info/top_level.txt +0 -0
  74. {dccd-2.2.0 → dccd-2.3.0}/setup.cfg +0 -0
@@ -6,6 +6,46 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [2.3.0] - 2026-05-22
10
+
11
+ ### Added
12
+
13
+ - `dccd/storage.py` — `DataStore.is_period_complete(year)`: checks whether an annual parquet file contains all expected candles; `DataStore.missing_intervals(start, end)`: gap-detection — complete past years are skipped, incomplete years resume from the last saved row (#41)
14
+ - `dccd/daemon/backfill.py` — `_BackfillBase.run()` now iterates over `DataStore.missing_intervals()` instead of a single sliding window from `last_saved`; complete years are never re-downloaded (#41)
15
+ - `dccd/storage.py` — new `DataStore` class: unified read/write interface for OHLC, trades, and orderbook; `save(df)` (merge-on-TS, annual OHLC / daily trades+orderbook), `load(start, end)`, `existing_periods()`, `last_timestamp()` (#39)
16
+ - `dccd/tools/date_time.py` — `span_label(span)` converts seconds to short directory labels (``'1m'``, ``'1h'``, ``'1d'``…); `_SPAN_LABEL` mapping exported (#39)
17
+ - `doc/source/storage.rst` — Sphinx page for `DataStore` with directory layout examples (#39)
18
+
19
+ ### Changed
20
+
21
+ - `dccd collect` (formerly `dccd run`) — renamed to clarify the distinction: `collect` = one incremental batch, `backfill` = full historical download with gap detection, `start` = continuous daemon (#41)
22
+ - New storage directory layout: ``{data_path}/{exchange}/ohlc/{pair}/{span}/YYYY.parquet``, ``…/trades/{pair}/YYYY-MM-DD.parquet``, ``…/orderbook/{pair}/YYYY-MM-DD.parquet`` — replaces the old ``{Exchange}/Data/Clean_Data/{per}/{pair}/`` layout (#39)
23
+ - `dccd/histo_dl/exchange.py` — `save()`, `_get_last_date()`, `save_trades()`, `save_orderbook()` now delegate to `DataStore`; removed `last_df`, `_set_by_period`, `_name_file`, `_excel_format`; removed unused `set_hierarchy()` (#39, #41)
24
+ - `dccd/histo_dl/{binance,bybit,coinbase,okx}.py` — removed `full_path` overrides (base class sets the correct path via `DataStore`) (#39)
25
+ - `dccd/daemon/backfill.py`, `scheduler.py` — removed `by_period` parameter; `save()` call simplified (#39)
26
+ - `dccd/daemon/stream_manager.py` — WebSocket save path now built from `DataStore.directory` (#39)
27
+ - `dccd/daemon/config.py` — `HistoJob.by_period` field removed; granularity is automatic (#39)
28
+
29
+ - `dccd/histo_dl/exchange.py` — `save()` now supports `form='parquet'`; previously only `'xlsx'` and `'csv'` were handled (#35)
30
+ - `config.yml` — ready-to-use daemon config for minutely OHLC + real-time orderbook/trades on Binance, Kraken, and Bybit (#35)
31
+ - `dccd/daemon/backfill.py` — `OHLCBackfill` and `KrakenBackfill` strategy classes with shared retry/progress/save loop; `make_job()` factory; `run_backfill()` orchestrator; tqdm progress bars and optional `--parallel` execution (#38)
32
+ - `dccd/daemon/cli.py` — `dccd backfill` command: reads all job definitions from config, supports `--exchange` / `--pairs` filters, `--start`, `--parallel`, and `--dry-run` flags (#38)
33
+ - `dccd/daemon/config.py` — `SettingsConfig` with `data_path` and `timezone` fields; `CollectorConfig.settings` propagates `data_path` to `StorageConfig.local_path` when not set explicitly (#38)
34
+
35
+ ### Removed
36
+
37
+ - `scripts/backfill.py` — replaced by `dccd backfill` CLI command and `dccd.daemon.backfill` module (#38)
38
+
39
+ ### Fixed
40
+
41
+ - `dccd/histo_dl/exchange.py` — `save(form='parquet')` was not honored: a warning was logged and no file was written (#35)
42
+ - `dccd/histo_dl/exchange.py` — `_sort_data()` crashed with a ValueError when the API returned fewer candles than the expected window size; index is now derived from actual data (#36)
43
+ - `dccd/histo_dl/exchange.py` — `by_period='M'` produced minute-level file names (strftime `%M`) instead of year-month; added `_PERIOD_FMT` mapping so `'M'` → `'%Y-%m'` (#36)
44
+ - `dccd/histo_dl/exchange.py` — `self.end` now reflects the last candle timestamp so window-loop callers advance correctly (was stuck at `now` for Kraken) (#36)
45
+ - `dccd/histo_dl/binance.py` — missing `limit=1000` parameter caused Binance to return only 500 candles per request (#36)
46
+ - `dccd/histo_dl/bybit.py` — `limit` was 200; raised to 1000 to match the API maximum (#36)
47
+ - `dccd/histo_dl/exchange.py` — `_sort_data()` dropped the minute just before a window boundary when the last trade arrived ≥2 spans early; grid now uses `self.end` directly as the exclusive stop (#36)
48
+
9
49
  ## [2.2.0] - 2026-05-17
10
50
 
11
51
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dccd
3
- Version: 2.2.0
3
+ Version: 2.3.0
4
4
  Summary: Download Crypto Currency Data from different exchanges.
5
5
  Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
6
6
  License: MIT
@@ -37,6 +37,7 @@ Provides-Extra: daemon
37
37
  Requires-Dist: pyyaml>=6.0; extra == "daemon"
38
38
  Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
39
39
  Requires-Dist: typer>=0.12; extra == "daemon"
40
+ Requires-Dist: tqdm>=4.64; extra == "daemon"
40
41
  Provides-Extra: dev
41
42
  Requires-Dist: pytest>=7.4; extra == "dev"
42
43
  Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
@@ -48,6 +49,7 @@ Requires-Dist: pandas-stubs>=2.0; extra == "dev"
48
49
  Requires-Dist: pyyaml>=6.0; extra == "dev"
49
50
  Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
50
51
  Requires-Dist: typer>=0.12; extra == "dev"
52
+ Requires-Dist: tqdm>=4.64; extra == "dev"
51
53
  Provides-Extra: doc
52
54
  Requires-Dist: sphinx>=7.0; extra == "doc"
53
55
  Requires-Dist: furo; extra == "doc"
@@ -240,8 +242,11 @@ Daemon (autonomous collector) — ``config.yml``:
240
242
 
241
243
  .. code-block:: yaml
242
244
 
245
+ settings:
246
+ data_path: /data/crypto/
247
+ timezone: UTC
248
+
243
249
  storage:
244
- local_path: /data/crypto/
245
250
  remotes:
246
251
  - provider: rclone
247
252
  remote: "mynas:crypto/"
@@ -252,7 +257,6 @@ Daemon (autonomous collector) — ``config.yml``:
252
257
  pairs: [BTC/USDT, ETH/USDT]
253
258
  span: 3600
254
259
  format: parquet
255
- by_period: Y
256
260
 
257
261
  stream_jobs:
258
262
  - exchange: binance
@@ -260,6 +264,30 @@ Daemon (autonomous collector) — ``config.yml``:
260
264
  channels: [trades, book]
261
265
  time_step: 60
262
266
 
267
+ CLI quick start:
268
+
269
+ .. code-block:: bash
270
+
271
+ # Validate the config
272
+ dccd validate --config config.yml
273
+
274
+ # Backfill all OHLC history defined in config (resumable)
275
+ dccd backfill --config config.yml --start "2020-01-01 00:00:00"
276
+
277
+ # Dry run — estimate windows and time without downloading
278
+ dccd backfill --config config.yml --dry-run
279
+
280
+ # Backfill only one exchange
281
+ dccd backfill --config config.yml --exchange kraken
282
+
283
+ # One incremental batch per job, then exit (for cron)
284
+ dccd collect --config config.yml
285
+
286
+ # Continuous daemon (Ctrl-C to stop)
287
+ dccd start --config config.yml
288
+
289
+ Python API:
290
+
263
291
  .. code-block:: python
264
292
 
265
293
  from dccd.daemon.config import load_config
@@ -180,8 +180,11 @@ Daemon (autonomous collector) — ``config.yml``:
180
180
 
181
181
  .. code-block:: yaml
182
182
 
183
+ settings:
184
+ data_path: /data/crypto/
185
+ timezone: UTC
186
+
183
187
  storage:
184
- local_path: /data/crypto/
185
188
  remotes:
186
189
  - provider: rclone
187
190
  remote: "mynas:crypto/"
@@ -192,7 +195,6 @@ Daemon (autonomous collector) — ``config.yml``:
192
195
  pairs: [BTC/USDT, ETH/USDT]
193
196
  span: 3600
194
197
  format: parquet
195
- by_period: Y
196
198
 
197
199
  stream_jobs:
198
200
  - exchange: binance
@@ -200,6 +202,30 @@ Daemon (autonomous collector) — ``config.yml``:
200
202
  channels: [trades, book]
201
203
  time_step: 60
202
204
 
205
+ CLI quick start:
206
+
207
+ .. code-block:: bash
208
+
209
+ # Validate the config
210
+ dccd validate --config config.yml
211
+
212
+ # Backfill all OHLC history defined in config (resumable)
213
+ dccd backfill --config config.yml --start "2020-01-01 00:00:00"
214
+
215
+ # Dry run — estimate windows and time without downloading
216
+ dccd backfill --config config.yml --dry-run
217
+
218
+ # Backfill only one exchange
219
+ dccd backfill --config config.yml --exchange kraken
220
+
221
+ # One incremental batch per job, then exit (for cron)
222
+ dccd collect --config config.yml
223
+
224
+ # Continuous daemon (Ctrl-C to stop)
225
+ dccd start --config config.yml
226
+
227
+ Python API:
228
+
203
229
  .. code-block:: python
204
230
 
205
231
  from dccd.daemon.config import load_config
@@ -10,6 +10,7 @@ Submodules
10
10
 
11
11
  .. autosummary::
12
12
 
13
+ backfill
13
14
  config
14
15
  health
15
16
  storage
@@ -18,6 +19,7 @@ Submodules
18
19
 
19
20
  """
20
21
 
22
+ from dccd.daemon.backfill import KrakenBackfill, OHLCBackfill, make_job, run_backfill
21
23
  from dccd.daemon.config import CollectorConfig, load_config
22
24
  from dccd.daemon.health import HealthMonitor
23
25
  from dccd.daemon.scheduler import build_histo_scheduler, run_once
@@ -27,7 +29,11 @@ from dccd.daemon.stream_manager import StreamManager, SyncService
27
29
  __all__ = [
28
30
  'CollectorConfig',
29
31
  'HealthMonitor',
32
+ 'KrakenBackfill',
33
+ 'OHLCBackfill',
30
34
  'load_config',
35
+ 'make_job',
36
+ 'run_backfill',
31
37
  'RemoteStorage',
32
38
  'StreamManager',
33
39
  'SyncService',