dccd 2.3.3__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dccd-2.3.3 → dccd-3.0.0}/CHANGELOG.md +126 -0
- dccd-3.0.0/CLAUDE.md +276 -0
- dccd-3.0.0/PKG-INFO +250 -0
- dccd-3.0.0/README.md +183 -0
- dccd-3.0.0/dccd/__init__.py +273 -0
- dccd-3.0.0/dccd/application/__init__.py +1 -0
- dccd-3.0.0/dccd/application/config.py +347 -0
- dccd-3.0.0/dccd/application/events.py +167 -0
- dccd-3.0.0/dccd/application/jobs.py +97 -0
- dccd-3.0.0/dccd/application/monitor.py +68 -0
- dccd-3.0.0/dccd/application/operations.py +472 -0
- dccd-3.0.0/dccd/application/registry.py +94 -0
- dccd-3.0.0/dccd/application/scheduler.py +257 -0
- dccd-3.0.0/dccd/application/service_factory.py +79 -0
- dccd-3.0.0/dccd/domain/__init__.py +22 -0
- dccd-3.0.0/dccd/domain/capability.py +53 -0
- dccd-3.0.0/dccd/domain/dataset.py +53 -0
- dccd-3.0.0/dccd/domain/errors.py +34 -0
- dccd-3.0.0/dccd/domain/records.py +114 -0
- dccd-3.0.0/dccd/domain/symbol.py +63 -0
- dccd-3.0.0/dccd/domain/timeutils.py +298 -0
- dccd-3.0.0/dccd/domain/transforms.py +60 -0
- dccd-3.0.0/dccd/domain/types.py +27 -0
- dccd-3.0.0/dccd/interfaces/__init__.py +1 -0
- dccd-3.0.0/dccd/interfaces/api/__init__.py +5 -0
- dccd-3.0.0/dccd/interfaces/api/app.py +683 -0
- dccd-3.0.0/dccd/interfaces/cli/__init__.py +5 -0
- dccd-3.0.0/dccd/interfaces/cli/main.py +241 -0
- dccd-3.0.0/dccd/interfaces/ui/__init__.py +1 -0
- dccd-3.0.0/dccd/interfaces/ui/static/favicon.svg +27 -0
- dccd-3.0.0/dccd/interfaces/ui/static/logo.svg +59 -0
- dccd-3.0.0/dccd/interfaces/ui/templates/base.html +326 -0
- dccd-3.0.0/dccd/interfaces/ui/templates/config.html +256 -0
- dccd-3.0.0/dccd/interfaces/ui/templates/dashboard.html +123 -0
- dccd-3.0.0/dccd/interfaces/ui/templates/data.html +106 -0
- dccd-3.0.0/dccd/interfaces/ui/templates/historical.html +434 -0
- dccd-3.0.0/dccd/interfaces/ui/templates/live.html +308 -0
- dccd-3.0.0/dccd/interfaces/ui/templates/logs.html +130 -0
- dccd-3.0.0/dccd/interfaces/ui/templates/storage.html +53 -0
- dccd-3.0.0/dccd/sources/__init__.py +23 -0
- dccd-3.0.0/dccd/sources/base.py +135 -0
- dccd-3.0.0/dccd/sources/binance.py +281 -0
- dccd-3.0.0/dccd/sources/bitfinex.py +280 -0
- dccd-3.0.0/dccd/sources/bitmex.py +283 -0
- dccd-3.0.0/dccd/sources/bybit.py +234 -0
- dccd-3.0.0/dccd/sources/coinbase.py +247 -0
- dccd-3.0.0/dccd/sources/kraken.py +341 -0
- dccd-3.0.0/dccd/sources/okx.py +247 -0
- dccd-3.0.0/dccd/sources/registry.py +111 -0
- dccd-3.0.0/dccd/storage/__init__.py +6 -0
- dccd-3.0.0/dccd/storage/parquet.py +531 -0
- dccd-3.0.0/dccd/storage/remote.py +72 -0
- dccd-3.0.0/dccd/storage/runs_sqlite.py +179 -0
- dccd-3.0.0/dccd/tests/v3/__init__.py +1 -0
- dccd-3.0.0/dccd/tests/v3/test_api.py +261 -0
- dccd-3.0.0/dccd/tests/v3/test_application.py +360 -0
- dccd-3.0.0/dccd/tests/v3/test_backfill_lookback.py +66 -0
- dccd-3.0.0/dccd/tests/v3/test_client.py +24 -0
- dccd-3.0.0/dccd/tests/v3/test_domain.py +259 -0
- dccd-3.0.0/dccd/tests/v3/test_domain_extended.py +230 -0
- dccd-3.0.0/dccd/tests/v3/test_network.py +100 -0
- dccd-3.0.0/dccd/tests/v3/test_sources.py +317 -0
- dccd-3.0.0/dccd/tests/v3/test_storage.py +179 -0
- dccd-3.0.0/dccd/tests/v3/test_storage_extended.py +151 -0
- dccd-3.0.0/dccd/tests/v3/test_transport.py +32 -0
- dccd-3.0.0/dccd/transport/__init__.py +7 -0
- dccd-3.0.0/dccd/transport/http.py +114 -0
- dccd-3.0.0/dccd/transport/paginate.py +212 -0
- dccd-3.0.0/dccd/transport/ratelimit.py +81 -0
- dccd-3.0.0/dccd/transport/ws.py +95 -0
- dccd-3.0.0/dccd.egg-info/PKG-INFO +250 -0
- dccd-3.0.0/dccd.egg-info/SOURCES.txt +80 -0
- dccd-3.0.0/dccd.egg-info/entry_points.txt +2 -0
- {dccd-2.3.3 → dccd-3.0.0}/dccd.egg-info/requires.txt +18 -13
- dccd-3.0.0/pyproject.toml +142 -0
- dccd-2.3.3/PKG-INFO +0 -275
- dccd-2.3.3/README.md +0 -213
- dccd-2.3.3/dccd/__init__.py +0 -43
- dccd-2.3.3/dccd/continuous_dl/__init__.py +0 -49
- dccd-2.3.3/dccd/continuous_dl/binance.py +0 -250
- dccd-2.3.3/dccd/continuous_dl/bitfinex.py +0 -370
- dccd-2.3.3/dccd/continuous_dl/bitmex.py +0 -350
- dccd-2.3.3/dccd/continuous_dl/bybit.py +0 -241
- dccd-2.3.3/dccd/continuous_dl/exchange.py +0 -346
- dccd-2.3.3/dccd/continuous_dl/kraken.py +0 -323
- dccd-2.3.3/dccd/continuous_dl/okx.py +0 -317
- dccd-2.3.3/dccd/daemon/__init__.py +0 -42
- dccd-2.3.3/dccd/daemon/backfill.py +0 -654
- dccd-2.3.3/dccd/daemon/cli.py +0 -527
- dccd-2.3.3/dccd/daemon/config.py +0 -342
- dccd-2.3.3/dccd/daemon/health.py +0 -245
- dccd-2.3.3/dccd/daemon/scheduler.py +0 -158
- dccd-2.3.3/dccd/daemon/storage.py +0 -118
- dccd-2.3.3/dccd/daemon/stream_manager.py +0 -374
- dccd-2.3.3/dccd/histo_dl/__init__.py +0 -64
- dccd-2.3.3/dccd/histo_dl/binance.py +0 -204
- dccd-2.3.3/dccd/histo_dl/bybit.py +0 -221
- dccd-2.3.3/dccd/histo_dl/coinbase.py +0 -209
- dccd-2.3.3/dccd/histo_dl/exchange.py +0 -549
- dccd-2.3.3/dccd/histo_dl/kraken.py +0 -217
- dccd-2.3.3/dccd/histo_dl/okx.py +0 -216
- dccd-2.3.3/dccd/models.py +0 -85
- dccd-2.3.3/dccd/process_data.py +0 -139
- dccd-2.3.3/dccd/storage.py +0 -340
- dccd-2.3.3/dccd/tests/conftest.py +0 -222
- dccd-2.3.3/dccd/tests/test_backfill.py +0 -333
- dccd-2.3.3/dccd/tests/test_binance.py +0 -84
- dccd-2.3.3/dccd/tests/test_binance_ws.py +0 -187
- dccd-2.3.3/dccd/tests/test_bitfinex.py +0 -83
- dccd-2.3.3/dccd/tests/test_bitmex.py +0 -108
- dccd-2.3.3/dccd/tests/test_bybit.py +0 -77
- dccd-2.3.3/dccd/tests/test_bybit_ws.py +0 -114
- dccd-2.3.3/dccd/tests/test_coinbase.py +0 -73
- dccd-2.3.3/dccd/tests/test_daemon_cli.py +0 -358
- dccd-2.3.3/dccd/tests/test_daemon_config.py +0 -216
- dccd-2.3.3/dccd/tests/test_daemon_health.py +0 -85
- dccd-2.3.3/dccd/tests/test_daemon_scheduler.py +0 -152
- dccd-2.3.3/dccd/tests/test_daemon_storage.py +0 -204
- dccd-2.3.3/dccd/tests/test_daemon_stream_manager.py +0 -368
- dccd-2.3.3/dccd/tests/test_date_time.py +0 -132
- dccd-2.3.3/dccd/tests/test_histo_dl.py +0 -84
- dccd-2.3.3/dccd/tests/test_io.py +0 -142
- dccd-2.3.3/dccd/tests/test_kraken.py +0 -80
- dccd-2.3.3/dccd/tests/test_kraken_ws.py +0 -163
- dccd-2.3.3/dccd/tests/test_models.py +0 -53
- dccd-2.3.3/dccd/tests/test_okx.py +0 -101
- dccd-2.3.3/dccd/tests/test_okx_ws.py +0 -152
- dccd-2.3.3/dccd/tests/test_process_data.py +0 -78
- dccd-2.3.3/dccd/tests/test_storage.py +0 -392
- dccd-2.3.3/dccd/tests/test_websocket.py +0 -69
- dccd-2.3.3/dccd/tools/__init__.py +0 -32
- dccd-2.3.3/dccd/tools/date_time.py +0 -290
- dccd-2.3.3/dccd/tools/io.py +0 -240
- dccd-2.3.3/dccd/tools/websocket.py +0 -165
- dccd-2.3.3/dccd.egg-info/PKG-INFO +0 -275
- dccd-2.3.3/dccd.egg-info/SOURCES.txt +0 -70
- dccd-2.3.3/dccd.egg-info/entry_points.txt +0 -2
- dccd-2.3.3/pyproject.toml +0 -99
- {dccd-2.3.3 → dccd-3.0.0}/CONTRIBUTING.md +0 -0
- {dccd-2.3.3 → dccd-3.0.0}/LICENSE.txt +0 -0
- {dccd-2.3.3 → dccd-3.0.0}/MANIFEST.in +0 -0
- {dccd-2.3.3 → dccd-3.0.0}/dccd/tests/__init__.py +0 -0
- {dccd-2.3.3 → dccd-3.0.0}/dccd.egg-info/dependency_links.txt +0 -0
- {dccd-2.3.3 → dccd-3.0.0}/dccd.egg-info/top_level.txt +0 -0
- {dccd-2.3.3 → dccd-3.0.0}/setup.cfg +0 -0
|
@@ -6,10 +6,136 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
|
|
15
|
+
### Deprecated
|
|
16
|
+
|
|
17
|
+
### Removed
|
|
18
|
+
|
|
19
|
+
## [3.0.0] - 2026-06-07
|
|
20
|
+
|
|
21
|
+
### Added
|
|
22
|
+
|
|
23
|
+
- Reworked web UI split by concern: a read-only enriched **Inventory** (data
|
|
24
|
+
freshness, OHLC gap detection, on-disk size, per-exchange totals) and two
|
|
25
|
+
collection pages — **Historical** and **Live** — each with data-type tabs and
|
|
26
|
+
per-exchange accordions. Jobs are created, edited (first date) and deleted
|
|
27
|
+
inline on the page; the Live page shows a real-time liveness indicator (last
|
|
28
|
+
trade/quote + age) fed by a throttled stream heartbeat over SSE. (#76)
|
|
29
|
+
- Job CRUD over the API: `POST /api/jobs/create|delete|update`, backed by
|
|
30
|
+
`AppConfig.add_job`/`remove_job`/`update_job_start` (persisted to `config.yml`).
|
|
31
|
+
- `ParquetStore.inventory()` now reports on-disk `bytes` and, for OHLC,
|
|
32
|
+
`expected_rows`/`missing_rows` (gap detection) at no extra read cost.
|
|
33
|
+
- `EventBus` fan-out to multiple SSE consumers and a `StreamSampleEvent`
|
|
34
|
+
liveness sample emitted (throttled) by `operations.stream`.
|
|
35
|
+
- UI polish: nav reorganised into `Collect ▾`/`System ▾` dropdowns; **Inventory**
|
|
36
|
+
renamed **Data** (`/inventory`→`/data`) with data-type tabs; reworked Live
|
|
37
|
+
liveness — seeded from the last on-disk data point so a page refresh shows
|
|
38
|
+
freshness immediately (no "waiting…"), span-aware dot, a freshness label that
|
|
39
|
+
is a live relative "N min ago" counter under 24h and an absolute date beyond,
|
|
40
|
+
and no noisy age readout for fresh trades, with client-side number formatting;
|
|
41
|
+
order-book cadence (`snapshot_interval`) shown and settable;
|
|
42
|
+
Storage shows on-disk sizes; Dashboard adds a KPI bar and clearer sections;
|
|
43
|
+
Logs reoriented around recent runs with human run labels. The Config page no
|
|
44
|
+
longer duplicates job management (jobs live on Historical/Live; raw edit via
|
|
45
|
+
its JSON tab). `GET /api/jobs` now returns `start`/`every`/`snapshot_interval`/
|
|
46
|
+
`depth`. (#76)
|
|
47
|
+
- Cursor-based trades pagination: the engine now follows each adapter's opaque
|
|
48
|
+
cursor until a window is drained, instead of advancing by a fixed time window.
|
|
49
|
+
Fixes silent loss of >95% of trades on every liquid pair (all exchanges).
|
|
50
|
+
- UI: single-line top bar (brand + nav on one row); per-job **Schedule** on
|
|
51
|
+
Historical (a recurring backfill cron — Off/hourly/daily/custom, independent of
|
|
52
|
+
the span but `≥` it), reconciled live via `Scheduler.sync_intervals`; **Run
|
|
53
|
+
all** (global) and per-exchange run; timezone-aware date display driven by
|
|
54
|
+
`settings.timezone` (`local`/`UTC`/zoneinfo). OHLC removed from Live (collected
|
|
55
|
+
via Historical schedule); order books removed from Historical (no REST
|
|
56
|
+
history). `POST /api/jobs/update` now also sets `every` (schedule); new
|
|
57
|
+
`manual` trigger kind for never-auto-run jobs.
|
|
58
|
+
- Bearer auth on `/api/*` when `settings.ui_auth_token` is set, with a `?token=`
|
|
59
|
+
fallback for Server-Sent Events; `settings.ui_allow_origins` for opt-in CORS.
|
|
60
|
+
- Public async `Client.read()` and `Client.stream()`; `Client` wires adapters
|
|
61
|
+
via `service_factory` (single source of truth).
|
|
62
|
+
- Network-marked end-to-end tests (`pytest -m network`) validating pagination
|
|
63
|
+
against live exchange APIs.
|
|
64
|
+
|
|
65
|
+
### Fixed
|
|
66
|
+
|
|
67
|
+
- Data loss on merge: writing into an existing legacy v2 Parquet file no longer
|
|
68
|
+
silently overwrites it; existing rows are canonicalised and preserved.
|
|
69
|
+
- Provenance is now actually written into the Parquet footer (was computed but
|
|
70
|
+
dropped).
|
|
71
|
+
- Custom ISO start date for backfill no longer raises (`JobParams.start`).
|
|
72
|
+
- Historical *first date* edit no longer reverts on reload: `GET /api/jobs` was
|
|
73
|
+
not returning `start`, so the UI reset the field after every refresh. (#76)
|
|
74
|
+
- Live order-book streams reported a crossed/incorrect best bid-ask: the WS
|
|
75
|
+
adapters emitted unmerged diff levels. binance/okx/bitmex now use full
|
|
76
|
+
snapshot channels (`@depth<N>`, `books5`, `orderBook10`) and bybit
|
|
77
|
+
reconstructs full state from snapshot+deltas (like kraken); best bid/ask is
|
|
78
|
+
computed defensively (`max` bid / `min` ask). (#76)
|
|
79
|
+
- Order-book Live liveness was incoherent with its cadence: it sampled the WS
|
|
80
|
+
every second while only one snapshot per `snapshot_interval` is captured. The
|
|
81
|
+
liveness sample is now emitted when a snapshot is actually saved, so its age
|
|
82
|
+
counts up to the interval and resets (matching the "Δ Ns" cadence). (#76)
|
|
83
|
+
- `dccd inventory` no longer crashes on OHLC datasets.
|
|
84
|
+
- Streams with no real implementation (Coinbase OHLC/order book, Bitfinex order
|
|
85
|
+
book) are rejected with `NoCapability` instead of "running" with zero output.
|
|
86
|
+
- `history="recent"` exchanges (Kraken OHLC) are clamped + warned instead of
|
|
87
|
+
silently returning wrong deep history.
|
|
88
|
+
- Kraken live OHLC timestamps were epoch 0 (1970): the WS adapter read a
|
|
89
|
+
non-existent `timestamp_open`; it now parses `interval_begin` (ISO-8601).
|
|
90
|
+
- `mypy dccd/` runs and passes again (it had been aborting on the dev Sphinx).
|
|
91
|
+
|
|
92
|
+
### Changed / Removed
|
|
93
|
+
|
|
94
|
+
- Docs/examples swept to v3: README drops the removed `dccd migrate` command and
|
|
95
|
+
the "Migrating from v2" section; `examples/` rewritten to the v3 `Client` and
|
|
96
|
+
`dccd.application` daemon wiring with a v3 `jobs:` config, and the stale v2
|
|
97
|
+
`historical_downloader.ipynb` removed. (#82)
|
|
98
|
+
- Honest OHLC fidelity: Coinbase `quote_volume` is null (was a fabricated
|
|
99
|
+
`close×volume`); Kraken now fills its native trade count.
|
|
100
|
+
- Removed the dead `parallel` backfill flag, the unused `Page` model and the
|
|
101
|
+
unused bundled `htmx.min.js`.
|
|
102
|
+
- Removed the v2→v3 Parquet migration tool entirely: `dccd migrate`,
|
|
103
|
+
`POST /api/migrate`, the Storage-page migrate card, `dccd/storage/migrate.py`,
|
|
104
|
+
and the `migrate` operation in the registry.
|
|
105
|
+
|
|
106
|
+
> v3 is a full hexagonal rewrite. It **removes** the v2 daemon web UI shipped in
|
|
107
|
+
> 2.4.0 (`dccd/daemon/*`) and replaces it with `dccd/interfaces/` (api/cli/ui).
|
|
108
|
+
|
|
109
|
+
## [2.4.0] - 2026-06-04
|
|
110
|
+
|
|
111
|
+
### Added
|
|
112
|
+
|
|
113
|
+
- `dccd/daemon/api.py` — web UI and JSON API (FastAPI + Jinja2 + htmx): a thin
|
|
114
|
+
HTTP layer over the existing daemon modules exposing dashboard (live health
|
|
115
|
+
metrics), inventory (stored data coverage), jobs (histo/stream list + add/remove
|
|
116
|
+
+ live backfill progress), logs (tail), config (view/validate/save the YAML),
|
|
117
|
+
and storage (rclone status + manual sync). JSON-only API (`/api/*`) with
|
|
118
|
+
dumb-shell templates, so the front-end can be swapped without touching the API.
|
|
119
|
+
Optional Bearer-token auth via `settings.ui_auth_token`
|
|
120
|
+
- `dccd/daemon/cli.py` — `dccd ui`: serve the web UI standalone; the UI is also
|
|
121
|
+
started automatically (background thread) by `dccd start` when the `[ui]` extra
|
|
122
|
+
is installed
|
|
123
|
+
- `dccd/daemon/config.py` — `SettingsConfig.ui_host`, `ui_port`, `ui_auth_token`:
|
|
124
|
+
web UI bind address, port, and optional auth token
|
|
125
|
+
- `dccd/daemon/backfill.py` — `progress_callback` and `stop_event` on
|
|
126
|
+
`_BackfillBase.run()` / `run_backfill()`: let the UI report live progress and
|
|
127
|
+
cancel a running backfill (defaults keep CLI behaviour unchanged)
|
|
128
|
+
- `dccd/daemon/stream_manager.py` — `SyncService` writes
|
|
129
|
+
`{local_path}/.dccd/last_sync.json` after each successful remote push, so the UI
|
|
130
|
+
can display the last sync time
|
|
131
|
+
- `pyproject.toml` — new optional extra `[ui]` (`fastapi`, `uvicorn[standard]`,
|
|
132
|
+
`jinja2`); install with `pip install dccd[daemon,ui]`
|
|
133
|
+
|
|
9
134
|
## [2.3.3] - 2026-05-31
|
|
10
135
|
|
|
11
136
|
### Added
|
|
12
137
|
|
|
138
|
+
|
|
13
139
|
- `doc/source/` — complete Sphinx documentation overhaul: redesigned homepage
|
|
14
140
|
with sphinx-design cards, captioned toctrees (Getting Started / Data Collection /
|
|
15
141
|
Reference), new pages (`installation`, `quickstart`, `changelog`, `cli`,
|
dccd-3.0.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
> **Claude-oriented developer brief**: [`doc/dev/`](doc/dev/) contains an
|
|
6
|
+
> orientation pack written specifically for Claude Code — overview, architecture,
|
|
7
|
+
> design decisions & rationale, the per-exchange capability matrix, testing
|
|
8
|
+
> methodology + findings, current status, and the roadmap. Start at
|
|
9
|
+
> [`doc/dev/README.md`](doc/dev/README.md) for a fuller picture than this file
|
|
10
|
+
> gives. `CLAUDE.md` remains authoritative for commands and invariants.
|
|
11
|
+
|
|
12
|
+
## Commands
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
# Dev install (Python 3.11+)
|
|
16
|
+
pip install -e ".[dev]"
|
|
17
|
+
|
|
18
|
+
# Run full unit suite (network E2E excluded by default via -m 'not network')
|
|
19
|
+
pytest
|
|
20
|
+
|
|
21
|
+
# Run a single test file
|
|
22
|
+
pytest dccd/tests/v3/test_domain.py -v
|
|
23
|
+
|
|
24
|
+
# Real-exchange end-to-end tests (hit live APIs; opt-in)
|
|
25
|
+
pytest -m network
|
|
26
|
+
|
|
27
|
+
# Lint
|
|
28
|
+
ruff check dccd/
|
|
29
|
+
|
|
30
|
+
# Type check (strict on domain/; mypy assumes python 3.12 — see note below)
|
|
31
|
+
mypy dccd/
|
|
32
|
+
|
|
33
|
+
# Build Sphinx docs (must produce 0 warnings)
|
|
34
|
+
cd doc && make html
|
|
35
|
+
|
|
36
|
+
# UI smoke test (headless browser; start an isolated `dccd ui` first)
|
|
37
|
+
pip install playwright && playwright install chromium
|
|
38
|
+
python doc/dev/ui_smoke.py http://127.0.0.1:8137
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
> **mypy** assumes `python_version = 3.12` (in `pyproject.toml`): the dev/docs
|
|
42
|
+
> env ships Sphinx whose source uses 3.12 `type` statements, which made mypy
|
|
43
|
+
> abort under 3.11. dccd supports 3.11–3.13, so 3.12 semantics are safe.
|
|
44
|
+
|
|
45
|
+
## Git Flow
|
|
46
|
+
|
|
47
|
+
**Branch model:**
|
|
48
|
+
```
|
|
49
|
+
master ← stable releases only (tagged vX.Y.Z)
|
|
50
|
+
└── develop ← integration branch
|
|
51
|
+
├── feat/<topic> new feature or modernization axis
|
|
52
|
+
├── fix/<topic> bug fix
|
|
53
|
+
├── chore/<topic> tooling, CI, deps
|
|
54
|
+
└── docs/<topic> documentation only
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Rules — always follow these before committing or pushing:**
|
|
58
|
+
1. **Never commit directly to `master`.**
|
|
59
|
+
2. **Never commit directly to `develop`** — always use a feature branch + PR.
|
|
60
|
+
3. Branch off `develop`: `git checkout develop && git checkout -b feat/my-topic`
|
|
61
|
+
4. Open a PR into `develop` when done. `develop` → `master` only at release time.
|
|
62
|
+
|
|
63
|
+
**Commit style (Conventional Commits):**
|
|
64
|
+
```
|
|
65
|
+
feat: add Bybit futures OHLC capability
|
|
66
|
+
fix: correct paginator window for Coinbase
|
|
67
|
+
chore: upgrade httpx to 0.28
|
|
68
|
+
docs: update README for v3 install
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Do not add `Co-Authored-By` trailers to commits — this is a personal repo.
|
|
72
|
+
|
|
73
|
+
**Before every commit:** run `pytest`. It must pass.
|
|
74
|
+
|
|
75
|
+
**One PR = one concern, small and disposable.** Even a large plan ships as
|
|
76
|
+
*several* small atomic PRs — never one catch-all branch. A PR you couldn't throw
|
|
77
|
+
away without losing unrelated good work is too big: split it. This is what makes
|
|
78
|
+
`/abandon-task` (kill a bad PR, keep the lesson) viable.
|
|
79
|
+
|
|
80
|
+
### Dev loop & docs of record
|
|
81
|
+
|
|
82
|
+
The iterative loop is tooled by skills, with three tracked docs as the sources of
|
|
83
|
+
truth:
|
|
84
|
+
|
|
85
|
+
| Doc | Holds | Updated by |
|
|
86
|
+
|-----|-------|-----------|
|
|
87
|
+
| `doc/dev/07-roadmap.md` | open work (single source) | `/pick-task` reads · `/finish-task`, `/abandon-task` update |
|
|
88
|
+
| `doc/dev/03-decisions.md` | the *why* — ADR journal (+ settled rationale) | `/finish-task` (accepted), `/abandon-task` (rejected/tombstone) |
|
|
89
|
+
| `doc/dev/06-status.md` | where things stand | `/finish-task`, `/groom-docs` |
|
|
90
|
+
|
|
91
|
+
`CHANGELOG.md` + git log stay authoritative for *what* shipped. The loop:
|
|
92
|
+
`/pick-task` (smallest slice → branch) → plan (split big plans into small PRs) →
|
|
93
|
+
`/finish-task` (tests, ADR entry, status, PR) **or** `/abandon-task` (salvage the
|
|
94
|
+
lesson + close the PR); `/groom-docs` periodically keeps `doc/dev/` lean and true.
|
|
95
|
+
|
|
96
|
+
**Model per task** (advisory — you set it via `/model`, or a skill spawns a
|
|
97
|
+
subagent with an explicit `model`; subagents otherwise *inherit* the parent):
|
|
98
|
+
|
|
99
|
+
| Model | For |
|
|
100
|
+
|-------|-----|
|
|
101
|
+
| `opus` | judgement, design, decisions, planning, review |
|
|
102
|
+
| `sonnet` | implementation — code, tests, docstrings |
|
|
103
|
+
| `haiku` | mechanical fan-out (doc scans, checklists) — spawn it explicitly as a subagent |
|
|
104
|
+
|
|
105
|
+
## Architecture (v3 — hexagonal)
|
|
106
|
+
|
|
107
|
+
### Three usage modes
|
|
108
|
+
|
|
109
|
+
1. **Python API** — `async with Client() as c: await c.backfill(...)`.
|
|
110
|
+
2. **CLI** — `dccd` command (backfill, stream, start, ui, …).
|
|
111
|
+
3. **HTTP API / UI** — FastAPI server + Jinja2 templates (`dccd ui` or `dccd start`).
|
|
112
|
+
|
|
113
|
+
### Package structure
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
dccd/
|
|
117
|
+
domain/ # Pure, sync, zero I/O — models, capabilities, transforms
|
|
118
|
+
transport/ # Async HTTP (httpx), WebSocket base, RateLimiter, Paginator
|
|
119
|
+
sources/ # Exchange adapters (Source protocols + registry)
|
|
120
|
+
storage/ # ParquetStore, RunsStore (SQLite), RemoteStorage
|
|
121
|
+
application/ # Operations (backfill, stream), Scheduler, EventBus, Config
|
|
122
|
+
interfaces/
|
|
123
|
+
api/ # FastAPI app (1:1 with OperationRegistry)
|
|
124
|
+
cli/ # Typer CLI (asyncio.run)
|
|
125
|
+
ui/ # Jinja2 templates (pure HTTP client of api/)
|
|
126
|
+
tests/v3/ # All tests
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Domain layer (`domain/`)
|
|
130
|
+
|
|
131
|
+
Pure, synchronous, no I/O. Never import from transport/sources/storage.
|
|
132
|
+
|
|
133
|
+
| Module | Contents |
|
|
134
|
+
|--------|----------|
|
|
135
|
+
| `symbol.py` | `Symbol(base, quote)` — normalises XBT→BTC |
|
|
136
|
+
| `types.py` | `DataType` enum: `ohlc`, `trades`, `orderbook` |
|
|
137
|
+
| `records.py` | `OHLCBar`, `Trade`, `OrderBookSnapshot` (ns timestamps) |
|
|
138
|
+
| `dataset.py` | `DatasetId`, `Provenance` |
|
|
139
|
+
| `capability.py` | `Capability` — declared per adapter per (data_type × transport × mode) |
|
|
140
|
+
| `timeutils.py` | Helpers: `s_to_ns`, `align_ns`, `span_label`, `binance_interval`, … |
|
|
141
|
+
| `transforms.py` | `aggregate_ohlc(trades, span)` — pure computation |
|
|
142
|
+
| `errors.py` | `NoCapability`, `CoverageError` |
|
|
143
|
+
|
|
144
|
+
**All internal timestamps are nanoseconds UTC (int64).**
|
|
145
|
+
|
|
146
|
+
### Transport layer (`transport/`)
|
|
147
|
+
|
|
148
|
+
Async only. Drives I/O; domain stays pure.
|
|
149
|
+
|
|
150
|
+
| Module | Contents |
|
|
151
|
+
|--------|----------|
|
|
152
|
+
| `http.py` | `AsyncHTTPClient` — httpx wrapper with retry/backoff |
|
|
153
|
+
| `ws.py` | `WebSocketBase` — `stream_raw()` async generator with exponential reconnect |
|
|
154
|
+
| `ratelimit.py` | `RateLimiter` — token-bucket per exchange |
|
|
155
|
+
| `paginate.py` | `paginate_ohlc`, `paginate_trades` — generic forward paginator |
|
|
156
|
+
|
|
157
|
+
**Paginator contract**: callers must pass a closure with `symbol` (and `span` for OHLC) already bound:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
async def _fetch(start_ns, end_ns, limit):
|
|
161
|
+
return await adapter.fetch_ohlc_page(symbol, span, start_ns, end_ns, limit)
|
|
162
|
+
async for bar in paginate_ohlc(_fetch, cap, start_ns, end_ns, span):
|
|
163
|
+
...
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Source adapters (`sources/`)
|
|
167
|
+
|
|
168
|
+
One class per exchange implementing Source protocol mixins:
|
|
169
|
+
|
|
170
|
+
- `OHLCHistory`, `TradesHistory`, `OrderBookSnapshotREST` — REST historical
|
|
171
|
+
- `OHLCLive`, `TradesLive`, `OrderBookLive` — WebSocket live
|
|
172
|
+
|
|
173
|
+
Adapters declare their capabilities via `capabilities() -> list[Capability]`.
|
|
174
|
+
|
|
175
|
+
| Exchange | Notes |
|
|
176
|
+
|----------|-------|
|
|
177
|
+
| `binance.py` | Full history OHLC+trades, depth 5000 |
|
|
178
|
+
| `coinbase.py` | 300 candles/req (Paginator handles automatically) |
|
|
179
|
+
| `kraken.py` | OHLC: the 720 most recent bars only (`history="recent"`); trades: full via `since` cursor |
|
|
180
|
+
| `bybit.py` | No spot trades history (capability not declared → `NoCapability` early) |
|
|
181
|
+
| `okx.py` | `history-candles` + `history-trades` for deep history |
|
|
182
|
+
| `bitfinex.py` | Up to 10 000 items per request |
|
|
183
|
+
| `bitmex.py` | Bucketed OHLC (1m/5m/1h/1d only), full trades |
|
|
184
|
+
|
|
185
|
+
**WS adapters** extend `WebSocketBase` and use `self.stream_raw()` (NOT a custom `_stream_raw` — the base handles reconnect).
|
|
186
|
+
|
|
187
|
+
### Storage (`storage/`)
|
|
188
|
+
|
|
189
|
+
| Module | Contents |
|
|
190
|
+
|--------|----------|
|
|
191
|
+
| `parquet.py` | `ParquetStore` — read/write Parquet (ns, provenance, dedup); `inventory()` enriched with on-disk `bytes` and (OHLC only) `expected_rows`/`missing_rows` gap detection at zero extra I/O |
|
|
192
|
+
| `runs_sqlite.py` | `RunsStore` — SQLite WAL, append-only job run history |
|
|
193
|
+
| `remote.py` | `RemoteStorage` — rclone sync |
|
|
194
|
+
|
|
195
|
+
**Layout**: `{data_path}/{exchange}/ohlc/{pair}/{span}/YYYY.parquet` (annual) and `.../trades/{pair}/YYYY-MM-DD.parquet` (daily).
|
|
196
|
+
|
|
197
|
+
### Application (`application/`)
|
|
198
|
+
|
|
199
|
+
| Module | Contents |
|
|
200
|
+
|--------|----------|
|
|
201
|
+
| `config.py` | `AppConfig` + `JobConfig` — Pydantic v2, validates exchange names + span-for-OHLC; runtime CRUD (`add_job`, `remove_job`, `update_job_start`) normalises mutations to single-pair entries (multi-pair configs are read but split on edit) |
|
|
202
|
+
| `events.py` | `EventBus` — pub/sub with **multi-queue fan-out** (`add_queue`/`remove_queue`, `enable_queue` alias) so Live + Logs + Dashboard consume concurrently; events: `ProgressEvent`, `LogEvent`, `StatusEvent`, `StreamSampleEvent` |
|
|
203
|
+
| `jobs.py` | `JobSpec`, `JobRun`, `Trigger`, `JobParams` |
|
|
204
|
+
| `operations.py` | `backfill()`, `stream()` (emits throttled `StreamSampleEvent` ≤1/s for Live liveness), `read()`, `inventory()` |
|
|
205
|
+
| `scheduler.py` | `Scheduler` — async interval/supervised/once job orchestration; `sync_streams()` reconciles stream workers and `sync_intervals()` reconciles recurring backfill loops (start/cancel/restart on cadence change, keyed by spec id) — both stop+drop deleted ones |
|
|
206
|
+
| `registry.py` | `REGISTRY` — maps operation names to schemas (parity enforcement) |
|
|
207
|
+
| `monitor.py` | `HealthMonitor` — EventBus subscriber, webhook alerts |
|
|
208
|
+
| `service_factory.py` | `build_registry()`, `build_store()`, `build_runs_store()` — **single source of truth for wiring** |
|
|
209
|
+
|
|
210
|
+
**Adding a new exchange**: add the adapter to `sources/`, register it in `service_factory.build_registry()`.
|
|
211
|
+
|
|
212
|
+
### Interfaces (`interfaces/`)
|
|
213
|
+
|
|
214
|
+
- `api/app.py` — FastAPI `create_app()`, lifespan context manager, module-level Pydantic request models. Job CRUD lives here: `POST /api/jobs/{create,delete,update}` (body-based to allow `/`/`:` in ids), all routed through the async `_persist_and_refresh` helper (writes YAML, updates `app.state`, calls `scheduler.sync_streams` **and** `scheduler.sync_intervals` to reconcile recurring backfills live). `POST /api/jobs/update` edits `start` and/or the recurring `every` (schedule). `GET /api/jobs` exposes `start`/`every`/`trigger`/`snapshot_interval`/`depth` so the UI can render and preserve them. `POST /api/jobs/run` + `/api/jobs/run-all` trigger configured backfills on demand. SSE at `GET /api/events` uses `add_queue`/`remove_queue` for multi-consumer fan-out.
|
|
215
|
+
- `cli/main.py` — Typer commands, all import from `service_factory`
|
|
216
|
+
- `ui/` — Jinja2 templates + static files. Nav: `Dashboard` · `Data` flat, plus `Collect ▾` (Historical/Live) and `System ▾` (Logs/Config/Storage) dropdowns. Pages are **split by concern**:
|
|
217
|
+
- **Data** (`data.html`, route `/data`; `/inventory` 307-redirects here) — read-only view of what's on disk: DataType tabs → per-exchange accordions with totals, freshness dot, OHLC gap %, on-disk size, file count. No action buttons.
|
|
218
|
+
- **Historical** (`historical.html`) — backfill jobs (**OHLC + Trades only**; order books have no REST history): DataType tabs → exchange accordions → one row per dataset with editable `first_date` (defaults to the dataset's earliest stored bar), a **Schedule** select (Off/hourly/daily/custom → `every`; `manual` trigger when off), real coverage bar, inline Run/Delete. **Run all** (header) + per-exchange **Run all**. New jobs default to `manual`.
|
|
219
|
+
- **Live** (`live.html`) — stream jobs (**Trades + Order Book only**; OHLC is collected via the Historical schedule, not streamed): same tab/accordion shape, with a liveness indicator fed by `StreamSampleEvent` over SSE (numeric `value`/`bid`/`ask`, formatted client-side via `fmtNum`). Liveness is **seeded from the last on-disk point** (inventory `max_ts`) so a refresh shows freshness without waiting for a live sample. The dot's "fresh" window is span-aware (order-book `snapshot_interval` / short for trades); the freshness label is a relative "N ago" under 24h (`fmtFreshness`) and an absolute date beyond, or the last-run date-time when stopped. Cadence column + `snapshot_interval` field for order book. Inline Start/Stop/Delete.
|
|
220
|
+
- Single top bar carries the brand (logo · `dccd` · version) left and the nav right. Dates render in `settings.timezone` (`local`/`UTC`/zoneinfo) via `DCCD_TZ` in `fmtNs`/`fmtDate`; relative ages are tz-independent.
|
|
221
|
+
- `dashboard.html` (KPIs + Active now / Recent runs / Data), `logs.html` (recent runs first, live console secondary, human run labels), `config.html` (Settings incl. `timezone`/Alerts/Storage + Raw JSON — **no jobs form**; jobs are managed on Historical/Live), `storage.html` (sizes via `fmtBytes`; no migrate tool).
|
|
222
|
+
|
|
223
|
+
**UI↔API contract**: UI is a pure HTTP client of the API — no direct calls to application layer. Inline job create/edit/delete on Historical/Live go through `/api/jobs/*`; the Config page no longer manages jobs (edit the `jobs` array via its Raw JSON tab if needed).
|
|
224
|
+
|
|
225
|
+
## Testing conventions
|
|
226
|
+
|
|
227
|
+
Tests live in `dccd/tests/v3/`. No doctests (removed `--doctest-modules` from `addopts`).
|
|
228
|
+
|
|
229
|
+
Coverage is measured on every run (`--cov=dccd`). CI matrix: Python 3.11–3.13.
|
|
230
|
+
|
|
231
|
+
Key test files:
|
|
232
|
+
- `test_domain.py` + `test_domain_extended.py` — domain models, transforms, config validation
|
|
233
|
+
- `test_sources.py` — capability declarations, protocol compliance, symbol mapping
|
|
234
|
+
- `test_storage.py` + `test_storage_extended.py` — ParquetStore, dedup keys, gap detection
|
|
235
|
+
- `test_application.py` — EventBus (multi-queue fan-out, `sample`), JobSpec, OperationRegistry parity, `AppConfig` job CRUD (incl. multi-pair split)
|
|
236
|
+
- `test_api.py` — FastAPI endpoints (incl. auth, backfill cancel, `/api/jobs/{create,delete,update}`, stream-delete unregisters worker) via TestClient
|
|
237
|
+
- `test_transport.py` — AsyncHTTPClient concurrency safety
|
|
238
|
+
- `test_backfill_lookback.py` — bounded default lookback per data type
|
|
239
|
+
- `test_network.py` — **real-exchange** E2E (`@pytest.mark.network`, opt-in)
|
|
240
|
+
|
|
241
|
+
**Test the chain on real data, not just the pieces.** A green unit suite missed
|
|
242
|
+
a backfill writing 0 rows, a store losing 58% of trades, and a "Stop" button
|
|
243
|
+
that did nothing. For any data path: run the real operation, read what landed on
|
|
244
|
+
Parquet, and compare it to what was requested. Back up before any in-place
|
|
245
|
+
mutation. Full methodology + the catalogue of bugs this surfaced:
|
|
246
|
+
[`doc/dev/05-testing.md`](doc/dev/05-testing.md).
|
|
247
|
+
UI smoke test: `doc/dev/ui_smoke.py`.
|
|
248
|
+
|
|
249
|
+
### Invariants — do not regress
|
|
250
|
+
|
|
251
|
+
- **Trades pagination is cursor-based** (per-adapter opaque cursor); never
|
|
252
|
+
advance trades by a fixed time window. OHLC snaps the start to the *bar* (span),
|
|
253
|
+
not the window.
|
|
254
|
+
- **Dedup key is per data type** (`ParquetStore._dedup_subset`): OHLC=`TS`,
|
|
255
|
+
trades=`tid`(else composite), order book=`(TS,side,price)`. `TS` alone is
|
|
256
|
+
unique only for OHLC.
|
|
257
|
+
- **Declared capabilities must be honest**: don't declare a WS channel or
|
|
258
|
+
`history` depth that isn't implemented — the engine rejects undeclared ones.
|
|
259
|
+
- **All timestamps ns UTC int64**; legacy frames pass through
|
|
260
|
+
`ParquetStore.canonicalize()` before any `concat`.
|
|
261
|
+
- **First `start=last` backfill is bounded** per type (`_DEFAULT_LOOKBACK_NS`);
|
|
262
|
+
backfills are cancellable (`stop_event` → `DELETE /api/backfill/{id}`).
|
|
263
|
+
- **Adapters share one reference-counted HTTP client** (concurrency-safe).
|
|
264
|
+
- **`ui_auth_token` enforces Bearer on `/api/*`**; CORS is not wildcard.
|
|
265
|
+
- **Stream worker set is reconciled, not append-only**: deleting a stream job must call
|
|
266
|
+
`Scheduler.sync_streams()` so its worker is stopped and dropped (never left
|
|
267
|
+
running/controllable after its config is gone).
|
|
268
|
+
- **`EventBus` fans out to all registered queues**; SSE consumers register via
|
|
269
|
+
`add_queue` and must `remove_queue` on disconnect (done in the `/api/events`
|
|
270
|
+
`finally`).
|
|
271
|
+
|
|
272
|
+
## Dependencies
|
|
273
|
+
|
|
274
|
+
Core (Python 3.11+): `httpx`, `websockets`, `pydantic>=2`, `polars`, `pyarrow`, `numpy`, `scipy`
|
|
275
|
+
Daemon extra: `pyyaml`, `typer`, `tqdm`, `uvicorn`, `fastapi`, `jinja2`, `apscheduler>=3.10,<4`
|
|
276
|
+
Dev extra: `pytest`, `pytest-asyncio`, `pytest-cov`, `ruff`, `mypy`, `interrogate`
|
dccd-3.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dccd
|
|
3
|
+
Version: 3.0.0
|
|
4
|
+
Summary: Download Crypto Currency Data — hexagonal architecture, async-first.
|
|
5
|
+
Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ArthurBernard/Download_Crypto_Currencies_Data
|
|
8
|
+
Project-URL: Documentation, https://download-crypto-currencies-data.readthedocs.io/
|
|
9
|
+
Project-URL: Changelog, https://github.com/ArthurBernard/Download_Crypto_Currencies_Data/blob/master/CHANGELOG.md
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Database
|
|
19
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE.txt
|
|
23
|
+
Requires-Dist: httpx>=0.27
|
|
24
|
+
Requires-Dist: websockets>=12.0
|
|
25
|
+
Requires-Dist: pydantic>=2.0
|
|
26
|
+
Requires-Dist: polars>=0.20
|
|
27
|
+
Requires-Dist: pyarrow>=13
|
|
28
|
+
Requires-Dist: numpy>=1.26
|
|
29
|
+
Requires-Dist: scipy>=1.10
|
|
30
|
+
Requires-Dist: SQLAlchemy>=2.0
|
|
31
|
+
Provides-Extra: daemon
|
|
32
|
+
Requires-Dist: pyyaml>=6.0; extra == "daemon"
|
|
33
|
+
Requires-Dist: typer>=0.12; extra == "daemon"
|
|
34
|
+
Requires-Dist: tqdm>=4.64; extra == "daemon"
|
|
35
|
+
Requires-Dist: uvicorn[standard]>=0.29; extra == "daemon"
|
|
36
|
+
Requires-Dist: fastapi>=0.110; extra == "daemon"
|
|
37
|
+
Requires-Dist: jinja2>=3.1; extra == "daemon"
|
|
38
|
+
Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
43
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
44
|
+
Requires-Dist: interrogate>=1.5; extra == "dev"
|
|
45
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
46
|
+
Requires-Dist: pyyaml>=6.0; extra == "dev"
|
|
47
|
+
Requires-Dist: typer>=0.12; extra == "dev"
|
|
48
|
+
Requires-Dist: tqdm>=4.64; extra == "dev"
|
|
49
|
+
Requires-Dist: uvicorn[standard]>=0.29; extra == "dev"
|
|
50
|
+
Requires-Dist: fastapi>=0.110; extra == "dev"
|
|
51
|
+
Requires-Dist: jinja2>=3.1; extra == "dev"
|
|
52
|
+
Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
|
|
53
|
+
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
54
|
+
Provides-Extra: doc
|
|
55
|
+
Requires-Dist: sphinx>=7.0; extra == "doc"
|
|
56
|
+
Requires-Dist: furo; extra == "doc"
|
|
57
|
+
Requires-Dist: numpydoc; extra == "doc"
|
|
58
|
+
Requires-Dist: sphinx-design; extra == "doc"
|
|
59
|
+
Requires-Dist: sphinx-copybutton; extra == "doc"
|
|
60
|
+
Requires-Dist: sphinx-click; extra == "doc"
|
|
61
|
+
Requires-Dist: autodoc-pydantic>=2.0; extra == "doc"
|
|
62
|
+
Requires-Dist: pyyaml>=6.0; extra == "doc"
|
|
63
|
+
Requires-Dist: fastapi>=0.110; extra == "doc"
|
|
64
|
+
Requires-Dist: uvicorn[standard]>=0.29; extra == "doc"
|
|
65
|
+
Requires-Dist: jinja2>=3.1; extra == "doc"
|
|
66
|
+
Dynamic: license-file
|
|
67
|
+
|
|
68
|
+
<picture>
|
|
69
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/ArthurBernard/Download_Crypto_Currencies_Data/develop/doc/source/_static/logo-dark-transparent.svg">
|
|
70
|
+
<img alt="dccd logo" src="https://raw.githubusercontent.com/ArthurBernard/Download_Crypto_Currencies_Data/develop/doc/source/_static/logo-light-transparent.svg" height="180px" align="left">
|
|
71
|
+
</picture>
|
|
72
|
+
|
|
73
|
+
# **Download Crypto-Currency Data** — v3
|
|
74
|
+
|
|
75
|
+
[PyPI version](https://pypi.org/project/dccd/)
|
|
76
|
+
[Python versions](https://pypi.org/project/dccd/)
|
|
77
|
+
[CI status](https://github.com/ArthurBernard/Download_Crypto_Currencies_Data/actions/workflows/ci.yml)
|
|
78
|
+
[License](https://github.com/ArthurBernard/Download_Crypto_Currencies_Data/blob/master/LICENSE.txt)<br>
|
|
79
|
+
[Documentation](https://download-crypto-currencies-data.readthedocs.io/en/latest/)
|
|
80
|
+
[Coverage](https://codecov.io/gh/ArthurBernard/Download_Crypto_Currencies_Data)
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
**dccd** downloads crypto-currency market data (OHLCV, trades, order book)
|
|
85
|
+
from 7 exchanges via REST and WebSocket. Data is stored as Parquet files with
|
|
86
|
+
nanosecond-precision timestamps.
|
|
87
|
+
|
|
88
|
+
## Architecture (v3)
|
|
89
|
+
|
|
90
|
+
Hexagonal architecture — business logic is fully separated from interfaces:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
Interfaces: CLI · HTTP API · Web UI · Python Client
|
|
94
|
+
↓
|
|
95
|
+
Application: backfill, stream, read, inventory
|
|
96
|
+
↓
|
|
97
|
+
Domain ← Sources (7 exchange adapters) ← Transport (httpx · WS · Paginator)
|
|
98
|
+
↓
|
|
99
|
+
Storage: ParquetStore + RunsStore (SQLite)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
- **Async-first** — httpx + websockets, one event loop; CLI via `asyncio.run`
|
|
103
|
+
- **Nanosecond timestamps** — uniform int64 UTC throughout the store
|
|
104
|
+
- **Generic Paginator** — no per-exchange chunking; Coinbase 300-limit is a capability declaration
|
|
105
|
+
- **`NoCapability` raised early** — Bybit has no spot trade history, Kraken OHLC is recent-only → clear error
|
|
106
|
+
- **Four iso-functional interfaces** — same operations everywhere (parity test enforces this)
|
|
107
|
+
|
|
108
|
+
## Supported exchanges
|
|
109
|
+
|
|
110
|
+
You pick a **data type** (OHLC · trades · order book) and an **operation** —
|
|
111
|
+
**backfill** (history) or **stream** (live):
|
|
112
|
+
|
|
113
|
+
| Exchange | Backfill (history) | Stream (live) |
|
|
114
|
+
|----------|--------------------|---------------|
|
|
115
|
+
| Binance | OHLC · trades · book | OHLC · trades · book |
|
|
116
|
+
| Coinbase | OHLC · book · trades *(recent)* | trades |
|
|
117
|
+
| Kraken | OHLC *(720 recent)* · trades · book | OHLC · trades · book |
|
|
118
|
+
| Bybit | OHLC · book | OHLC · trades · book |
|
|
119
|
+
| OKX | OHLC · trades · book | OHLC · trades · book |
|
|
120
|
+
| Bitfinex | OHLC · trades · book | OHLC · trades |
|
|
121
|
+
| BitMEX | OHLC *(1m/5m/1h/1d)* · trades · book | OHLC · trades · book |
|
|
122
|
+
|
|
123
|
+
Trades backfill is **cursor-paginated** (drains the full window, not just the
|
|
124
|
+
first page). *recent* = no deep history via the public API (a deeper request is
|
|
125
|
+
rejected/clamped early, never silently truncated); Bybit spot has no trade
|
|
126
|
+
history. **Order-book backfill** is a single snapshot — use a stream to record
|
|
127
|
+
the book over time. Stream channels are listed only where actually implemented
|
|
128
|
+
(undeclared ones raise `NoCapability`).
|
|
129
|
+
|
|
130
|
+
### OHLC field fidelity
|
|
131
|
+
|
|
132
|
+
Not every exchange returns every OHLC field natively. Missing fields are stored
|
|
133
|
+
as `null` (never fabricated):
|
|
134
|
+
|
|
135
|
+
| Exchange | `quote_volume` | `trades` (count) |
|
|
136
|
+
|----------|----------------|------------------|
|
|
137
|
+
| Binance | ✅ native | ✅ native |
|
|
138
|
+
| Bybit / OKX | ✅ native | — null |
|
|
139
|
+
| Kraken | ✅ (vwap × volume, exact) | ✅ native |
|
|
140
|
+
| Coinbase / Bitfinex / BitMEX | — null | — null |
|
|
141
|
+
|
|
142
|
+
## Installation
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
# Core — Python 3.11+
|
|
146
|
+
pip install dccd
|
|
147
|
+
|
|
148
|
+
# With scheduler, CLI, and web UI
|
|
149
|
+
pip install "dccd[daemon]"
|
|
150
|
+
|
|
151
|
+
# Development
|
|
152
|
+
pip install "dccd[dev]"
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Quick start
|
|
156
|
+
|
|
157
|
+
### Python API
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
import asyncio
|
|
161
|
+
from dccd import Client
|
|
162
|
+
|
|
163
|
+
async def main():
|
|
164
|
+
async with Client() as c:
|
|
165
|
+
result = await c.backfill("binance", "BTC/USDT", data_type="ohlc", span=3600)
|
|
166
|
+
print(f"Wrote {result['rows_written']} rows")
|
|
167
|
+
for ds in c.inventory():
|
|
168
|
+
print(ds)
|
|
169
|
+
|
|
170
|
+
asyncio.run(main())
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### CLI
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
dccd validate --config config.yml # validate config
|
|
177
|
+
dccd backfill --config config.yml # run all backfill jobs
|
|
178
|
+
dccd backfill -e binance -s BTC/USDT --type ohlc --span 3600 # ad-hoc
|
|
179
|
+
dccd stream --config config.yml # run WebSocket stream jobs
|
|
180
|
+
dccd start --config config.yml # full daemon + UI
|
|
181
|
+
dccd ui --config config.yml # UI only (no scheduler)
|
|
182
|
+
dccd inventory --config config.yml # list stored datasets
|
|
183
|
+
dccd status --config config.yml # show recent runs
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Configuration (`config.yml`)
|
|
187
|
+
|
|
188
|
+
```yaml
|
|
189
|
+
settings:
|
|
190
|
+
data_path: ./data/crypto
|
|
191
|
+
timezone: UTC
|
|
192
|
+
ui_port: 8080
|
|
193
|
+
|
|
194
|
+
jobs:
|
|
195
|
+
- exchange: binance
|
|
196
|
+
pairs: [BTC/USDT, ETH/USDT]
|
|
197
|
+
data_type: ohlc
|
|
198
|
+
span: 3600
|
|
199
|
+
trigger_kind: interval
|
|
200
|
+
every: 3600
|
|
201
|
+
|
|
202
|
+
- exchange: kraken
|
|
203
|
+
pairs: [BTC/USD]
|
|
204
|
+
data_type: trades
|
|
205
|
+
operation: stream
|
|
206
|
+
trigger_kind: supervised
|
|
207
|
+
|
|
208
|
+
storage:
|
|
209
|
+
remotes:
|
|
210
|
+
- provider: rclone
|
|
211
|
+
remote: "mynas:crypto/"
|
|
212
|
+
sync_interval: 3600
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### HTTP API (when `dccd ui` or `dccd start` is running)
|
|
216
|
+
|
|
217
|
+
```
|
|
218
|
+
GET /api/operations list registered operations
|
|
219
|
+
POST /api/backfill start a backfill job
|
|
220
|
+
GET /api/backfill/{run_id} poll run status
|
|
221
|
+
GET /api/streams list stream jobs + state
|
|
222
|
+
POST /api/streams/start start a stream job
|
|
223
|
+
POST /api/streams/stop stop a stream job
|
|
224
|
+
POST /api/read read stored data (≤1 000 rows)
|
|
225
|
+
GET /api/events SSE stream of progress/log/status events
|
|
226
|
+
GET /api/inventory list all datasets
|
|
227
|
+
GET /health liveness check
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## Data layout
|
|
231
|
+
|
|
232
|
+
```
|
|
233
|
+
{data_path}/
|
|
234
|
+
{exchange}/
|
|
235
|
+
ohlc/{pair}/{span}/YYYY.parquet # annual, ns timestamps
|
|
236
|
+
trades/{pair}/YYYY-MM-DD.parquet # daily
|
|
237
|
+
orderbook/{pair}/YYYY-MM-DD.parquet # daily
|
|
238
|
+
.dccd/runs.db # SQLite job run history
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
All timestamps are **nanoseconds UTC** (int64).
|
|
242
|
+
|
|
243
|
+
## Development
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
pip install -e ".[dev]"
|
|
247
|
+
pytest # 141 tests
|
|
248
|
+
ruff check dccd/ # lint
|
|
249
|
+
mypy dccd/ # type check (strict on domain/)
|
|
250
|
+
```
|