dccd 2.4.0__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. {dccd-2.4.0 → dccd-3.0.0}/CHANGELOG.md +100 -0
  2. dccd-3.0.0/CLAUDE.md +276 -0
  3. dccd-3.0.0/PKG-INFO +250 -0
  4. dccd-3.0.0/README.md +183 -0
  5. dccd-3.0.0/dccd/__init__.py +273 -0
  6. dccd-3.0.0/dccd/application/__init__.py +1 -0
  7. dccd-3.0.0/dccd/application/config.py +347 -0
  8. dccd-3.0.0/dccd/application/events.py +167 -0
  9. dccd-3.0.0/dccd/application/jobs.py +97 -0
  10. dccd-3.0.0/dccd/application/monitor.py +68 -0
  11. dccd-3.0.0/dccd/application/operations.py +472 -0
  12. dccd-3.0.0/dccd/application/registry.py +94 -0
  13. dccd-3.0.0/dccd/application/scheduler.py +257 -0
  14. dccd-3.0.0/dccd/application/service_factory.py +79 -0
  15. dccd-3.0.0/dccd/domain/__init__.py +22 -0
  16. dccd-3.0.0/dccd/domain/capability.py +53 -0
  17. dccd-3.0.0/dccd/domain/dataset.py +53 -0
  18. dccd-3.0.0/dccd/domain/errors.py +34 -0
  19. dccd-3.0.0/dccd/domain/records.py +114 -0
  20. dccd-3.0.0/dccd/domain/symbol.py +63 -0
  21. dccd-3.0.0/dccd/domain/timeutils.py +298 -0
  22. dccd-3.0.0/dccd/domain/transforms.py +60 -0
  23. dccd-3.0.0/dccd/domain/types.py +27 -0
  24. dccd-3.0.0/dccd/interfaces/__init__.py +1 -0
  25. dccd-3.0.0/dccd/interfaces/api/__init__.py +5 -0
  26. dccd-3.0.0/dccd/interfaces/api/app.py +683 -0
  27. dccd-3.0.0/dccd/interfaces/cli/__init__.py +5 -0
  28. dccd-3.0.0/dccd/interfaces/cli/main.py +241 -0
  29. dccd-3.0.0/dccd/interfaces/ui/__init__.py +1 -0
  30. dccd-3.0.0/dccd/interfaces/ui/templates/base.html +326 -0
  31. dccd-3.0.0/dccd/interfaces/ui/templates/config.html +256 -0
  32. dccd-3.0.0/dccd/interfaces/ui/templates/dashboard.html +123 -0
  33. dccd-3.0.0/dccd/interfaces/ui/templates/data.html +106 -0
  34. dccd-3.0.0/dccd/interfaces/ui/templates/historical.html +434 -0
  35. dccd-3.0.0/dccd/interfaces/ui/templates/live.html +308 -0
  36. dccd-3.0.0/dccd/interfaces/ui/templates/logs.html +130 -0
  37. dccd-3.0.0/dccd/interfaces/ui/templates/storage.html +53 -0
  38. dccd-3.0.0/dccd/sources/__init__.py +23 -0
  39. dccd-3.0.0/dccd/sources/base.py +135 -0
  40. dccd-3.0.0/dccd/sources/binance.py +281 -0
  41. dccd-3.0.0/dccd/sources/bitfinex.py +280 -0
  42. dccd-3.0.0/dccd/sources/bitmex.py +283 -0
  43. dccd-3.0.0/dccd/sources/bybit.py +234 -0
  44. dccd-3.0.0/dccd/sources/coinbase.py +247 -0
  45. dccd-3.0.0/dccd/sources/kraken.py +341 -0
  46. dccd-3.0.0/dccd/sources/okx.py +247 -0
  47. dccd-3.0.0/dccd/sources/registry.py +111 -0
  48. dccd-3.0.0/dccd/storage/__init__.py +6 -0
  49. dccd-3.0.0/dccd/storage/parquet.py +531 -0
  50. dccd-3.0.0/dccd/storage/remote.py +72 -0
  51. dccd-3.0.0/dccd/storage/runs_sqlite.py +179 -0
  52. dccd-3.0.0/dccd/tests/v3/__init__.py +1 -0
  53. dccd-3.0.0/dccd/tests/v3/test_api.py +261 -0
  54. dccd-3.0.0/dccd/tests/v3/test_application.py +360 -0
  55. dccd-3.0.0/dccd/tests/v3/test_backfill_lookback.py +66 -0
  56. dccd-3.0.0/dccd/tests/v3/test_client.py +24 -0
  57. dccd-3.0.0/dccd/tests/v3/test_domain.py +259 -0
  58. dccd-3.0.0/dccd/tests/v3/test_domain_extended.py +230 -0
  59. dccd-3.0.0/dccd/tests/v3/test_network.py +100 -0
  60. dccd-3.0.0/dccd/tests/v3/test_sources.py +317 -0
  61. dccd-3.0.0/dccd/tests/v3/test_storage.py +179 -0
  62. dccd-3.0.0/dccd/tests/v3/test_storage_extended.py +151 -0
  63. dccd-3.0.0/dccd/tests/v3/test_transport.py +32 -0
  64. dccd-3.0.0/dccd/transport/__init__.py +7 -0
  65. dccd-3.0.0/dccd/transport/http.py +114 -0
  66. dccd-3.0.0/dccd/transport/paginate.py +212 -0
  67. dccd-3.0.0/dccd/transport/ratelimit.py +81 -0
  68. dccd-3.0.0/dccd/transport/ws.py +95 -0
  69. dccd-3.0.0/dccd.egg-info/PKG-INFO +250 -0
  70. dccd-3.0.0/dccd.egg-info/SOURCES.txt +80 -0
  71. dccd-3.0.0/dccd.egg-info/entry_points.txt +2 -0
  72. {dccd-2.4.0 → dccd-3.0.0}/dccd.egg-info/requires.txt +12 -19
  73. dccd-3.0.0/pyproject.toml +142 -0
  74. dccd-2.4.0/PKG-INFO +0 -300
  75. dccd-2.4.0/README.md +0 -227
  76. dccd-2.4.0/dccd/__init__.py +0 -43
  77. dccd-2.4.0/dccd/continuous_dl/__init__.py +0 -49
  78. dccd-2.4.0/dccd/continuous_dl/binance.py +0 -250
  79. dccd-2.4.0/dccd/continuous_dl/bitfinex.py +0 -370
  80. dccd-2.4.0/dccd/continuous_dl/bitmex.py +0 -350
  81. dccd-2.4.0/dccd/continuous_dl/bybit.py +0 -241
  82. dccd-2.4.0/dccd/continuous_dl/exchange.py +0 -346
  83. dccd-2.4.0/dccd/continuous_dl/kraken.py +0 -323
  84. dccd-2.4.0/dccd/continuous_dl/okx.py +0 -317
  85. dccd-2.4.0/dccd/daemon/__init__.py +0 -42
  86. dccd-2.4.0/dccd/daemon/api.py +0 -702
  87. dccd-2.4.0/dccd/daemon/backfill.py +0 -774
  88. dccd-2.4.0/dccd/daemon/cli.py +0 -629
  89. dccd-2.4.0/dccd/daemon/config.py +0 -353
  90. dccd-2.4.0/dccd/daemon/health.py +0 -245
  91. dccd-2.4.0/dccd/daemon/scheduler.py +0 -158
  92. dccd-2.4.0/dccd/daemon/storage.py +0 -118
  93. dccd-2.4.0/dccd/daemon/stream_manager.py +0 -514
  94. dccd-2.4.0/dccd/daemon/ui/__init__.py +0 -4
  95. dccd-2.4.0/dccd/daemon/ui/static/htmx.min.js +0 -1
  96. dccd-2.4.0/dccd/daemon/ui/templates/base.html +0 -160
  97. dccd-2.4.0/dccd/daemon/ui/templates/config.html +0 -225
  98. dccd-2.4.0/dccd/daemon/ui/templates/dashboard.html +0 -159
  99. dccd-2.4.0/dccd/daemon/ui/templates/inventory.html +0 -92
  100. dccd-2.4.0/dccd/daemon/ui/templates/jobs.html +0 -141
  101. dccd-2.4.0/dccd/daemon/ui/templates/logs.html +0 -19
  102. dccd-2.4.0/dccd/daemon/ui/templates/partials/backfill_modal.html +0 -123
  103. dccd-2.4.0/dccd/daemon/ui/templates/storage.html +0 -51
  104. dccd-2.4.0/dccd/histo_dl/__init__.py +0 -64
  105. dccd-2.4.0/dccd/histo_dl/binance.py +0 -204
  106. dccd-2.4.0/dccd/histo_dl/bybit.py +0 -221
  107. dccd-2.4.0/dccd/histo_dl/coinbase.py +0 -216
  108. dccd-2.4.0/dccd/histo_dl/exchange.py +0 -549
  109. dccd-2.4.0/dccd/histo_dl/kraken.py +0 -217
  110. dccd-2.4.0/dccd/histo_dl/okx.py +0 -216
  111. dccd-2.4.0/dccd/models.py +0 -85
  112. dccd-2.4.0/dccd/process_data.py +0 -139
  113. dccd-2.4.0/dccd/storage.py +0 -340
  114. dccd-2.4.0/dccd/tests/conftest.py +0 -222
  115. dccd-2.4.0/dccd/tests/test_backfill.py +0 -371
  116. dccd-2.4.0/dccd/tests/test_binance.py +0 -84
  117. dccd-2.4.0/dccd/tests/test_binance_ws.py +0 -187
  118. dccd-2.4.0/dccd/tests/test_bitfinex.py +0 -83
  119. dccd-2.4.0/dccd/tests/test_bitmex.py +0 -108
  120. dccd-2.4.0/dccd/tests/test_bybit.py +0 -77
  121. dccd-2.4.0/dccd/tests/test_bybit_ws.py +0 -114
  122. dccd-2.4.0/dccd/tests/test_coinbase.py +0 -96
  123. dccd-2.4.0/dccd/tests/test_daemon_api.py +0 -438
  124. dccd-2.4.0/dccd/tests/test_daemon_cli.py +0 -358
  125. dccd-2.4.0/dccd/tests/test_daemon_config.py +0 -216
  126. dccd-2.4.0/dccd/tests/test_daemon_health.py +0 -85
  127. dccd-2.4.0/dccd/tests/test_daemon_scheduler.py +0 -152
  128. dccd-2.4.0/dccd/tests/test_daemon_storage.py +0 -204
  129. dccd-2.4.0/dccd/tests/test_daemon_stream_manager.py +0 -536
  130. dccd-2.4.0/dccd/tests/test_date_time.py +0 -132
  131. dccd-2.4.0/dccd/tests/test_histo_dl.py +0 -84
  132. dccd-2.4.0/dccd/tests/test_io.py +0 -142
  133. dccd-2.4.0/dccd/tests/test_kraken.py +0 -80
  134. dccd-2.4.0/dccd/tests/test_kraken_ws.py +0 -163
  135. dccd-2.4.0/dccd/tests/test_models.py +0 -53
  136. dccd-2.4.0/dccd/tests/test_okx.py +0 -101
  137. dccd-2.4.0/dccd/tests/test_okx_ws.py +0 -152
  138. dccd-2.4.0/dccd/tests/test_process_data.py +0 -78
  139. dccd-2.4.0/dccd/tests/test_storage.py +0 -392
  140. dccd-2.4.0/dccd/tests/test_websocket.py +0 -69
  141. dccd-2.4.0/dccd/tools/__init__.py +0 -32
  142. dccd-2.4.0/dccd/tools/date_time.py +0 -290
  143. dccd-2.4.0/dccd/tools/io.py +0 -240
  144. dccd-2.4.0/dccd/tools/websocket.py +0 -165
  145. dccd-2.4.0/dccd.egg-info/PKG-INFO +0 -300
  146. dccd-2.4.0/dccd.egg-info/SOURCES.txt +0 -84
  147. dccd-2.4.0/dccd.egg-info/entry_points.txt +0 -2
  148. dccd-2.4.0/pyproject.toml +0 -103
  149. {dccd-2.4.0 → dccd-3.0.0}/CONTRIBUTING.md +0 -0
  150. {dccd-2.4.0 → dccd-3.0.0}/LICENSE.txt +0 -0
  151. {dccd-2.4.0 → dccd-3.0.0}/MANIFEST.in +0 -0
  152. {dccd-2.4.0/dccd/daemon → dccd-3.0.0/dccd/interfaces}/ui/static/favicon.svg +0 -0
  153. {dccd-2.4.0/dccd/daemon → dccd-3.0.0/dccd/interfaces}/ui/static/logo.svg +0 -0
  154. {dccd-2.4.0 → dccd-3.0.0}/dccd/tests/__init__.py +0 -0
  155. {dccd-2.4.0 → dccd-3.0.0}/dccd.egg-info/dependency_links.txt +0 -0
  156. {dccd-2.4.0 → dccd-3.0.0}/dccd.egg-info/top_level.txt +0 -0
  157. {dccd-2.4.0 → dccd-3.0.0}/setup.cfg +0 -0
@@ -6,6 +6,106 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ### Added
10
+
11
+ ### Changed
12
+
13
+ ### Fixed
14
+
15
+ ### Deprecated
16
+
17
+ ### Removed
18
+
19
+ ## [3.0.0] - 2026-06-07
20
+
21
+ ### Added
22
+
23
+ - Reworked web UI split by concern: a read-only enriched **Inventory** (data
24
+ freshness, OHLC gap detection, on-disk size, per-exchange totals) and two
25
+ collection pages — **Historical** and **Live** — each with data-type tabs and
26
+ per-exchange accordions. Jobs are created, edited (first date) and deleted
27
+ inline on the page; the Live page shows a real-time liveness indicator (last
28
+ trade/quote + age) fed by a throttled stream heartbeat over SSE. (#76)
29
+ - Job CRUD over the API: `POST /api/jobs/create|delete|update`, backed by
30
+ `AppConfig.add_job`/`remove_job`/`update_job_start` (persisted to `config.yml`).
31
+ - `ParquetStore.inventory()` now reports on-disk `bytes` and, for OHLC,
32
+ `expected_rows`/`missing_rows` (gap detection) at no extra read cost.
33
+ - `EventBus` fan-out to multiple SSE consumers and a `StreamSampleEvent`
34
+ liveness sample emitted (throttled) by `operations.stream`.
35
+ - UI polish: nav reorganised into `Collect ▾`/`System ▾` dropdowns; **Inventory**
36
+ renamed **Data** (`/inventory`→`/data`) with data-type tabs; reworked Live
37
+ liveness — seeded from the last on-disk data point so a page refresh shows
38
+ freshness immediately (no "waiting…"), span-aware dot, a freshness label that
39
+ is a live relative "N min ago" counter under 24h and an absolute date beyond,
40
+ and no noise age for fresh trades, with client-side number formatting;
41
+ order-book cadence (`snapshot_interval`) shown and settable;
42
+ Storage shows on-disk sizes; Dashboard adds a KPI bar and clearer sections;
43
+ Logs reoriented around recent runs with human run labels. The Config page no
44
+ longer duplicates job management (jobs live on Historical/Live; raw edit via
45
+ its JSON tab). `GET /api/jobs` now returns `start`/`every`/`snapshot_interval`/
46
+ `depth`. (#76)
47
+ - Cursor-based trades pagination: the engine now follows each adapter's opaque
48
+ cursor until a window is drained, instead of advancing by a fixed time window.
49
+ Fixes silent loss of >95% of trades on every liquid pair (all exchanges).
50
+ - UI: single-line top bar (brand + nav on one row); per-job **Schedule** on
51
+ Historical (a recurring backfill cron — Off/hourly/daily/custom, independent of
52
+ the span but `≥` it), reconciled live via `Scheduler.sync_intervals`; **Run
53
+ all** (global) and per-exchange run; timezone-aware date display driven by
54
+ `settings.timezone` (`local`/`UTC`/zoneinfo). OHLC removed from Live (collected
55
+ via Historical schedule); order books removed from Historical (no REST
56
+ history). `POST /api/jobs/update` now also sets `every` (schedule); new
57
+ `manual` trigger kind for never-auto-run jobs.
58
+ - Bearer auth on `/api/*` when `settings.ui_auth_token` is set, with a `?token=`
59
+ fallback for Server-Sent Events; `settings.ui_allow_origins` for opt-in CORS.
60
+ - Public async `Client.read()` and `Client.stream()`; `Client` wires adapters
61
+ via `service_factory` (single source of truth).
62
+ - Network-marked end-to-end tests (`pytest -m network`) validating pagination
63
+ against live exchange APIs.
64
+
65
+ ### Fixed
66
+
67
+ - Data loss on merge: writing into an existing legacy v2 Parquet file no longer
68
+ silently overwrites it; existing rows are canonicalised and preserved.
69
+ - Provenance is now actually written into the Parquet footer (was computed but
70
+ dropped).
71
+ - Custom ISO start date for backfill no longer raises (`JobParams.start`).
72
+ - Historical *first date* edit no longer reverts on reload: `GET /api/jobs` was
73
+ not returning `start`, so the UI reset the field after every refresh. (#76)
74
+ - Live order-book streams reported a crossed/incorrect best bid-ask: the WS
75
+ adapters emitted unmerged diff levels. binance/okx/bitmex now use full
76
+ snapshot channels (`@depth<N>`, `books5`, `orderBook10`) and bybit
77
+ reconstructs full state from snapshot+deltas (like kraken); best bid/ask is
78
+ computed defensively (`max` bid / `min` ask). (#76)
79
+ - Order-book Live liveness was incoherent with its cadence: it sampled the WS
80
+ every second while only one snapshot per ``snapshot_interval`` is captured. The
81
+ liveness sample is now emitted when a snapshot is actually saved, so its age
82
+ counts up to the interval and resets (matching the "Δ Ns" cadence). (#76)
83
+ - `dccd inventory` no longer crashes on OHLC datasets.
84
+ - Streams with no real implementation (Coinbase OHLC/order book, Bitfinex order
85
+ book) are rejected with `NoCapability` instead of "running" with zero output.
86
+ - `history="recent"` exchanges (Kraken OHLC) are clamped + warned instead of
87
+ silently returning wrong deep history.
88
+ - Kraken live OHLC timestamps were epoch 0 (1970): the WS adapter read a
89
+ non-existent `timestamp_open`; it now parses `interval_begin` (ISO-8601).
90
+ - `mypy dccd/` runs and passes again (it had been aborting on the dev Sphinx).
91
+
92
+ ### Changed / Removed
93
+
94
+ - Docs/examples swept to v3: README drops the removed `dccd migrate` command and
95
+ the "Migrating from v2" section; `examples/` rewritten to the v3 `Client` and
96
+ `dccd.application` daemon wiring with a v3 `jobs:` config, and the stale v2
97
+ `historical_downloader.ipynb` removed. (#82)
98
+ - Honest OHLC fidelity: Coinbase `quote_volume` is null (was a fabricated
99
+ `close×volume`); Kraken now fills its native trade count.
100
+ - Removed the dead `parallel` backfill flag, the unused `Page` model and the
101
+ unused bundled `htmx.min.js`.
102
+ - Removed the v2→v3 Parquet migration tool entirely: `dccd migrate`,
103
+ `POST /api/migrate`, the Storage-page migrate card, `dccd/storage/migrate.py`,
104
+ and the `migrate` operation in the registry.
105
+
106
+ > v3 is a full hexagonal rewrite. It **removes** the v2 daemon web UI shipped in
107
+ > 2.4.0 (`dccd/daemon/*`) and replaces it with `dccd/interfaces/` (api/cli/ui).
108
+
9
109
  ## [2.4.0] - 2026-06-04
10
110
 
11
111
  ### Added
dccd-3.0.0/CLAUDE.md ADDED
@@ -0,0 +1,276 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ > **Claude-oriented developer brief**: [`doc/dev/`](doc/dev/) contains an
6
+ > orientation pack written specifically for Claude Code — overview, architecture,
7
+ > design decisions & rationale, the per-exchange capability matrix, testing
8
+ > methodology + findings, current status, and the roadmap. Start at
9
+ > [`doc/dev/README.md`](doc/dev/README.md) for a fuller picture than this file
10
+ > gives. `CLAUDE.md` remains authoritative for commands and invariants.
11
+
12
+ ## Commands
13
+
14
+ ```bash
15
+ # Dev install (Python 3.11+)
16
+ pip install -e ".[dev]"
17
+
18
+ # Run full unit suite (network E2E excluded by default via -m 'not network')
19
+ pytest
20
+
21
+ # Run a single test file
22
+ pytest dccd/tests/v3/test_domain.py -v
23
+
24
+ # Real-exchange end-to-end tests (hit live APIs; opt-in)
25
+ pytest -m network
26
+
27
+ # Lint
28
+ ruff check dccd/
29
+
30
+ # Type check (strict on domain/; mypy assumes python 3.12 — see note below)
31
+ mypy dccd/
32
+
33
+ # Build Sphinx docs (must produce 0 warnings)
34
+ cd doc && make html
35
+
36
+ # UI smoke test (headless browser; start an isolated `dccd ui` first)
37
+ pip install playwright && playwright install chromium
38
+ python doc/dev/ui_smoke.py http://127.0.0.1:8137
39
+ ```
40
+
41
+ > **mypy** assumes `python_version = 3.12` (in `pyproject.toml`): the dev/docs
42
+ > env ships Sphinx whose source uses 3.12 `type` statements, which made mypy
43
+ > abort under 3.11. dccd supports 3.11–3.13, so 3.12 semantics are safe.
44
+
45
+ ## Git Flow
46
+
47
+ **Branch model:**
48
+ ```
49
+ master ← stable releases only (tagged vX.Y.Z)
50
+ └── develop ← integration branch
51
+ ├── feat/<topic> new feature or modernization axis
52
+ ├── fix/<topic> bug fix
53
+ ├── chore/<topic> tooling, CI, deps
54
+ └── docs/<topic> documentation only
55
+ ```
56
+
57
+ **Rules — always follow these before committing or pushing:**
58
+ 1. **Never commit directly to `master`.**
59
+ 2. **Never commit directly to `develop`** — always use a feature branch + PR.
60
+ 3. Branch off `develop`: `git checkout develop && git checkout -b feat/my-topic`
61
+ 4. Open a PR into `develop` when done. `develop` → `master` only at release time.
62
+
63
+ **Commit style (Conventional Commits):**
64
+ ```
65
+ feat: add Bybit futures OHLC capability
66
+ fix: correct paginator window for Coinbase
67
+ chore: upgrade httpx to 0.28
68
+ docs: update README for v3 install
69
+ ```
70
+
71
+ Do not add `Co-Authored-By` trailers to commits — this is a personal repo.
72
+
73
+ **Before every commit:** run `pytest`. It must pass.
74
+
75
+ **One PR = one concern, small and disposable.** Even a large plan ships as
76
+ *several* small atomic PRs — never one catch-all branch. A PR you couldn't throw
77
+ away without losing unrelated good work is too big: split it. This is what makes
78
+ `/abandon-task` (kill a bad PR, keep the lesson) viable.
79
+
80
+ ### Dev loop & docs of record
81
+
82
+ The iterative loop is tooled by skills, with three tracked docs as the sources of
83
+ truth:
84
+
85
+ | Doc | Holds | Updated by |
86
+ |-----|-------|-----------|
87
+ | `doc/dev/07-roadmap.md` | open work (single source) | `/pick-task` reads · `/finish-task`, `/abandon-task` update |
88
+ | `doc/dev/03-decisions.md` | the *why* — ADR journal (+ settled rationale) | `/finish-task` (accepted), `/abandon-task` (rejected/tombstone) |
89
+ | `doc/dev/06-status.md` | where things stand | `/finish-task`, `/groom-docs` |
90
+
91
+ `CHANGELOG.md` + git log stay authoritative for *what* shipped. The loop:
92
+ `/pick-task` (smallest slice → branch) → plan (split big plans into small PRs) →
93
+ `/finish-task` (tests, ADR entry, status, PR) **or** `/abandon-task` (salvage the
94
+ lesson + close the PR); `/groom-docs` periodically keeps `doc/dev/` lean and true.
95
+
96
+ **Model per task** (advisory — you set it via `/model`, or a skill spawns a
97
+ subagent with an explicit `model`; subagents otherwise *inherit* the parent):
98
+
99
+ | Model | For |
100
+ |-------|-----|
101
+ | `opus` | judgement, design, decisions, planning, review |
102
+ | `sonnet` | implementation — code, tests, docstrings |
103
+ | `haiku` | mechanical fan-out (doc scans, checklists) — spawn it explicitly as a subagent |
104
+
105
+ ## Architecture (v3 — hexagonal)
106
+
107
+ ### Three usage modes
108
+
109
+ 1. **Python API** — `async with Client() as c: await c.backfill(...)`.
110
+ 2. **CLI** — `dccd` command (backfill, stream, start, ui, …).
111
+ 3. **HTTP API / UI** — FastAPI server + Jinja2 templates (`dccd ui` or `dccd start`).
112
+
113
+ ### Package structure
114
+
115
+ ```
116
+ dccd/
117
+ domain/ # Pure, sync, zero I/O — models, capabilities, transforms
118
+ transport/ # Async HTTP (httpx), WebSocket base, RateLimiter, Paginator
119
+ sources/ # Exchange adapters (Source protocols + registry)
120
+ storage/ # ParquetStore, RunsStore (SQLite), RemoteStorage
121
+ application/ # Operations (backfill, stream), Scheduler, EventBus, Config
122
+ interfaces/
123
+ api/ # FastAPI app (1:1 with OperationRegistry)
124
+ cli/ # Typer CLI (asyncio.run)
125
+ ui/ # Jinja2 templates (pure HTTP client of api/)
126
+ tests/v3/ # All tests
127
+ ```
128
+
129
+ ### Domain layer (`domain/`)
130
+
131
+ Pure, synchronous, no I/O. Never import from transport/sources/storage.
132
+
133
+ | Module | Contents |
134
+ |--------|----------|
135
+ | `symbol.py` | `Symbol(base, quote)` — normalises XBT→BTC |
136
+ | `types.py` | `DataType` enum: `ohlc`, `trades`, `orderbook` |
137
+ | `records.py` | `OHLCBar`, `Trade`, `OrderBookSnapshot` (ns timestamps) |
138
+ | `dataset.py` | `DatasetId`, `Provenance` |
139
+ | `capability.py` | `Capability` — declared per adapter per (data_type × transport × mode) |
140
+ | `timeutils.py` | Helpers: `s_to_ns`, `align_ns`, `span_label`, `binance_interval`, … |
141
+ | `transforms.py` | `aggregate_ohlc(trades, span)` — pure computation |
142
+ | `errors.py` | `NoCapability`, `CoverageError` |
143
+
144
+ **All internal timestamps are nanoseconds UTC (int64).**
145
+
146
+ ### Transport layer (`transport/`)
147
+
148
+ Async only. Drives I/O; domain stays pure.
149
+
150
+ | Module | Contents |
151
+ |--------|----------|
152
+ | `http.py` | `AsyncHTTPClient` — httpx wrapper with retry/backoff |
153
+ | `ws.py` | `WebSocketBase` — `stream_raw()` async generator with exponential reconnect |
154
+ | `ratelimit.py` | `RateLimiter` — token-bucket per exchange |
155
+ | `paginate.py` | `paginate_ohlc`, `paginate_trades` — generic forward paginator |
156
+
157
+ **Paginator contract**: callers must pass a closure with `symbol` (and `span` for OHLC) already bound:
158
+
159
+ ```python
160
+ async def _fetch(start_ns, end_ns, limit):
161
+ return await adapter.fetch_ohlc_page(symbol, span, start_ns, end_ns, limit)
162
+ async for bar in paginate_ohlc(_fetch, cap, start_ns, end_ns, span):
163
+ ...
164
+ ```
165
+
166
+ ### Source adapters (`sources/`)
167
+
168
+ One class per exchange implementing Source protocol mixins:
169
+
170
+ - `OHLCHistory`, `TradesHistory`, `OrderBookSnapshotREST` — REST historical
171
+ - `OHLCLive`, `TradesLive`, `OrderBookLive` — WebSocket live
172
+
173
+ Adapters declare their capabilities via `capabilities() -> list[Capability]`.
174
+
175
+ | Exchange | Notes |
176
+ |----------|-------|
177
+ | `binance.py` | Full history OHLC+trades, depth 5000 |
178
+ | `coinbase.py` | 300 candles/req (Paginator handles automatically) |
179
+ | `kraken.py` | OHLC: 720 recent only (`history="recent"`); trades: full via `since` cursor |
180
+ | `bybit.py` | No spot trades history (capability not declared → `NoCapability` early) |
181
+ | `okx.py` | `history-candles` + `history-trades` for deep history |
182
+ | `bitfinex.py` | Up to 10 000 items per request |
183
+ | `bitmex.py` | Bucketed OHLC (1m/5m/1h/1d only), full trades |
184
+
185
+ **WS adapters** extend `WebSocketBase` and use `self.stream_raw()` (NOT a custom `_stream_raw` — the base handles reconnect).
186
+
187
+ ### Storage (`storage/`)
188
+
189
+ | Module | Contents |
190
+ |--------|----------|
191
+ | `parquet.py` | `ParquetStore` — read/write Parquet (ns, provenance, dedup); `inventory()` enriched with on-disk `bytes` and (OHLC only) `expected_rows`/`missing_rows` gap detection at zero extra I/O |
192
+ | `runs_sqlite.py` | `RunsStore` — SQLite WAL, append-only job run history |
193
+ | `remote.py` | `RemoteStorage` — rclone sync |
194
+
195
+ **Layout**: `{data_path}/{exchange}/ohlc/{pair}/{span}/YYYY.parquet` (annual) and `.../trades/{pair}/YYYY-MM-DD.parquet` (daily).
196
+
197
+ ### Application (`application/`)
198
+
199
+ | Module | Contents |
200
+ |--------|----------|
201
+ | `config.py` | `AppConfig` + `JobConfig` — Pydantic v2, validates exchange names + span-for-OHLC; runtime CRUD (`add_job`, `remove_job`, `update_job_start`) normalises mutations to single-pair entries (multi-pair configs are read but split on edit) |
202
+ | `events.py` | `EventBus` — pub/sub with **multi-queue fan-out** (`add_queue`/`remove_queue`, `enable_queue` alias) so Live + Logs + Dashboard consume concurrently; events: `ProgressEvent`, `LogEvent`, `StatusEvent`, `StreamSampleEvent` |
203
+ | `jobs.py` | `JobSpec`, `JobRun`, `Trigger`, `JobParams` |
204
+ | `operations.py` | `backfill()`, `stream()` (emits throttled `StreamSampleEvent` ≤1/s for Live liveness), `read()`, `inventory()` |
205
+ | `scheduler.py` | `Scheduler` — async interval/supervised/once job orchestration; `sync_streams()` reconciles stream workers and `sync_intervals()` reconciles recurring backfill loops (start/cancel/restart on cadence change, keyed by spec id) — both stop+drop deleted ones |
206
+ | `registry.py` | `REGISTRY` — maps operation names to schemas (parity enforcement) |
207
+ | `monitor.py` | `HealthMonitor` — EventBus subscriber, webhook alerts |
208
+ | `service_factory.py` | `build_registry()`, `build_store()`, `build_runs_store()` — **single source of truth for wiring** |
209
+
210
+ **Adding a new exchange**: add the adapter to `sources/`, register it in `service_factory.build_registry()`.
211
+
212
+ ### Interfaces (`interfaces/`)
213
+
214
+ - `api/app.py` — FastAPI `create_app()`, lifespan context manager, module-level Pydantic request models. Job CRUD lives here: `POST /api/jobs/{create,delete,update}` (body-based to allow `/`/`:` in ids), all routed through the async `_persist_and_refresh` helper (writes YAML, updates `app.state`, calls `scheduler.sync_streams` **and** `scheduler.sync_intervals` to reconcile recurring backfills live). `POST /api/jobs/update` edits `start` and/or the recurring `every` (schedule). `GET /api/jobs` exposes `start`/`every`/`trigger`/`snapshot_interval`/`depth` so the UI can render and preserve them. `POST /api/jobs/run` + `/api/jobs/run-all` trigger configured backfills on demand. SSE at `GET /api/events` uses `add_queue`/`remove_queue` for multi-consumer fan-out.
215
+ - `cli/main.py` — Typer commands, all import from `service_factory`
216
+ - `ui/` — Jinja2 templates + static files. Nav: `Dashboard` · `Data` flat, plus `Collect ▾` (Historical/Live) and `System ▾` (Logs/Config/Storage) dropdowns. Pages are **split by concern**:
217
+ - **Data** (`data.html`, route `/data`; `/inventory` 307-redirects here) — read-only view of what's on disk: DataType tabs → per-exchange accordions with totals, freshness dot, OHLC gap %, on-disk size, file count. No action buttons.
218
+ - **Historical** (`historical.html`) — backfill jobs (**OHLC + Trades only**; order books have no REST history): DataType tabs → exchange accordions → one row per dataset with editable `first_date` (defaults to the dataset's earliest stored bar), a **Schedule** select (Off/hourly/daily/custom → `every`; `manual` trigger when off), real coverage bar, inline Run/Delete. **Run all** (header) + per-exchange **Run all**. New jobs default to `manual`.
219
+ - **Live** (`live.html`) — stream jobs (**Trades + Order Book only**; OHLC is collected via the Historical schedule, not streamed): same tab/accordion shape, with a liveness indicator fed by `StreamSampleEvent` over SSE (numeric `value`/`bid`/`ask`, formatted client-side via `fmtNum`). Liveness is **seeded from the last on-disk point** (inventory `max_ts`) so a refresh shows freshness without waiting for a live sample. The dot's "fresh" window is span-aware (order-book `snapshot_interval` / short for trades); the freshness label is a relative "N ago" under 24h (`fmtFreshness`) and an absolute date beyond, or the last-run date-time when stopped. Cadence column + `snapshot_interval` field for order book. Inline Start/Stop/Delete.
220
+ - Single top bar carries the brand (logo · `dccd` · version) left and the nav right. Dates render in `settings.timezone` (`local`/`UTC`/zoneinfo) via `DCCD_TZ` in `fmtNs`/`fmtDate`; relative ages are tz-independent.
221
+ - `dashboard.html` (KPIs + Active now / Recent runs / Data), `logs.html` (recent runs first, live console secondary, human run labels), `config.html` (Settings incl. `timezone`/Alerts/Storage + Raw JSON — **no jobs form**; jobs are managed on Historical/Live), `storage.html` (sizes via `fmtBytes`; no migrate tool).
222
+
223
+ **UI↔API contract**: UI is a pure HTTP client of the API — no direct calls to application layer. Inline job create/edit/delete on Historical/Live go through `/api/jobs/*`; the Config page no longer manages jobs (edit the `jobs` array via its Raw JSON tab if needed).
224
+
225
+ ## Testing conventions
226
+
227
+ Tests live in `dccd/tests/v3/`. No doctests (removed `--doctest-modules` from `addopts`).
228
+
229
+ Coverage is measured on every run (`--cov=dccd`). CI matrix: Python 3.11–3.13.
230
+
231
+ Key test files:
232
+ - `test_domain.py` + `test_domain_extended.py` — domain models, transforms, config validation
233
+ - `test_sources.py` — capability declarations, protocol compliance, symbol mapping
234
+ - `test_storage.py` + `test_storage_extended.py` — ParquetStore, dedup keys, gap detection
235
+ - `test_application.py` — EventBus (multi-queue fan-out, `sample`), JobSpec, OperationRegistry parity, `AppConfig` job CRUD (incl. multi-pair split)
236
+ - `test_api.py` — FastAPI endpoints (incl. auth, backfill cancel, `/api/jobs/{create,delete,update}`, stream-delete unregisters worker) via TestClient
237
+ - `test_transport.py` — AsyncHTTPClient concurrency safety
238
+ - `test_backfill_lookback.py` — bounded default lookback per data type
239
+ - `test_network.py` — **real-exchange** E2E (`@pytest.mark.network`, opt-in)
240
+
241
+ **Test the chain on real data, not just the pieces.** A green unit suite missed
242
+ a backfill writing 0 rows, a store losing 58% of trades, and a "Stop" button
243
+ that did nothing. For any data path: run the real operation, read what landed on
244
+ Parquet, and compare it to what was requested. Back up before any in-place
245
+ mutation. Full methodology + the catalogue of bugs this surfaced:
246
+ [`doc/dev/05-testing.md`](doc/dev/05-testing.md);
247
+ UI smoke test: `doc/dev/ui_smoke.py`.
248
+
249
+ ### Invariants — do not regress
250
+
251
+ - **Trades pagination is cursor-based** (per-adapter opaque cursor); never
252
+ advance trades by a fixed time window. OHLC snaps the start to the *bar* (span),
253
+ not the window.
254
+ - **Dedup key is per data type** (`ParquetStore._dedup_subset`): OHLC=`TS`,
255
+ trades=`tid` (else composite), order book=`(TS,side,price)`. `TS` alone is
256
+ unique only for OHLC.
257
+ - **Declared capabilities must be honest**: don't declare a WS channel or
258
+ `history` depth that isn't implemented — the engine rejects undeclared ones.
259
+ - **All timestamps ns UTC int64**; legacy frames pass through
260
+ `ParquetStore.canonicalize()` before any `concat`.
261
+ - **First `start=last` backfill is bounded** per type (`_DEFAULT_LOOKBACK_NS`);
262
+ backfills are cancellable (`stop_event` → `DELETE /api/backfill/{id}`).
263
+ - **Adapters share one reference-counted HTTP client** (concurrency-safe).
264
+ - **`ui_auth_token` enforces Bearer on `/api/*`**; CORS is not wildcard.
265
+ - **Stream worker set is reconciled, not append-only**: deleting a stream job must
266
+ call `Scheduler.sync_streams()` so its worker is stopped and dropped (never left
267
+ running/controllable after its config is gone).
268
+ - **`EventBus` fans out to all registered queues**; SSE consumers register via
269
+ `add_queue` and must `remove_queue` on disconnect (done in the `/api/events`
270
+ `finally`).
271
+
272
+ ## Dependencies
273
+
274
+ Core (Python 3.11+): `httpx`, `websockets`, `pydantic>=2`, `polars`, `pyarrow`, `numpy`, `scipy`, `SQLAlchemy>=2`
275
+ Daemon extra: `pyyaml`, `typer`, `tqdm`, `uvicorn`, `fastapi`, `jinja2`, `apscheduler>=3.10,<4`
276
+ Dev extra: `pytest`, `pytest-asyncio`, `pytest-cov`, `ruff`, `mypy`, `interrogate`
dccd-3.0.0/PKG-INFO ADDED
@@ -0,0 +1,250 @@
1
+ Metadata-Version: 2.4
2
+ Name: dccd
3
+ Version: 3.0.0
4
+ Summary: Download Crypto Currency Data — hexagonal architecture, async-first.
5
+ Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ArthurBernard/Download_Crypto_Currencies_Data
8
+ Project-URL: Documentation, https://download-crypto-currencies-data.readthedocs.io/
9
+ Project-URL: Changelog, https://github.com/ArthurBernard/Download_Crypto_Currencies_Data/blob/master/CHANGELOG.md
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Financial and Insurance Industry
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Database
19
+ Classifier: Topic :: Office/Business :: Financial
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE.txt
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: websockets>=12.0
25
+ Requires-Dist: pydantic>=2.0
26
+ Requires-Dist: polars>=0.20
27
+ Requires-Dist: pyarrow>=13
28
+ Requires-Dist: numpy>=1.26
29
+ Requires-Dist: scipy>=1.10
30
+ Requires-Dist: SQLAlchemy>=2.0
31
+ Provides-Extra: daemon
32
+ Requires-Dist: pyyaml>=6.0; extra == "daemon"
33
+ Requires-Dist: typer>=0.12; extra == "daemon"
34
+ Requires-Dist: tqdm>=4.64; extra == "daemon"
35
+ Requires-Dist: uvicorn[standard]>=0.29; extra == "daemon"
36
+ Requires-Dist: fastapi>=0.110; extra == "daemon"
37
+ Requires-Dist: jinja2>=3.1; extra == "daemon"
38
+ Requires-Dist: apscheduler<4,>=3.10; extra == "daemon"
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=7.4; extra == "dev"
41
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
42
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
43
+ Requires-Dist: ruff>=0.4; extra == "dev"
44
+ Requires-Dist: interrogate>=1.5; extra == "dev"
45
+ Requires-Dist: mypy>=1.0; extra == "dev"
46
+ Requires-Dist: pyyaml>=6.0; extra == "dev"
47
+ Requires-Dist: typer>=0.12; extra == "dev"
48
+ Requires-Dist: tqdm>=4.64; extra == "dev"
49
+ Requires-Dist: uvicorn[standard]>=0.29; extra == "dev"
50
+ Requires-Dist: fastapi>=0.110; extra == "dev"
51
+ Requires-Dist: jinja2>=3.1; extra == "dev"
52
+ Requires-Dist: apscheduler<4,>=3.10; extra == "dev"
53
+ Requires-Dist: httpx>=0.27; extra == "dev"
54
+ Provides-Extra: doc
55
+ Requires-Dist: sphinx>=7.0; extra == "doc"
56
+ Requires-Dist: furo; extra == "doc"
57
+ Requires-Dist: numpydoc; extra == "doc"
58
+ Requires-Dist: sphinx-design; extra == "doc"
59
+ Requires-Dist: sphinx-copybutton; extra == "doc"
60
+ Requires-Dist: sphinx-click; extra == "doc"
61
+ Requires-Dist: autodoc-pydantic>=2.0; extra == "doc"
62
+ Requires-Dist: pyyaml>=6.0; extra == "doc"
63
+ Requires-Dist: fastapi>=0.110; extra == "doc"
64
+ Requires-Dist: uvicorn[standard]>=0.29; extra == "doc"
65
+ Requires-Dist: jinja2>=3.1; extra == "doc"
66
+ Dynamic: license-file
67
+
68
+ <picture>
69
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/ArthurBernard/Download_Crypto_Currencies_Data/develop/doc/source/_static/logo-dark-transparent.svg">
70
+ <img alt="dccd logo" src="https://raw.githubusercontent.com/ArthurBernard/Download_Crypto_Currencies_Data/develop/doc/source/_static/logo-light-transparent.svg" height="180px" align="left">
71
+ </picture>
72
+
73
+ # **Download Crypto-Currency Data** — v3
74
+
75
+ [![Python versions](https://img.shields.io/pypi/pyversions/dccd)](https://pypi.org/project/dccd/)
76
+ [![PyPI](https://img.shields.io/pypi/v/dccd.svg)](https://pypi.org/project/dccd/)
77
+ [![CI](https://github.com/ArthurBernard/Download_Crypto_Currencies_Data/actions/workflows/ci.yml/badge.svg)](https://github.com/ArthurBernard/Download_Crypto_Currencies_Data/actions/workflows/ci.yml)
78
+ [![License](https://img.shields.io/github/license/ArthurBernard/Download_Crypto_Currencies_Data.svg)](https://github.com/ArthurBernard/Download_Crypto_Currencies_Data/blob/master/LICENSE.txt)<br>
79
+ [![Documentation](https://readthedocs.org/projects/download-crypto-currencies-data/badge/?version=latest)](https://download-crypto-currencies-data.readthedocs.io/en/latest/)
80
+ [![Coverage](https://codecov.io/gh/ArthurBernard/Download_Crypto_Currencies_Data/branch/master/graph/badge.svg)](https://codecov.io/gh/ArthurBernard/Download_Crypto_Currencies_Data)
81
+
82
+ ---
83
+
84
+ **dccd** downloads crypto-currency market data (OHLCV, trades, order book)
85
+ from 7 exchanges via REST and WebSocket. Data is stored as Parquet files with
86
+ nanosecond-precision timestamps.
87
+
88
+ ## Architecture (v3)
89
+
90
+ Hexagonal architecture — business logic is fully separated from interfaces:
91
+
92
+ ```
93
+ Interfaces: CLI · HTTP API · Web UI · Python Client
94
+
95
+ Application: backfill, stream, read, inventory
96
+
97
+ Domain ← Sources (7 exchange adapters) ← Transport (httpx · WS · Paginator)
98
+
99
+ Storage: ParquetStore + RunsStore (SQLite)
100
+ ```
101
+
102
+ - **Async-first** — httpx + websockets, one event loop; CLI via `asyncio.run`
103
+ - **Nanosecond timestamps** — uniform int64 UTC throughout the store
104
+ - **Generic Paginator** — no per-exchange chunking; Coinbase 300-limit is a capability declaration
105
+ - **`NoCapability` early** — unsupported requests (Bybit has no spot trade history; Kraken OHLC is recent-only) fail fast with a clear error
106
+ - **Four iso-functional interfaces** — same operations everywhere (parity test enforces this)
107
+
108
+ ## Supported exchanges
109
+
110
+ You pick a **data type** (OHLC · trades · order book) and an **operation** —
111
+ **backfill** (history) or **stream** (live):
112
+
113
+ | Exchange | Backfill (history) | Stream (live) |
114
+ |----------|--------------------|---------------|
115
+ | Binance | OHLC · trades · book | OHLC · trades · book |
116
+ | Coinbase | OHLC · book · trades *(recent)* | trades |
117
+ | Kraken | OHLC *(720 recent)* · trades · book | OHLC · trades · book |
118
+ | Bybit | OHLC · book | OHLC · trades · book |
119
+ | OKX | OHLC · trades · book | OHLC · trades · book |
120
+ | Bitfinex | OHLC · trades · book | OHLC · trades |
121
+ | BitMEX | OHLC *(1m/5m/1h/1d)* · trades · book | OHLC · trades · book |
122
+
123
+ Trades backfill is **cursor-paginated** (drains the full window, not just the
124
+ first page). *recent* = no deep history via the public API (a deeper request is
125
+ rejected/clamped early, never silently truncated); Bybit spot has no trade
126
+ history. **Order-book backfill** is a single snapshot — use a stream to record
127
+ the book over time. Stream channels are only listed where really implemented
128
+ (undeclared ones raise `NoCapability`).
129
+
130
+ ### OHLC field fidelity
131
+
132
+ Not every exchange returns every OHLC field natively. Missing fields are stored
133
+ as `null` (never fabricated):
134
+
135
+ | Exchange | `quote_volume` | `trades` (count) |
136
+ |----------|----------------|------------------|
137
+ | Binance | ✅ native | ✅ native |
138
+ | Bybit / OKX | ✅ native | — null |
139
+ | Kraken | ✅ (vwap × volume, exact) | ✅ native |
140
+ | Coinbase / Bitfinex / BitMEX | — null | — null |
141
+
142
+ ## Installation
143
+
144
+ ```bash
145
+ # Core — Python 3.11+
146
+ pip install dccd
147
+
148
+ # With scheduler, CLI, and web UI
149
+ pip install "dccd[daemon]"
150
+
151
+ # Development
152
+ pip install "dccd[dev]"
153
+ ```
154
+
155
+ ## Quick start
156
+
157
+ ### Python API
158
+
159
+ ```python
160
+ import asyncio
161
+ from dccd import Client
162
+
163
+ async def main():
164
+ async with Client() as c:
165
+ result = await c.backfill("binance", "BTC/USDT", data_type="ohlc", span=3600)
166
+ print(f"Wrote {result['rows_written']} rows")
167
+ for ds in c.inventory():
168
+ print(ds)
169
+
170
+ asyncio.run(main())
171
+ ```
172
+
173
+ ### CLI
174
+
175
+ ```bash
176
+ dccd validate --config config.yml # validate config
177
+ dccd backfill --config config.yml # run all backfill jobs
178
+ dccd backfill -e binance -s BTC/USDT --type ohlc --span 3600 # ad-hoc
179
+ dccd stream --config config.yml # run WebSocket stream jobs
180
+ dccd start --config config.yml # full daemon + UI
181
+ dccd ui --config config.yml # UI only (no scheduler)
182
+ dccd inventory --config config.yml # list stored datasets
183
+ dccd status --config config.yml # show recent runs
184
+ ```
185
+
186
+ ### Configuration (`config.yml`)
187
+
188
+ ```yaml
189
+ settings:
190
+ data_path: ./data/crypto
191
+ timezone: UTC
192
+ ui_port: 8080
193
+
194
+ jobs:
195
+ - exchange: binance
196
+ pairs: [BTC/USDT, ETH/USDT]
197
+ data_type: ohlc
198
+ span: 3600
199
+ trigger_kind: interval
200
+ every: 3600
201
+
202
+ - exchange: kraken
203
+ pairs: [BTC/USD]
204
+ data_type: trades
205
+ operation: stream
206
+ trigger_kind: supervised
207
+
208
+ storage:
209
+ remotes:
210
+ - provider: rclone
211
+ remote: "mynas:crypto/"
212
+ sync_interval: 3600
213
+ ```
214
+
215
+ ### HTTP API (when `dccd ui` or `dccd start` is running)
216
+
217
+ ```
218
+ GET /api/operations list registered operations
219
+ POST /api/backfill start a backfill job
220
+ GET /api/backfill/{run_id} poll run status
221
+ GET /api/streams list stream jobs + state
222
+ POST /api/streams/start start a stream job
223
+ POST /api/streams/stop stop a stream job
224
+ POST /api/read read stored data (≤1 000 rows)
225
+ GET /api/events SSE stream of progress/log/status events
226
+ GET /api/inventory list all datasets
227
+ GET /health liveness check
228
+ ```
229
+
230
+ ## Data layout
231
+
232
+ ```
233
+ {data_path}/
234
+ {exchange}/
235
+ ohlc/{pair}/{span}/YYYY.parquet # annual, ns timestamps
236
+ trades/{pair}/YYYY-MM-DD.parquet # daily
237
+ orderbook/{pair}/YYYY-MM-DD.parquet # daily
238
+ .dccd/runs.db # SQLite job run history
239
+ ```
240
+
241
+ All timestamps are **nanoseconds UTC** (int64).
242
+
243
+ ## Development
244
+
245
+ ```bash
246
+ pip install -e ".[dev]"
247
+ pytest # 141 tests
248
+ ruff check dccd/ # lint
249
+ mypy dccd/ # type check (strict on domain/)
250
+ ```