dccd 3.0.0__tar.gz → 3.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dccd-3.0.0 → dccd-3.2.0}/CHANGELOG.md +95 -0
- {dccd-3.0.0 → dccd-3.2.0}/CLAUDE.md +23 -7
- {dccd-3.0.0 → dccd-3.2.0}/PKG-INFO +1 -1
- {dccd-3.0.0 → dccd-3.2.0}/dccd/__init__.py +14 -3
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/config.py +3 -1
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/monitor.py +9 -4
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/operations.py +109 -2
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/scheduler.py +87 -1
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/service_factory.py +57 -1
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/api/app.py +80 -1
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/cli/main.py +29 -3
- dccd-3.2.0/dccd/interfaces/ui/templates/storage.html +108 -0
- dccd-3.2.0/dccd/storage/coverage_sqlite.py +134 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/storage/parquet.py +5 -0
- dccd-3.2.0/dccd/storage/purge.py +105 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/storage/remote.py +40 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_api.py +49 -1
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_application.py +62 -0
- dccd-3.2.0/dccd/tests/v3/test_coverage.py +140 -0
- dccd-3.2.0/dccd/tests/v3/test_purge.py +104 -0
- dccd-3.2.0/dccd/tests/v3/test_remote_sync.py +172 -0
- dccd-3.2.0/dccd/tests/v3/test_restart.py +77 -0
- dccd-3.2.0/dccd/tests/v3/test_restore.py +79 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd.egg-info/PKG-INFO +1 -1
- {dccd-3.0.0 → dccd-3.2.0}/dccd.egg-info/SOURCES.txt +7 -0
- {dccd-3.0.0 → dccd-3.2.0}/pyproject.toml +1 -1
- dccd-3.0.0/dccd/interfaces/ui/templates/storage.html +0 -53
- {dccd-3.0.0 → dccd-3.2.0}/CONTRIBUTING.md +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/LICENSE.txt +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/MANIFEST.in +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/README.md +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/events.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/jobs.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/application/registry.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/capability.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/dataset.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/errors.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/records.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/symbol.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/timeutils.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/transforms.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/domain/types.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/api/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/cli/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/static/favicon.svg +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/static/logo.svg +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/base.html +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/config.html +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/dashboard.html +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/data.html +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/historical.html +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/live.html +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/logs.html +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/base.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/binance.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/bitfinex.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/bitmex.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/bybit.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/coinbase.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/kraken.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/okx.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/sources/registry.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/storage/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/storage/runs_sqlite.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_backfill_lookback.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_client.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_domain.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_domain_extended.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_network.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_sources.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_storage.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_storage_extended.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/tests/v3/test_transport.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/transport/__init__.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/transport/http.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/transport/paginate.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/transport/ratelimit.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd/transport/ws.py +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd.egg-info/dependency_links.txt +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd.egg-info/entry_points.txt +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd.egg-info/requires.txt +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/dccd.egg-info/top_level.txt +0 -0
- {dccd-3.0.0 → dccd-3.2.0}/setup.cfg +0 -0
|
@@ -16,6 +16,101 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
16
16
|
|
|
17
17
|
### Removed
|
|
18
18
|
|
|
19
|
+
## [3.2.0] - 2026-06-10
|
|
20
|
+
|
|
21
|
+
### Added
|
|
22
|
+
|
|
23
|
+
- Dev workflow: hierarchical, file-based **plan trees** under `doc/dev/plans/`
|
|
24
|
+
(committed) with a `<plans_dir>` descriptor key. A roadmap item expands into a
|
|
25
|
+
global `00-plan.md` + precise leaf specs (adaptive depth); each leaf declares a
|
|
26
|
+
`complexity` that derives its execution model (`low→haiku`/`medium→sonnet`/
|
|
27
|
+
`high→opus`). New `/plan` (build the tree + open the plan PR first) and
|
|
28
|
+
`/execute-leaf` (spawn an agent per leaf, verify on real data) skills; `/pick-task`,
|
|
29
|
+
`/finish-task`, `/abandon-task`, `/release` and `CLAUDE.md` updated to chain
|
|
30
|
+
through it. Backward-compatible: a repo with no `plans_dir` falls back to the old plan-mode loop. (#94)
|
|
31
|
+
- Restart/reboot safety verified on a real server `systemctl reboot`: the daemon
|
|
32
|
+
auto-starts, the trades stream reconnects, the interval backfill re-arms, the
|
|
33
|
+
`RunsStore` (SQLite WAL) survives and appends, and the coverage manifest keeps the
|
|
34
|
+
resume cursor (no gap). New `test_restart.py` guards RunsStore persistence across a
|
|
35
|
+
reopen and scheduler interval re-arm from config. (#99)
|
|
36
|
+
- Ops for unattended deploy: `HealthMonitor` is now wired into the daemon (CLI
|
|
37
|
+
`dccd start` and the standalone API) — it was implemented but never instantiated,
|
|
38
|
+
so webhook alerts never fired. Docker `HEALTHCHECK` on `/health`, commented
|
|
39
|
+
systemd resource limits, and journald log-rotation guidance. Verified live on a
|
|
40
|
+
server: a failing job past the threshold delivered a real webhook POST, and the
|
|
41
|
+
container reports `healthy`. (#100)
|
|
42
|
+
- Docs: new `how-to/deploy` guide — a blessed, host-validated path to run dccd
|
|
43
|
+
unattended on a server (systemd + venv recommended, Docker alternative), covering
|
|
44
|
+
install, secret injection, `/health`, restart/reboot safety, logs, alerts and the
|
|
45
|
+
old-CPU caveat. Completes **Epic A** (run on a remote server). (#102)
|
|
46
|
+
|
|
47
|
+
### Changed
|
|
48
|
+
|
|
49
|
+
- `Dockerfile`: pin the base image to a digest (reproducible builds) and add a
|
|
50
|
+
`POLARS_VARIANT` build arg — on CPUs without AVX2 (older servers) the default
|
|
51
|
+
`polars` wheel crashes with SIGILL, so
|
|
52
|
+
`docker build --build-arg POLARS_VARIANT=polars-lts-cpu` installs the LTS-CPU
|
|
53
|
+
build instead. Verified end-to-end on a real host (build, run, `/health`, Bearer
|
|
54
|
+
auth, a backfill writing correct OHLC to the `/data` volume). (#97)
|
|
55
|
+
- Docs: `how-to/protect-ui` now covers deploy-time secret injection — the token and
|
|
56
|
+
`rclone.conf` are mounted at run time, never baked into the image (verified on the
|
|
57
|
+
built image: `docker history`/filesystem show no config); the YAML loader does not
|
|
58
|
+
expand `${ENV}` placeholders, so the mounted-file pattern is the blessed one. (#101)
|
|
59
|
+
|
|
60
|
+
### Fixed
|
|
61
|
+
|
|
62
|
+
- `deploy/dccd.service`: `ExecStart` pointed at `/usr/local/bin/dccd` and failed
|
|
63
|
+
`systemd-analyze verify`; it now uses a venv path (`/opt/dccd/venv/bin/dccd`) with
|
|
64
|
+
`StateDirectory=dccd` (systemd owns `/var/lib/dccd`). The install spec dropped the
|
|
65
|
+
non-existent `ui` extra (`.[daemon,ui]` → `.[daemon]`, also in the `Dockerfile`).
|
|
66
|
+
Verified a real system-wide install: `systemd-analyze verify` passes, the service
|
|
67
|
+
is active, auto-restarts after SIGKILL, and a backfill writes correct OHLC under
|
|
68
|
+
the hardened `/var/lib/dccd/data` (`ProtectSystem=strict`). (#98)
|
|
69
|
+
- `HealthMonitor` counted consecutive failures per `run_id`, but each backfill run
|
|
70
|
+
has a unique id (`{spec}@{ts}`), so repeated failures never accumulated (only
|
|
71
|
+
streams, with a stable `@stream` id, could alert). It now keys on the job
|
|
72
|
+
(spec id) so repeated backfill failures trip the alert. (#100)
|
|
73
|
+
|
|
74
|
+
### Deprecated
|
|
75
|
+
|
|
76
|
+
### Removed
|
|
77
|
+
|
|
78
|
+
## [3.1.0] - 2026-06-09
|
|
79
|
+
|
|
80
|
+
### Added
|
|
81
|
+
|
|
82
|
+
- `dccd start` now schedules rclone remote sync: when `storage.remotes` is set,
|
|
83
|
+
the daemon mirrors the store off-box every `storage.sync_interval` seconds with
|
|
84
|
+
exponential backoff, persisted run history (`sync` runs in `RunsStore`) and a
|
|
85
|
+
live `remote-sync` EventBus status. Previously `RemoteStorage` was implemented
|
|
86
|
+
but never driven — a server synced nothing. (#86)
|
|
87
|
+
- Storage page surfaces remote sync: last/next sync, status, configured remotes
|
|
88
|
+
and synced volume, plus a **Sync now** button — backed by
|
|
89
|
+
`GET`/`POST /api/storage/sync`. The shared `operations.sync_remote` primitive
|
|
90
|
+
records each cycle, so the manual button and the scheduled loop share one run history. (#87)
|
|
91
|
+
- Coverage manifest (`CoverageStore`, SQLite under `.dccd/`): backfill records each
|
|
92
|
+
dataset's `[min_ts, max_ts]` extent, and `start="last"` falls back to the
|
|
93
|
+
manifest's `max_ts` when no local file exists — so local data can be dropped to
|
|
94
|
+
free disk without forcing a re-download on the next backfill. (#88)
|
|
95
|
+
- Free-space purge: `storage.min_free_gb` (default `0` = off). After each
|
|
96
|
+
successful sync the daemon drops the oldest already-synced Parquet files until
|
|
97
|
+
free space is back above the floor (the coverage manifest keeps the resume
|
|
98
|
+
cursor, `.dccd/` is never touched). (#89)
|
|
99
|
+
- Read-through restore: reading a dataset whose local Parquet was purged now pulls
|
|
100
|
+
it back from the remote (`rclone copy`) before loading, so a purge is
|
|
101
|
+
transparent to readers (`Client.read`, `POST /api/read`). (#90)
|
|
102
|
+
- Docs: the `how-to/sync-remote` guide now covers rclone provisioning, the
|
|
103
|
+
`min_free_gb` free-space purge, read-through restore, and restore/integrity
|
|
104
|
+
(`rclone copy`/`rclone check`) — completing Epic C (tiered storage). (#91)
|
|
105
|
+
|
|
106
|
+
### Changed
|
|
107
|
+
|
|
108
|
+
### Fixed
|
|
109
|
+
|
|
110
|
+
### Deprecated
|
|
111
|
+
|
|
112
|
+
### Removed
|
|
113
|
+
|
|
19
114
|
## [3.0.0] - 2026-06-07
|
|
20
115
|
|
|
21
116
|
### Added
|
|
@@ -79,22 +79,38 @@ away without losing unrelated good work is too big: split it. This is what makes
|
|
|
79
79
|
|
|
80
80
|
### Dev loop & docs of record
|
|
81
81
|
|
|
82
|
-
The iterative loop is tooled by skills, with
|
|
82
|
+
The iterative loop is tooled by skills, with four tracked docs as the sources of
|
|
83
83
|
truth:
|
|
84
84
|
|
|
85
85
|
| Doc | Holds | Updated by |
|
|
86
86
|
|-----|-------|-----------|
|
|
87
|
-
| `doc/dev/07-roadmap.md` | open work
|
|
87
|
+
| `doc/dev/07-roadmap.md` | open work — single source *index* | `/pick-task` reads · `/finish-task`, `/abandon-task` update |
|
|
88
|
+
| `doc/dev/plans/<epic>/` | open work *detail* — durable hierarchical plan trees (global + leaf specs) | `/plan` writes · `/execute-leaf` reads · `/finish-task`/`/abandon-task` archive |
|
|
88
89
|
| `doc/dev/03-decisions.md` | the *why* — ADR journal (+ settled rationale) | `/finish-task` (accepted), `/abandon-task` (rejected/tombstone) |
|
|
89
90
|
| `doc/dev/06-status.md` | where things stand | `/finish-task`, `/groom-docs` |
|
|
90
91
|
|
|
91
92
|
`CHANGELOG.md` + git log stay authoritative for *what* shipped. The loop:
|
|
92
|
-
`/pick-task` (smallest slice → branch) → plan (split big plans into small PRs) →
|
|
93
|
-
`/finish-task` (tests, ADR entry, status, PR) **or** `/abandon-task` (salvage the
|
|
94
|
-
lesson + close the PR); `/groom-docs` periodically keeps `doc/dev/` lean and true.
|
|
95
93
|
|
|
96
|
-
|
|
97
|
-
|
|
94
|
+
`/pick-task` (smallest coherent slice; **no branch yet**) →
|
|
95
|
+
`/plan` (decompose into a `doc/dev/plans/<epic>/` tree — adaptive depth: a single
|
|
96
|
+
leaf for a trivial task, a global `00-plan.md` + leaves otherwise — and open the
|
|
97
|
+
**plan PR** that lands the tree on `develop` first) →
|
|
98
|
+
`/execute-leaf <epic> next` (cut the leaf branch, **spawn an agent at the model
|
|
99
|
+
derived from the leaf's `complexity`**, which implements + tests + **verifies on
|
|
100
|
+
real data**, then reports) →
|
|
101
|
+
`/finish-task` (tests, ADR, CHANGELOG, leaf PR, archive the leaf, tick the global
|
|
102
|
+
checklist) → … per leaf … → last leaf removes the roadmap line → `/release`.
|
|
103
|
+
|
|
104
|
+
`/abandon-task` salvages the lesson + closes a bad PR (tombstones the leaf);
|
|
105
|
+
`/groom-docs` periodically keeps `doc/dev/` lean and true. The full format lives in
|
|
106
|
+
[`doc/dev/plans/README.md`](doc/dev/plans/README.md). The workflow is
|
|
107
|
+
backward-compatible: a repo whose `.claude/workflow.json` has **no `plans_dir`**
|
|
108
|
+
falls back to the older `/pick-task → plan mode → /finish-task` loop.
|
|
109
|
+
|
|
110
|
+
**Model per task** (advisory — you set it via `/model`, a skill spawns a subagent
|
|
111
|
+
with an explicit `model`, or a plan **leaf's `complexity` derives it**:
|
|
112
|
+
`low→haiku`, `medium→sonnet`, `high→opus`; subagents otherwise *inherit* the
|
|
113
|
+
parent's model):
|
|
98
114
|
|
|
99
115
|
| Model | For |
|
|
100
116
|
|-------|-----|
|
|
@@ -70,6 +70,8 @@ class Client:
|
|
|
70
70
|
self._config: AppConfig | None = None
|
|
71
71
|
self._store: ParquetStore | None = None
|
|
72
72
|
self._registry: SourceRegistry | None = None
|
|
73
|
+
self._coverage_store: Any = None
|
|
74
|
+
self._remote: Any = None
|
|
73
75
|
|
|
74
76
|
def _require_ready(self) -> tuple["SourceRegistry", "ParquetStore"]:
|
|
75
77
|
if self._registry is None or self._store is None:
|
|
@@ -78,7 +80,12 @@ class Client:
|
|
|
78
80
|
|
|
79
81
|
async def __aenter__(self) -> "Client":
|
|
80
82
|
from dccd.application.config import AppConfig, load_config, resolve_config_path
|
|
81
|
-
from dccd.application.service_factory import
|
|
83
|
+
from dccd.application.service_factory import (
|
|
84
|
+
build_coverage_store,
|
|
85
|
+
build_registry,
|
|
86
|
+
build_remote,
|
|
87
|
+
build_store,
|
|
88
|
+
)
|
|
82
89
|
|
|
83
90
|
try:
|
|
84
91
|
path = resolve_config_path(self._config_path)
|
|
@@ -88,6 +95,8 @@ class Client:
|
|
|
88
95
|
|
|
89
96
|
# Single source of truth for adapter wiring — same as CLI and API.
|
|
90
97
|
self._store = build_store(self._config.settings.data_path)
|
|
98
|
+
self._coverage_store = build_coverage_store(self._config.settings.data_path)
|
|
99
|
+
self._remote = build_remote(self._config)
|
|
91
100
|
self._registry = build_registry()
|
|
92
101
|
return self
|
|
93
102
|
|
|
@@ -156,7 +165,8 @@ class Client:
|
|
|
156
165
|
origin="runtime",
|
|
157
166
|
)
|
|
158
167
|
registry, store = self._require_ready()
|
|
159
|
-
return await do_backfill(spec, registry=registry, store=store
|
|
168
|
+
return await do_backfill(spec, registry=registry, store=store,
|
|
169
|
+
coverage_store=self._coverage_store)
|
|
160
170
|
|
|
161
171
|
async def stream(self, exchange: str, symbol: str, data_type: str = "trades",
|
|
162
172
|
span: int | None = None, depth: int | None = None,
|
|
@@ -249,7 +259,8 @@ class Client:
|
|
|
249
259
|
_, store = self._require_ready()
|
|
250
260
|
target = JobTarget(exchange=exchange, symbol=Symbol.parse(symbol),
|
|
251
261
|
data_type=DataType(data_type), span=span)
|
|
252
|
-
return cast("pl.DataFrame", do_read(target, store=store, start_ns=start_ns,
|
|
262
|
+
return cast("pl.DataFrame", do_read(target, store=store, start_ns=start_ns,
|
|
263
|
+
end_ns=end_ns, remote=self._remote))
|
|
253
264
|
|
|
254
265
|
def inventory(self) -> list[dict[str, Any]]:
|
|
255
266
|
"""List every stored dataset with its coverage.
|
|
@@ -70,10 +70,12 @@ class RemoteConfig(BaseModel):
|
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
class StorageConfig(BaseModel):
|
|
73
|
-
"""Storage settings: local path, rclone remotes,
|
|
73
|
+
"""Storage settings: local path, rclone remotes, sync interval, and the
|
|
74
|
+
free-space floor (GiB) that triggers purging already-synced local files."""
|
|
74
75
|
local_path: str = ""
|
|
75
76
|
remotes: list[RemoteConfig] = Field(default_factory=list)
|
|
76
77
|
sync_interval: int = 3600
|
|
78
|
+
min_free_gb: float = 0.0
|
|
77
79
|
|
|
78
80
|
|
|
79
81
|
class AlertConfig(BaseModel):
|
|
@@ -41,13 +41,18 @@ class HealthMonitor:
|
|
|
41
41
|
def _on_event(self, event: Event) -> None:
|
|
42
42
|
if not isinstance(event, StatusEvent):
|
|
43
43
|
return
|
|
44
|
+
# Count failures per *job*, not per run: a run_id is `{spec_id}@{run}` and
|
|
45
|
+
# each backfill run is unique, so keying on run_id would never accumulate
|
|
46
|
+
# across runs (only streams reuse `{spec_id}@stream`). Key on the spec_id
|
|
47
|
+
# prefix so repeated failures of the same job trip the alert.
|
|
48
|
+
key = event.run_id.split("@", 1)[0]
|
|
44
49
|
if event.state == "failed":
|
|
45
|
-
self._consecutive[
|
|
46
|
-
count = self._consecutive[
|
|
50
|
+
self._consecutive[key] += 1
|
|
51
|
+
count = self._consecutive[key]
|
|
47
52
|
if count >= self._max_errors:
|
|
48
|
-
self._alert(
|
|
53
|
+
self._alert(key, count)
|
|
49
54
|
elif event.state == "succeeded":
|
|
50
|
-
self._consecutive[
|
|
55
|
+
self._consecutive[key] = 0
|
|
51
56
|
|
|
52
57
|
def _alert(self, run_id: str, count: int) -> None:
|
|
53
58
|
msg = f"dccd alert: {run_id} failed {count} times consecutively."
|
|
@@ -27,10 +27,12 @@ from dccd.domain.timeutils import NS, ns_now, ns_to_dt
|
|
|
27
27
|
from dccd.domain.types import DataType
|
|
28
28
|
from dccd.sources.base import OHLCHistory, OrderBookSnapshotREST, TradesHistory
|
|
29
29
|
from dccd.sources.registry import SourceRegistry
|
|
30
|
+
from dccd.storage.coverage_sqlite import CoverageStore
|
|
30
31
|
from dccd.storage.parquet import ParquetStore
|
|
32
|
+
from dccd.storage.remote import RemoteStorage
|
|
31
33
|
from dccd.storage.runs_sqlite import RunsStore
|
|
32
34
|
|
|
33
|
-
__all__ = ["backfill", "stream", "read", "inventory"]
|
|
35
|
+
__all__ = ["backfill", "stream", "read", "inventory", "sync_remote"]
|
|
34
36
|
|
|
35
37
|
logger = logging.getLogger(__name__)
|
|
36
38
|
|
|
@@ -112,6 +114,7 @@ async def backfill(
|
|
|
112
114
|
registry: SourceRegistry,
|
|
113
115
|
store: ParquetStore,
|
|
114
116
|
runs_store: RunsStore | None = None,
|
|
117
|
+
coverage_store: CoverageStore | None = None,
|
|
115
118
|
events: RunEvents | None = None,
|
|
116
119
|
stop_event: asyncio.Event | None = None,
|
|
117
120
|
run_id: str | None = None,
|
|
@@ -136,6 +139,10 @@ async def backfill(
|
|
|
136
139
|
registry : SourceRegistry
|
|
137
140
|
store : ParquetStore
|
|
138
141
|
runs_store : RunsStore or None
|
|
142
|
+
coverage_store : CoverageStore or None
|
|
143
|
+
When set, ``start="last"`` falls back to the manifest's recorded
|
|
144
|
+
``max_ts`` if no local file exists (so a dropped store doesn't trigger a
|
|
145
|
+
re-download), and the dataset's extent is recorded on success.
|
|
139
146
|
events : RunEvents or None
|
|
140
147
|
stop_event : asyncio.Event or None
|
|
141
148
|
Set externally to cancel mid-run cleanly.
|
|
@@ -169,6 +176,11 @@ async def backfill(
|
|
|
169
176
|
|
|
170
177
|
if params.start == "last":
|
|
171
178
|
last = store.last_timestamp(ds)
|
|
179
|
+
if last is None and coverage_store is not None:
|
|
180
|
+
# Local files may have been dropped to free disk; the coverage
|
|
181
|
+
# manifest remembers how far we got, so we resume from there instead
|
|
182
|
+
# of re-downloading from the bounded default lookback.
|
|
183
|
+
last = coverage_store.get_max_ts(ds)
|
|
172
184
|
if last is not None:
|
|
173
185
|
start_ns: int = last + 1
|
|
174
186
|
else:
|
|
@@ -200,6 +212,17 @@ async def backfill(
|
|
|
200
212
|
# Counts every item received from the paginator, including unflushed ones.
|
|
201
213
|
_collected: list[int] = [0]
|
|
202
214
|
|
|
215
|
+
# Min/max timestamp seen this run, fed to the coverage manifest on success so
|
|
216
|
+
# the dataset's extent survives a local-data drop (see CoverageStore).
|
|
217
|
+
_run_min: list[int | None] = [None]
|
|
218
|
+
_run_max: list[int | None] = [None]
|
|
219
|
+
|
|
220
|
+
def _track_ts(ts: int) -> None:
|
|
221
|
+
if _run_min[0] is None or ts < _run_min[0]:
|
|
222
|
+
_run_min[0] = ts
|
|
223
|
+
if _run_max[0] is None or ts > _run_max[0]:
|
|
224
|
+
_run_max[0] = ts
|
|
225
|
+
|
|
203
226
|
# Progress is reported by *time covered* of the requested window, which gives
|
|
204
227
|
# a real, smooth bar for both OHLC and cursor-paginated trades (the latter
|
|
205
228
|
# have no page total). ``at`` is the timestamp reached. The window is read
|
|
@@ -257,6 +280,7 @@ async def backfill(
|
|
|
257
280
|
break
|
|
258
281
|
bars.append(bar)
|
|
259
282
|
_collected[0] += 1
|
|
283
|
+
_track_ts(bar.ts)
|
|
260
284
|
if _collected[0] % 200 == 0:
|
|
261
285
|
_emit_time(bar.ts)
|
|
262
286
|
if len(bars) >= _FLUSH_BATCH:
|
|
@@ -295,6 +319,7 @@ async def backfill(
|
|
|
295
319
|
break
|
|
296
320
|
batch.append(trade)
|
|
297
321
|
_collected[0] += 1
|
|
322
|
+
_track_ts(trade.ts)
|
|
298
323
|
if _collected[0] % 1000 == 0:
|
|
299
324
|
_emit_time(trade.ts) # progress by time covered, not page count
|
|
300
325
|
if len(batch) >= _FLUSH_BATCH:
|
|
@@ -309,6 +334,7 @@ async def backfill(
|
|
|
309
334
|
raise NoCapability(target.exchange, "orderbook", "snapshot")
|
|
310
335
|
depth = params.depth or 50
|
|
311
336
|
snap = await adapter.fetch_orderbook(target.symbol, depth)
|
|
337
|
+
_track_ts(snap.ts)
|
|
312
338
|
total_written += await _flush(store, ds, [snap], prov_src)
|
|
313
339
|
|
|
314
340
|
except Exception as exc:
|
|
@@ -328,6 +354,12 @@ async def backfill(
|
|
|
328
354
|
if runs_store:
|
|
329
355
|
runs_store.finish_run(run_id, state, rows_written=total_written)
|
|
330
356
|
|
|
357
|
+
# Record coverage so this dataset's extent survives a later local-data drop.
|
|
358
|
+
if coverage_store is not None and _run_max[0] is not None:
|
|
359
|
+
coverage_store.record(
|
|
360
|
+
ds, min_ts=_run_min[0], max_ts=_run_max[0], rows_added=total_written
|
|
361
|
+
)
|
|
362
|
+
|
|
331
363
|
return {"run_id": run_id, "rows_written": total_written, "start_ns": start_ns, "end_ns": end_ns}
|
|
332
364
|
|
|
333
365
|
|
|
@@ -461,12 +493,87 @@ def read(
|
|
|
461
493
|
store: ParquetStore,
|
|
462
494
|
start_ns: int | None = None,
|
|
463
495
|
end_ns: int | None = None,
|
|
496
|
+
remote: RemoteStorage | None = None,
|
|
464
497
|
) -> Any:
|
|
465
|
-
"""Read stored data for *target* in the given nanosecond range.
|
|
498
|
+
"""Read stored data for *target* in the given nanosecond range.
|
|
499
|
+
|
|
500
|
+
Read-through restore: when *remote* is set and the dataset has no local
|
|
501
|
+
Parquet (e.g. it was purged to free disk), the dataset directory is pulled
|
|
502
|
+
back from the remote (``rclone copy``) before loading, so a purge is
|
|
503
|
+
transparent to readers.
|
|
504
|
+
"""
|
|
466
505
|
ds = _make_dataset_id(target)
|
|
506
|
+
if remote is not None:
|
|
507
|
+
directory = store.directory(ds)
|
|
508
|
+
if not any(directory.glob("*.parquet")):
|
|
509
|
+
rel = directory.relative_to(store.root)
|
|
510
|
+
remote.restore(str(rel))
|
|
467
511
|
return store.load(ds, start_ns, end_ns)
|
|
468
512
|
|
|
469
513
|
|
|
470
514
|
def inventory(*, store: ParquetStore) -> list[dict[str, Any]]:
|
|
471
515
|
"""Return a list of dataset descriptors for all stored data."""
|
|
472
516
|
return store.inventory()
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
async def sync_remote(
|
|
520
|
+
remote: RemoteStorage,
|
|
521
|
+
*,
|
|
522
|
+
runs_store: RunsStore | None = None,
|
|
523
|
+
events: RunEvents | None = None,
|
|
524
|
+
run_id: str | None = None,
|
|
525
|
+
) -> dict[str, Any]:
|
|
526
|
+
"""Run one remote-sync cycle: mirror the local store to all rclone remotes.
|
|
527
|
+
|
|
528
|
+
Records the cycle as a ``sync`` run in *runs_store* (so the Storage UI can
|
|
529
|
+
show "last sync") and emits ``status``/``log`` on *events*. Shared by the
|
|
530
|
+
scheduler's periodic loop and the manual "Sync now" endpoint, so the
|
|
531
|
+
run-recording lives in exactly one place.
|
|
532
|
+
|
|
533
|
+
Parameters
|
|
534
|
+
----------
|
|
535
|
+
remote : RemoteStorage
|
|
536
|
+
runs_store : RunsStore or None
|
|
537
|
+
events : RunEvents or None
|
|
538
|
+
run_id : str or None
|
|
539
|
+
Override the auto-generated run id.
|
|
540
|
+
|
|
541
|
+
Returns
|
|
542
|
+
-------
|
|
543
|
+
dict
|
|
544
|
+
``{'run_id', 'results', 'ok'}`` — ``results`` maps remote → success;
|
|
545
|
+
``ok`` is True only when every configured remote synced.
|
|
546
|
+
"""
|
|
547
|
+
if run_id is None:
|
|
548
|
+
run_id = f"remote-sync@{time.time_ns()}"
|
|
549
|
+
if runs_store:
|
|
550
|
+
runs_store.create_run(run_id, "remote-sync", "sync", "-", "all", "-")
|
|
551
|
+
if events:
|
|
552
|
+
events.status("running")
|
|
553
|
+
try:
|
|
554
|
+
results = await remote.sync_all()
|
|
555
|
+
except Exception as exc:
|
|
556
|
+
msg = f"Remote sync error: {exc}"
|
|
557
|
+
if events:
|
|
558
|
+
events.log(msg, "error")
|
|
559
|
+
events.status("failed")
|
|
560
|
+
if runs_store:
|
|
561
|
+
runs_store.finish_run(run_id, "failed", error=str(exc))
|
|
562
|
+
return {"run_id": run_id, "results": {}, "ok": False}
|
|
563
|
+
|
|
564
|
+
failed = [r for r, ok in results.items() if not ok]
|
|
565
|
+
if failed:
|
|
566
|
+
msg = f"Remote sync failed for: {', '.join(failed)}"
|
|
567
|
+
if events:
|
|
568
|
+
events.log(msg, "error")
|
|
569
|
+
events.status("failed")
|
|
570
|
+
if runs_store:
|
|
571
|
+
runs_store.finish_run(run_id, "failed", error=msg)
|
|
572
|
+
return {"run_id": run_id, "results": results, "ok": False}
|
|
573
|
+
|
|
574
|
+
if events:
|
|
575
|
+
events.log(f"Synced {len(results)} remote(s)")
|
|
576
|
+
events.status("succeeded")
|
|
577
|
+
if runs_store:
|
|
578
|
+
runs_store.finish_run(run_id, "succeeded", rows_written=len(results))
|
|
579
|
+
return {"run_id": run_id, "results": results, "ok": True}
|
|
@@ -6,10 +6,12 @@ import asyncio
|
|
|
6
6
|
import logging
|
|
7
7
|
import time
|
|
8
8
|
|
|
9
|
-
from dccd.application.events import EventBus
|
|
9
|
+
from dccd.application.events import EventBus, RunEvents
|
|
10
10
|
from dccd.application.jobs import JobSpec
|
|
11
11
|
from dccd.sources.registry import SourceRegistry
|
|
12
|
+
from dccd.storage.coverage_sqlite import CoverageStore
|
|
12
13
|
from dccd.storage.parquet import ParquetStore
|
|
14
|
+
from dccd.storage.remote import RemoteStorage
|
|
13
15
|
from dccd.storage.runs_sqlite import RunsStore
|
|
14
16
|
|
|
15
17
|
__all__ = ["Scheduler"]
|
|
@@ -90,6 +92,11 @@ class Scheduler:
|
|
|
90
92
|
store : ParquetStore
|
|
91
93
|
runs_store : RunsStore or None
|
|
92
94
|
events : EventBus
|
|
95
|
+
remote : RemoteStorage or None
|
|
96
|
+
When set (rclone remotes configured), :meth:`start` launches a periodic
|
|
97
|
+
loop that mirrors the local store off-box every ``sync_interval`` seconds.
|
|
98
|
+
sync_interval : int
|
|
99
|
+
Seconds between remote-sync cycles (default 3600).
|
|
93
100
|
"""
|
|
94
101
|
|
|
95
102
|
def __init__(
|
|
@@ -98,11 +105,22 @@ class Scheduler:
|
|
|
98
105
|
store: ParquetStore,
|
|
99
106
|
runs_store: RunsStore | None = None,
|
|
100
107
|
events: EventBus | None = None,
|
|
108
|
+
remote: RemoteStorage | None = None,
|
|
109
|
+
sync_interval: int = 3600,
|
|
110
|
+
coverage_store: CoverageStore | None = None,
|
|
111
|
+
data_path: str | None = None,
|
|
112
|
+
min_free_gb: float = 0.0,
|
|
101
113
|
) -> None:
|
|
102
114
|
self._registry = registry
|
|
103
115
|
self._store = store
|
|
104
116
|
self._runs_store = runs_store
|
|
105
117
|
self._events = events or EventBus()
|
|
118
|
+
self._remote = remote
|
|
119
|
+
self._sync_interval = sync_interval
|
|
120
|
+
self._coverage_store = coverage_store
|
|
121
|
+
self._data_path = data_path
|
|
122
|
+
self._min_free_gb = min_free_gb
|
|
123
|
+
self._sync_task: asyncio.Task[None] | None = None
|
|
106
124
|
self._streams: dict[str, _StreamWorker] = {}
|
|
107
125
|
self._interval_tasks: list[asyncio.Task[None]] = []
|
|
108
126
|
# Per-spec recurring backfill loops, keyed by spec id, with the interval
|
|
@@ -186,6 +204,8 @@ class Scheduler:
|
|
|
186
204
|
async def start(self, specs: list[JobSpec]) -> None:
|
|
187
205
|
"""Start all enabled specs (full daemon mode)."""
|
|
188
206
|
self._running = True
|
|
207
|
+
if self._remote is not None and self._sync_task is None:
|
|
208
|
+
self._sync_task = asyncio.create_task(self._sync_loop())
|
|
189
209
|
for spec in specs:
|
|
190
210
|
if not spec.enabled:
|
|
191
211
|
continue
|
|
@@ -207,6 +227,13 @@ class Scheduler:
|
|
|
207
227
|
async def stop(self) -> None:
|
|
208
228
|
"""Stop all running jobs."""
|
|
209
229
|
self._running = False
|
|
230
|
+
if self._sync_task is not None:
|
|
231
|
+
self._sync_task.cancel()
|
|
232
|
+
try:
|
|
233
|
+
await self._sync_task
|
|
234
|
+
except (asyncio.CancelledError, Exception):
|
|
235
|
+
pass
|
|
236
|
+
self._sync_task = None
|
|
210
237
|
for task in self._interval_tasks:
|
|
211
238
|
task.cancel()
|
|
212
239
|
for task, _ in self._interval_loops.values():
|
|
@@ -216,6 +243,64 @@ class Scheduler:
|
|
|
216
243
|
self._interval_tasks.clear()
|
|
217
244
|
self._interval_loops.clear()
|
|
218
245
|
|
|
246
|
+
async def _sync_loop(self) -> None:
    """Periodically mirror the local store to the configured rclone remotes.

    Runs only when a :class:`~dccd.storage.remote.RemoteStorage` was wired in
    (``storage.remotes`` non-empty). Each cycle is delegated to
    :func:`dccd.application.operations.sync_remote` (which records a ``sync``
    run in :class:`RunsStore` and emits live ``remote-sync`` EventBus status);
    this loop only owns the cadence and the exponential backoff on failure
    (30s, doubling, capped at ``sync_interval``) so a flapping remote doesn't
    hammer rclone.

    After a successful cycle the loop runs :meth:`_maybe_purge`, resets the
    backoff and sleeps ``sync_interval`` seconds. After a failed cycle it
    sleeps the current backoff delay, clamped to ``sync_interval``, and the
    warning reports that clamped delay. Cancellation ends the loop quietly.
    """
    from dccd.application.operations import sync_remote

    assert self._remote is not None
    run_events = self._events.for_run("remote-sync")
    initial_backoff = 30.0
    backoff = initial_backoff
    try:
        while self._running:
            result = await sync_remote(
                self._remote, runs_store=self._runs_store, events=run_events
            )
            if result["ok"]:
                # Remote is now up to date → safe to free disk by dropping the
                # oldest already-synced files (the coverage manifest keeps the
                # resume cursor). Runs only when a floor is configured.
                await self._maybe_purge(run_events)
                backoff = initial_backoff
                await asyncio.sleep(self._sync_interval)
            else:
                # Clamp before logging so the reported delay is the one that
                # is actually slept (they differ when sync_interval < backoff).
                delay = min(backoff, self._sync_interval)
                logger.warning("Remote sync failed — retry in %ds", int(delay))
                await asyncio.sleep(delay)
                backoff = min(backoff * 2, float(self._sync_interval))
    except asyncio.CancelledError:
        return
|
|
279
|
+
|
|
280
|
+
async def _maybe_purge(self, run_events: RunEvents) -> None:
    """Drop the oldest synced files when free disk space is under the floor.

    Invoked by the sync loop immediately after a successful sync, i.e. when
    the remote is current, so anything removed here can be fetched back from
    the remote. Does nothing unless both ``min_free_gb`` (> 0) and a
    ``data_path`` are configured.

    The purge itself runs in a worker thread via :func:`asyncio.to_thread`.
    A failing purge is logged as a warning and otherwise ignored; a purge
    that removed at least one file is reported through ``run_events.log``.

    Parameters
    ----------
    run_events : RunEvents
        Event sink that receives the purge summary line.
    """
    if self._min_free_gb <= 0 or not self._data_path:
        return

    from dccd.storage.purge import purge_to_free_space

    try:
        outcome = await asyncio.to_thread(
            purge_to_free_space, self._data_path, self._min_free_gb
        )
    except Exception as exc:
        # Best-effort housekeeping: never let a purge error break the caller.
        logger.warning("Purge failed: %s", exc)
        return

    removed = outcome["removed"]
    if not removed:
        return

    freed_gib = outcome["freed_bytes"] / (1024 ** 3)
    summary = (
        f"Purged {len(removed)} file(s), "
        f"freed ~{freed_gib:.2f} GiB to stay above "
        f"{self._min_free_gb} GiB free"
    )
    run_events.log(summary)
|
|
303
|
+
|
|
219
304
|
async def _interval_loop(self, spec: JobSpec) -> None:
|
|
220
305
|
every = spec.trigger.every or spec.target.span or 3600
|
|
221
306
|
while self._running:
|
|
@@ -231,6 +316,7 @@ class Scheduler:
|
|
|
231
316
|
registry=self._registry,
|
|
232
317
|
store=self._store,
|
|
233
318
|
runs_store=self._runs_store,
|
|
319
|
+
coverage_store=self._coverage_store,
|
|
234
320
|
events=run_events,
|
|
235
321
|
)
|
|
236
322
|
except Exception as exc:
|
|
@@ -11,11 +11,20 @@ import pathlib
|
|
|
11
11
|
from typing import TYPE_CHECKING
|
|
12
12
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
|
+
from dccd.application.config import AppConfig
|
|
14
15
|
from dccd.sources.registry import SourceRegistry
|
|
16
|
+
from dccd.storage.coverage_sqlite import CoverageStore
|
|
15
17
|
from dccd.storage.parquet import ParquetStore
|
|
18
|
+
from dccd.storage.remote import RemoteStorage
|
|
16
19
|
from dccd.storage.runs_sqlite import RunsStore
|
|
17
20
|
|
|
18
|
-
__all__ = [
|
|
21
|
+
__all__ = [
|
|
22
|
+
"build_registry",
|
|
23
|
+
"build_store",
|
|
24
|
+
"build_runs_store",
|
|
25
|
+
"build_remote",
|
|
26
|
+
"build_coverage_store",
|
|
27
|
+
]
|
|
19
28
|
|
|
20
29
|
|
|
21
30
|
def build_registry() -> "SourceRegistry":
|
|
@@ -77,3 +86,50 @@ def build_runs_store(data_path: str | pathlib.Path) -> "RunsStore":
|
|
|
77
86
|
from dccd.storage.runs_sqlite import RunsStore
|
|
78
87
|
|
|
79
88
|
return RunsStore(pathlib.Path(data_path) / ".dccd" / "runs.db")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def build_coverage_store(data_path: str | pathlib.Path) -> "CoverageStore":
    """Build the :class:`~dccd.storage.coverage_sqlite.CoverageStore`.

    The backing database is ``{data_path}/.dccd/coverage.db``. It is the
    manifest that lets local data be dropped without forcing a re-download
    on the next backfill.

    Parameters
    ----------
    data_path : str or Path
        Root directory of the local store; the database is placed in its
        ``.dccd`` sub-directory.

    Returns
    -------
    CoverageStore
    """
    # Imported lazily, matching the other builders in this module.
    from dccd.storage.coverage_sqlite import CoverageStore

    db_file = pathlib.Path(data_path).joinpath(".dccd", "coverage.db")
    return CoverageStore(db_file)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def build_remote(cfg: "AppConfig") -> "RemoteStorage | None":
    """Build the :class:`~dccd.storage.remote.RemoteStorage`, if any.

    When ``storage.remotes`` is empty there is nothing to sync, so ``None``
    is returned and the daemon skips the sync loop. Otherwise the remote is
    rooted at ``settings.data_path`` (the canonical store root used by
    :func:`build_store`) and receives every configured remote dumped to a
    plain mapping via ``model_dump()``.

    Parameters
    ----------
    cfg : AppConfig
        Application configuration; ``cfg.storage.remotes`` and
        ``cfg.settings.data_path`` are read.

    Returns
    -------
    RemoteStorage or None
        ``None`` when no rclone remotes are configured.
    """
    configured = cfg.storage.remotes
    if configured:
        from dccd.storage.remote import RemoteStorage

        local_root = cfg.settings.data_path
        remote_specs = [remote.model_dump() for remote in configured]
        return RemoteStorage(local_root, remote_specs)
    return None
|