dccd 3.1.0__tar.gz → 3.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dccd-3.1.0 → dccd-3.2.0}/CHANGELOG.md +59 -0
- {dccd-3.1.0 → dccd-3.2.0}/CLAUDE.md +23 -7
- {dccd-3.1.0 → dccd-3.2.0}/PKG-INFO +1 -1
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/monitor.py +9 -4
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/api/app.py +12 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/cli/main.py +10 -1
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_application.py +62 -0
- dccd-3.2.0/dccd/tests/v3/test_restart.py +77 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd.egg-info/PKG-INFO +1 -1
- {dccd-3.1.0 → dccd-3.2.0}/dccd.egg-info/SOURCES.txt +1 -0
- {dccd-3.1.0 → dccd-3.2.0}/pyproject.toml +1 -1
- {dccd-3.1.0 → dccd-3.2.0}/CONTRIBUTING.md +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/LICENSE.txt +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/MANIFEST.in +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/README.md +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/config.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/events.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/jobs.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/operations.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/registry.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/scheduler.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/application/service_factory.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/capability.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/dataset.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/errors.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/records.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/symbol.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/timeutils.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/transforms.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/domain/types.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/api/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/cli/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/static/favicon.svg +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/static/logo.svg +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/base.html +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/config.html +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/dashboard.html +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/data.html +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/historical.html +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/live.html +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/logs.html +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/interfaces/ui/templates/storage.html +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/base.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/binance.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/bitfinex.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/bitmex.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/bybit.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/coinbase.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/kraken.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/okx.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/sources/registry.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/storage/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/storage/coverage_sqlite.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/storage/parquet.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/storage/purge.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/storage/remote.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/storage/runs_sqlite.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_api.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_backfill_lookback.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_client.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_coverage.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_domain.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_domain_extended.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_network.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_purge.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_remote_sync.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_restore.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_sources.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_storage.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_storage_extended.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/tests/v3/test_transport.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/transport/__init__.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/transport/http.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/transport/paginate.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/transport/ratelimit.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd/transport/ws.py +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd.egg-info/dependency_links.txt +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd.egg-info/entry_points.txt +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd.egg-info/requires.txt +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/dccd.egg-info/top_level.txt +0 -0
- {dccd-3.1.0 → dccd-3.2.0}/setup.cfg +0 -0
|
@@ -16,6 +16,65 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
16
16
|
|
|
17
17
|
### Removed
|
|
18
18
|
|
|
19
|
+
## [3.2.0] - 2026-06-10
|
|
20
|
+
|
|
21
|
+
### Added
|
|
22
|
+
|
|
23
|
+
- Dev workflow: hierarchical, file-based **plan trees** under `doc/dev/plans/`
|
|
24
|
+
(committed) with a `<plans_dir>` descriptor key. A roadmap item expands into a
|
|
25
|
+
global `00-plan.md` + precise leaf specs (adaptive depth); each leaf declares a
|
|
26
|
+
`complexity` that derives its execution model (`low→haiku`/`medium→sonnet`/
|
|
27
|
+
`high→opus`). New `/plan` (build the tree + open the plan PR first) and
|
|
28
|
+
`/execute-leaf` (spawn an agent per leaf, verify on real data) skills; `/pick-task`,
|
|
29
|
+
`/finish-task`, `/abandon-task`, `/release` and `CLAUDE.md` updated to chain
|
|
30
|
+
through it. Backward-compatible: no `plans_dir` ⇒ the old plan-mode loop. (#94)
|
|
31
|
+
- Restart/reboot safety verified with a real `systemctl reboot` on a server: the daemon
|
|
32
|
+
auto-starts, the trades stream reconnects, the interval backfill re-arms, the
|
|
33
|
+
`RunsStore` (SQLite WAL) survives and appends, and the coverage manifest keeps the
|
|
34
|
+
resume cursor (no gap). New `test_restart.py` guards RunsStore persistence across a
|
|
35
|
+
reopen and scheduler interval re-arm from config. (#99)
|
|
36
|
+
- Ops for unattended deploy: `HealthMonitor` is now wired into the daemon (CLI
|
|
37
|
+
`dccd start` and the standalone API) — it was implemented but never instantiated,
|
|
38
|
+
so webhook alerts never fired. Docker `HEALTHCHECK` on `/health`, commented
|
|
39
|
+
systemd resource limits, and journald log-rotation guidance. Verified live on a
|
|
40
|
+
server: a failing job past the threshold delivered a real webhook POST, and the
|
|
41
|
+
container reports `healthy`. (#100)
|
|
42
|
+
- Docs: new `how-to/deploy` guide — a blessed, host-validated path to run dccd
|
|
43
|
+
unattended on a server (systemd + venv recommended, Docker alternative), covering
|
|
44
|
+
install, secret injection, `/health`, restart/reboot safety, logs, alerts and the
|
|
45
|
+
old-CPU caveat. Completes **Epic A** (run on a remote server). (#102)
|
|
46
|
+
|
|
47
|
+
### Changed
|
|
48
|
+
|
|
49
|
+
- `Dockerfile`: pin the base image to a digest (reproducible builds) and add a
|
|
50
|
+
`POLARS_VARIANT` build arg — on CPUs without AVX2 (older servers) the default
|
|
51
|
+
`polars` wheel crashes with SIGILL, so
|
|
52
|
+
`docker build --build-arg POLARS_VARIANT=polars-lts-cpu` installs the LTS-CPU
|
|
53
|
+
build instead. Verified end-to-end on a real host (build, run, `/health`, Bearer
|
|
54
|
+
auth, a backfill writing correct OHLC to the `/data` volume). (#97)
|
|
55
|
+
- Docs: `how-to/protect-ui` now covers deploy-time secret injection — the token and
|
|
56
|
+
`rclone.conf` are mounted at run time, never baked into the image (verified on the
|
|
57
|
+
built image: `docker history`/filesystem show no config); the YAML loader does not
|
|
58
|
+
expand `${ENV}` placeholders, so the mounted-file pattern is the blessed one. (#101)
|
|
59
|
+
|
|
60
|
+
### Fixed
|
|
61
|
+
|
|
62
|
+
- `deploy/dccd.service`: `ExecStart` pointed at `/usr/local/bin/dccd` and failed
|
|
63
|
+
`systemd-analyze verify`; it now uses a venv path (`/opt/dccd/venv/bin/dccd`) with
|
|
64
|
+
`StateDirectory=dccd` (systemd owns `/var/lib/dccd`). The install spec dropped the
|
|
65
|
+
non-existent `ui` extra (`.[daemon,ui]` → `.[daemon]`, also in the `Dockerfile`).
|
|
66
|
+
Verified a real system-wide install: `systemd-analyze verify` passes, the service
|
|
67
|
+
is active, auto-restarts after SIGKILL, and a backfill writes correct OHLC under
|
|
68
|
+
the hardened `/var/lib/dccd/data` (`ProtectSystem=strict`). (#98)
|
|
69
|
+
- `HealthMonitor` counted consecutive failures per `run_id`, but each backfill run
|
|
70
|
+
has a unique id (`{spec}@{ts}`), so repeated failures never accumulated (only
|
|
71
|
+
streams, with a stable `@stream` id, could alert). It now keys on the job
|
|
72
|
+
(spec id) so repeated backfill failures trip the alert. (#100)
|
|
73
|
+
|
|
74
|
+
### Deprecated
|
|
75
|
+
|
|
76
|
+
### Removed
|
|
77
|
+
|
|
19
78
|
## [3.1.0] - 2026-06-09
|
|
20
79
|
|
|
21
80
|
### Added
|
|
@@ -79,22 +79,38 @@ away without losing unrelated good work is too big: split it. This is what makes
|
|
|
79
79
|
|
|
80
80
|
### Dev loop & docs of record
|
|
81
81
|
|
|
82
|
-
The iterative loop is tooled by skills, with three tracked docs as the sources of
|
|
82
|
+
The iterative loop is tooled by skills, with four tracked docs as the sources of
|
|
83
83
|
truth:
|
|
84
84
|
|
|
85
85
|
| Doc | Holds | Updated by |
|
|
86
86
|
|-----|-------|-----------|
|
|
87
|
-
| `doc/dev/07-roadmap.md` | open work
|
|
87
|
+
| `doc/dev/07-roadmap.md` | open work — single source *index* | `/pick-task` reads · `/finish-task`, `/abandon-task` update |
|
|
88
|
+
| `doc/dev/plans/<epic>/` | open work *detail* — durable hierarchical plan trees (global + leaf specs) | `/plan` writes · `/execute-leaf` reads · `/finish-task`/`/abandon-task` archive |
|
|
88
89
|
| `doc/dev/03-decisions.md` | the *why* — ADR journal (+ settled rationale) | `/finish-task` (accepted), `/abandon-task` (rejected/tombstone) |
|
|
89
90
|
| `doc/dev/06-status.md` | where things stand | `/finish-task`, `/groom-docs` |
|
|
90
91
|
|
|
91
92
|
`CHANGELOG.md` + git log stay authoritative for *what* shipped. The loop:
|
|
92
|
-
`/pick-task` (smallest slice → branch) → plan (split big plans into small PRs) →
|
|
93
|
-
`/finish-task` (tests, ADR entry, status, PR) **or** `/abandon-task` (salvage the
|
|
94
|
-
lesson + close the PR); `/groom-docs` periodically keeps `doc/dev/` lean and true.
|
|
95
93
|
|
|
96
|
-
|
|
97
|
-
|
|
94
|
+
`/pick-task` (smallest coherent slice; **no branch yet**) →
|
|
95
|
+
`/plan` (decompose into a `doc/dev/plans/<epic>/` tree — adaptive depth: a single
|
|
96
|
+
leaf for a trivial task, a global `00-plan.md` + leaves otherwise — and open the
|
|
97
|
+
**plan PR** that lands the tree on `develop` first) →
|
|
98
|
+
`/execute-leaf <epic> next` (cut the leaf branch, **spawn an agent at the model
|
|
99
|
+
derived from the leaf's `complexity`**, which implements + tests + **verifies on
|
|
100
|
+
real data**, then reports) →
|
|
101
|
+
`/finish-task` (tests, ADR, CHANGELOG, leaf PR, archive the leaf, tick the global
|
|
102
|
+
checklist) → … per leaf … → last leaf removes the roadmap line → `/release`.
|
|
103
|
+
|
|
104
|
+
`/abandon-task` salvages the lesson + closes a bad PR (tombstones the leaf);
|
|
105
|
+
`/groom-docs` periodically keeps `doc/dev/` lean and true. The full format lives in
|
|
106
|
+
[`doc/dev/plans/README.md`](doc/dev/plans/README.md). The workflow is
|
|
107
|
+
backward-compatible: a repo whose `.claude/workflow.json` has **no `plans_dir`**
|
|
108
|
+
falls back to the older `/pick-task → plan mode → /finish-task` loop.
|
|
109
|
+
|
|
110
|
+
**Model per task** (advisory — you set it via `/model`, a skill spawns a subagent
|
|
111
|
+
with an explicit `model`, or a plan **leaf's `complexity` derives it**:
|
|
112
|
+
`low→haiku`, `medium→sonnet`, `high→opus`; subagents otherwise *inherit* the
|
|
113
|
+
parent):
|
|
98
114
|
|
|
99
115
|
| Model | For |
|
|
100
116
|
|-------|-----|
|
|
@@ -41,13 +41,18 @@ class HealthMonitor:
|
|
|
41
41
|
def _on_event(self, event: Event) -> None:
|
|
42
42
|
if not isinstance(event, StatusEvent):
|
|
43
43
|
return
|
|
44
|
+
# Count failures per *job*, not per run: a run_id is `{spec_id}@{run}` and
|
|
45
|
+
# each backfill run is unique, so keying on run_id would never accumulate
|
|
46
|
+
# across runs (only streams reuse `{spec_id}@stream`). Key on the spec_id
|
|
47
|
+
# prefix so repeated failures of the same job trip the alert.
|
|
48
|
+
key = event.run_id.split("@", 1)[0]
|
|
44
49
|
if event.state == "failed":
|
|
45
|
-
self._consecutive[event.run_id] += 1
|
|
46
|
-
count = self._consecutive[event.run_id]
|
|
50
|
+
self._consecutive[key] += 1
|
|
51
|
+
count = self._consecutive[key]
|
|
47
52
|
if count >= self._max_errors:
|
|
48
|
-
self._alert(event.run_id, count)
|
|
53
|
+
self._alert(key, count)
|
|
49
54
|
elif event.state == "succeeded":
|
|
50
|
-
self._consecutive[event.run_id] = 0
|
|
55
|
+
self._consecutive[key] = 0
|
|
51
56
|
|
|
52
57
|
def _alert(self, run_id: str, count: int) -> None:
|
|
53
58
|
msg = f"dccd alert: {run_id} failed {count} times consecutively."
|
|
@@ -37,6 +37,7 @@ from pydantic import BaseModel
|
|
|
37
37
|
from dccd.application.config import AppConfig, load_config, resolve_config_path
|
|
38
38
|
from dccd.application.events import EventBus
|
|
39
39
|
from dccd.application.jobs import JobParams, JobSpec, JobTarget, Trigger
|
|
40
|
+
from dccd.application.monitor import HealthMonitor
|
|
40
41
|
from dccd.application.registry import REGISTRY
|
|
41
42
|
from dccd.application.scheduler import Scheduler
|
|
42
43
|
from dccd.application.service_factory import (
|
|
@@ -179,7 +180,10 @@ def create_app(
|
|
|
179
180
|
app.state.remote = build_remote(cfg)
|
|
180
181
|
|
|
181
182
|
if scheduler is not None:
|
|
183
|
+
# `dccd start` owns the scheduler *and* its HealthMonitor (wired on the
|
|
184
|
+
# scheduler's bus in cmd_start) — don't double-wire here.
|
|
182
185
|
app.state.scheduler = scheduler
|
|
186
|
+
app.state.monitor = None
|
|
183
187
|
else:
|
|
184
188
|
app.state.scheduler = Scheduler(
|
|
185
189
|
app.state.registry,
|
|
@@ -188,6 +192,14 @@ def create_app(
|
|
|
188
192
|
app.state.event_bus,
|
|
189
193
|
coverage_store=app.state.coverage_store,
|
|
190
194
|
)
|
|
195
|
+
# Standalone (`dccd ui`): wire alerts on this app's bus, which the
|
|
196
|
+
# standalone scheduler publishes to.
|
|
197
|
+
app.state.monitor = HealthMonitor(
|
|
198
|
+
app.state.runs_store,
|
|
199
|
+
app.state.event_bus,
|
|
200
|
+
webhook_url=cfg.alerts.webhook_url,
|
|
201
|
+
max_consecutive_errors=cfg.alerts.max_consecutive_errors,
|
|
202
|
+
)
|
|
191
203
|
|
|
192
204
|
# Register stream workers from config so they can be started/stopped
|
|
193
205
|
# from the UI even in standalone dccd-ui mode (without dccd start).
|
|
@@ -146,6 +146,7 @@ def cmd_start(
|
|
|
146
146
|
import uvicorn
|
|
147
147
|
|
|
148
148
|
from dccd.application.events import EventBus
|
|
149
|
+
from dccd.application.monitor import HealthMonitor
|
|
149
150
|
from dccd.application.scheduler import Scheduler
|
|
150
151
|
from dccd.application.service_factory import (
|
|
151
152
|
build_coverage_store,
|
|
@@ -162,6 +163,14 @@ def cmd_start(
|
|
|
162
163
|
coverage_store = build_coverage_store(cfg.settings.data_path)
|
|
163
164
|
registry = build_registry()
|
|
164
165
|
bus = EventBus()
|
|
166
|
+
# Fire webhook alerts on repeated job failures. Subscribes to the same bus the
|
|
167
|
+
# scheduler publishes to; kept referenced for the daemon's lifetime (it was
|
|
168
|
+
# never instantiated before, so alerts never fired).
|
|
169
|
+
monitor = HealthMonitor(
|
|
170
|
+
runs_store, bus,
|
|
171
|
+
webhook_url=cfg.alerts.webhook_url,
|
|
172
|
+
max_consecutive_errors=cfg.alerts.max_consecutive_errors,
|
|
173
|
+
)
|
|
165
174
|
remote = build_remote(cfg)
|
|
166
175
|
scheduler = Scheduler(
|
|
167
176
|
registry, store, runs_store, bus,
|
|
@@ -185,7 +194,7 @@ def cmd_start(
|
|
|
185
194
|
ui_host = host or cfg.settings.ui_host
|
|
186
195
|
ui_port = port or cfg.settings.ui_port
|
|
187
196
|
|
|
188
|
-
async def _run():
|
|
197
|
+
async def _run(_monitor: object = monitor): # keep the HealthMonitor alive
|
|
189
198
|
await scheduler.start(all_specs)
|
|
190
199
|
typer.echo(f"Daemon running — UI at http://{ui_host}:{ui_port}")
|
|
191
200
|
server = uvicorn.Server(uvicorn.Config(fastapi_app, host=ui_host, port=ui_port, log_level="warning"))
|
|
@@ -358,3 +358,65 @@ class TestOperationRegistry:
|
|
|
358
358
|
"""Each operation must be accessible from both CLI and API (parity test)."""
|
|
359
359
|
required_ops = {"backfill", "stream", "read", "inventory"}
|
|
360
360
|
assert required_ops.issubset(set(REGISTRY.operations))
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class TestHealthMonitor:
|
|
364
|
+
"""HealthMonitor fires a webhook on N consecutive failures and resets on success."""
|
|
365
|
+
|
|
366
|
+
def test_alerts_after_threshold_and_resets(self, monkeypatch):
|
|
367
|
+
import urllib.request
|
|
368
|
+
|
|
369
|
+
from dccd.application.events import EventBus, StatusEvent
|
|
370
|
+
from dccd.application.monitor import HealthMonitor
|
|
371
|
+
|
|
372
|
+
calls: list[str] = []
|
|
373
|
+
|
|
374
|
+
class _Resp:
|
|
375
|
+
def __enter__(self):
|
|
376
|
+
return self
|
|
377
|
+
|
|
378
|
+
def __exit__(self, *a):
|
|
379
|
+
return False
|
|
380
|
+
|
|
381
|
+
def fake_urlopen(req, timeout=0):
|
|
382
|
+
calls.append(getattr(req, "full_url", str(req)))
|
|
383
|
+
return _Resp()
|
|
384
|
+
|
|
385
|
+
monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)
|
|
386
|
+
|
|
387
|
+
bus = EventBus()
|
|
388
|
+
HealthMonitor(None, bus, webhook_url="http://hook.test", max_consecutive_errors=3)
|
|
389
|
+
|
|
390
|
+
# Distinct run ids that share one job (spec id) — each backfill run is
|
|
391
|
+
# unique (`{spec}@{ts}`), so the monitor must accumulate across runs.
|
|
392
|
+
job = "backfill:binance:BTC/USDT:ohlc:3600s"
|
|
393
|
+
# Below threshold: no alert.
|
|
394
|
+
bus.emit(StatusEvent(run_id=f"{job}@1", state="failed"))
|
|
395
|
+
bus.emit(StatusEvent(run_id=f"{job}@2", state="failed"))
|
|
396
|
+
assert calls == []
|
|
397
|
+
|
|
398
|
+
# Threshold reached (3rd failure, different run): one alert.
|
|
399
|
+
bus.emit(StatusEvent(run_id=f"{job}@3", state="failed"))
|
|
400
|
+
assert len(calls) == 1
|
|
401
|
+
|
|
402
|
+
# A success resets the counter; failures must re-accumulate.
|
|
403
|
+
bus.emit(StatusEvent(run_id=f"{job}@4", state="succeeded"))
|
|
404
|
+
bus.emit(StatusEvent(run_id=f"{job}@5", state="failed"))
|
|
405
|
+
bus.emit(StatusEvent(run_id=f"{job}@6", state="failed"))
|
|
406
|
+
assert len(calls) == 1
|
|
407
|
+
bus.emit(StatusEvent(run_id=f"{job}@7", state="failed"))
|
|
408
|
+
assert len(calls) == 2
|
|
409
|
+
|
|
410
|
+
# A different job keeps its own independent counter.
|
|
411
|
+
bus.emit(StatusEvent(run_id="backfill:kraken:BTC/USD:ohlc:3600s@1", state="failed"))
|
|
412
|
+
assert len(calls) == 2
|
|
413
|
+
|
|
414
|
+
def test_no_webhook_no_crash(self):
|
|
415
|
+
from dccd.application.events import EventBus, StatusEvent
|
|
416
|
+
from dccd.application.monitor import HealthMonitor
|
|
417
|
+
|
|
418
|
+
bus = EventBus()
|
|
419
|
+
HealthMonitor(None, bus, webhook_url=None, max_consecutive_errors=1)
|
|
420
|
+
# Must not raise even past threshold when no webhook is configured.
|
|
421
|
+
bus.emit(StatusEvent(run_id="r1", state="failed"))
|
|
422
|
+
bus.emit(StatusEvent(run_id="r1", state="failed"))
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Restart safety regression guards.
|
|
2
|
+
|
|
3
|
+
Durable state must survive a process restart and the scheduler must re-arm its
|
|
4
|
+
recurring work from config alone (so a reboot resumes with no manual step and no
|
|
5
|
+
gap). Verified live on a real `systemctl reboot` of a server in PR #99 — these are
|
|
6
|
+
the cheap regression guards for that behaviour.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
|
|
11
|
+
from dccd.application.jobs import JobSpec, JobTarget, Trigger
|
|
12
|
+
from dccd.application.scheduler import Scheduler
|
|
13
|
+
from dccd.domain.symbol import Symbol
|
|
14
|
+
from dccd.domain.types import DataType
|
|
15
|
+
from dccd.storage.runs_sqlite import RunsStore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _interval_spec(every: int = 120) -> JobSpec:
|
|
19
|
+
target = JobTarget(
|
|
20
|
+
exchange="binance",
|
|
21
|
+
symbol=Symbol(base="BTC", quote="USDT"),
|
|
22
|
+
data_type=DataType.OHLC,
|
|
23
|
+
span=3600,
|
|
24
|
+
)
|
|
25
|
+
return JobSpec(
|
|
26
|
+
id=JobSpec.make_id("backfill", target),
|
|
27
|
+
operation="backfill",
|
|
28
|
+
target=target,
|
|
29
|
+
trigger=Trigger(kind="interval", every=every),
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_runsstore_survives_reopen(tmp_path):
|
|
34
|
+
"""A fresh RunsStore at the same path keeps prior runs (append, not truncate)."""
|
|
35
|
+
db = tmp_path / "runs.db"
|
|
36
|
+
s1 = RunsStore(db)
|
|
37
|
+
s1.create_run("r1", "spec-1", "backfill", "binance", "BTC/USDT", "ohlc")
|
|
38
|
+
s1.finish_run("r1", "succeeded", rows_written=10)
|
|
39
|
+
s1.create_run("r2", "spec-1", "backfill", "binance", "BTC/USDT", "ohlc")
|
|
40
|
+
s1.finish_run("r2", "succeeded", rows_written=5)
|
|
41
|
+
|
|
42
|
+
# New instance at the same path == a daemon restart.
|
|
43
|
+
s2 = RunsStore(db)
|
|
44
|
+
assert {r["run_id"] for r in s2.list_runs()} == {"r1", "r2"}
|
|
45
|
+
|
|
46
|
+
# A post-restart run appends; the history is not reset.
|
|
47
|
+
s2.create_run("r3", "spec-1", "backfill", "binance", "BTC/USDT", "ohlc")
|
|
48
|
+
assert len(RunsStore(db).list_runs()) == 3
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@pytest.mark.asyncio
|
|
52
|
+
async def test_scheduler_rearms_intervals_from_specs():
|
|
53
|
+
"""A fresh Scheduler re-arms the same interval loop from the same specs.
|
|
54
|
+
|
|
55
|
+
This is the boot reconstruction path: `cmd_start` rebuilds everything from
|
|
56
|
+
config and calls `scheduler.start(cfg.all_job_specs())`. No cross-process
|
|
57
|
+
in-memory state — the spec id is the only thing that carries over.
|
|
58
|
+
"""
|
|
59
|
+
spec = _interval_spec()
|
|
60
|
+
|
|
61
|
+
async def _noop(_spec):
|
|
62
|
+
return None
|
|
63
|
+
|
|
64
|
+
sched = Scheduler(registry=None, store=None) # type: ignore[arg-type]
|
|
65
|
+
sched._run_once = _noop # type: ignore[assignment]
|
|
66
|
+
sched._running = True
|
|
67
|
+
await sched.sync_intervals([spec])
|
|
68
|
+
assert spec.id in sched._interval_loops
|
|
69
|
+
await sched.stop()
|
|
70
|
+
|
|
71
|
+
# A second, independent Scheduler from the same spec arms it identically.
|
|
72
|
+
sched2 = Scheduler(registry=None, store=None) # type: ignore[arg-type]
|
|
73
|
+
sched2._run_once = _noop # type: ignore[assignment]
|
|
74
|
+
sched2._running = True
|
|
75
|
+
await sched2.sync_intervals([spec])
|
|
76
|
+
assert spec.id in sched2._interval_loops
|
|
77
|
+
await sched2.stop()
|
|
@@ -74,6 +74,7 @@ dccd/tests/v3/test_domain_extended.py
|
|
|
74
74
|
dccd/tests/v3/test_network.py
|
|
75
75
|
dccd/tests/v3/test_purge.py
|
|
76
76
|
dccd/tests/v3/test_remote_sync.py
|
|
77
|
+
dccd/tests/v3/test_restart.py
|
|
77
78
|
dccd/tests/v3/test_restore.py
|
|
78
79
|
dccd/tests/v3/test_sources.py
|
|
79
80
|
dccd/tests/v3/test_storage.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|