dccd 3.5.0__tar.gz → 3.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. {dccd-3.5.0 → dccd-3.5.2}/CHANGELOG.md +53 -0
  2. {dccd-3.5.0 → dccd-3.5.2}/PKG-INFO +1 -1
  3. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/config.py +11 -0
  4. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/monitor.py +46 -17
  5. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/api/app.py +38 -7
  6. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/cli/main.py +10 -0
  7. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/okx.py +3 -1
  8. {dccd-3.5.0 → dccd-3.5.2}/dccd/storage/purge.py +5 -3
  9. {dccd-3.5.0 → dccd-3.5.2}/dccd/storage/remote.py +33 -8
  10. {dccd-3.5.0 → dccd-3.5.2}/dccd/storage/runs_sqlite.py +59 -3
  11. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_api.py +220 -0
  12. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_cli.py +142 -0
  13. dccd-3.5.2/dccd/tests/v3/test_monitor_webhook.py +150 -0
  14. dccd-3.5.2/dccd/tests/v3/test_remote_sync.py +339 -0
  15. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_sources.py +118 -0
  16. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_storage.py +47 -0
  17. {dccd-3.5.0 → dccd-3.5.2}/dccd.egg-info/PKG-INFO +1 -1
  18. {dccd-3.5.0 → dccd-3.5.2}/dccd.egg-info/SOURCES.txt +1 -0
  19. {dccd-3.5.0 → dccd-3.5.2}/pyproject.toml +1 -1
  20. dccd-3.5.0/dccd/tests/v3/test_remote_sync.py +0 -172
  21. {dccd-3.5.0 → dccd-3.5.2}/CLAUDE.md +0 -0
  22. {dccd-3.5.0 → dccd-3.5.2}/CONTRIBUTING.md +0 -0
  23. {dccd-3.5.0 → dccd-3.5.2}/LICENSE.txt +0 -0
  24. {dccd-3.5.0 → dccd-3.5.2}/MANIFEST.in +0 -0
  25. {dccd-3.5.0 → dccd-3.5.2}/README.md +0 -0
  26. {dccd-3.5.0 → dccd-3.5.2}/dccd/__init__.py +0 -0
  27. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/__init__.py +0 -0
  28. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/events.py +0 -0
  29. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/jobs.py +0 -0
  30. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/operations.py +0 -0
  31. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/registry.py +0 -0
  32. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/scheduler.py +0 -0
  33. {dccd-3.5.0 → dccd-3.5.2}/dccd/application/service_factory.py +0 -0
  34. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/__init__.py +0 -0
  35. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/capability.py +0 -0
  36. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/dataset.py +0 -0
  37. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/errors.py +0 -0
  38. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/records.py +0 -0
  39. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/symbol.py +0 -0
  40. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/timeutils.py +0 -0
  41. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/transforms.py +0 -0
  42. {dccd-3.5.0 → dccd-3.5.2}/dccd/domain/types.py +0 -0
  43. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/__init__.py +0 -0
  44. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/api/__init__.py +0 -0
  45. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/cli/__init__.py +0 -0
  46. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/__init__.py +0 -0
  47. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/static/favicon.svg +0 -0
  48. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/static/logo.svg +0 -0
  49. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/base.html +0 -0
  50. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/config.html +0 -0
  51. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/dashboard.html +0 -0
  52. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/data.html +0 -0
  53. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/historical.html +0 -0
  54. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/live.html +0 -0
  55. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/login.html +0 -0
  56. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/logs.html +0 -0
  57. {dccd-3.5.0 → dccd-3.5.2}/dccd/interfaces/ui/templates/storage.html +0 -0
  58. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/__init__.py +0 -0
  59. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/base.py +0 -0
  60. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/binance.py +0 -0
  61. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/bitfinex.py +0 -0
  62. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/bitmex.py +0 -0
  63. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/bybit.py +0 -0
  64. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/coinbase.py +0 -0
  65. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/kraken.py +0 -0
  66. {dccd-3.5.0 → dccd-3.5.2}/dccd/sources/registry.py +0 -0
  67. {dccd-3.5.0 → dccd-3.5.2}/dccd/storage/__init__.py +0 -0
  68. {dccd-3.5.0 → dccd-3.5.2}/dccd/storage/coverage_sqlite.py +0 -0
  69. {dccd-3.5.0 → dccd-3.5.2}/dccd/storage/parquet.py +0 -0
  70. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/__init__.py +0 -0
  71. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/__init__.py +0 -0
  72. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_adapter_parsing.py +0 -0
  73. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_application.py +0 -0
  74. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_backfill_lookback.py +0 -0
  75. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_client.py +0 -0
  76. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_coverage.py +0 -0
  77. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_domain.py +0 -0
  78. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_domain_extended.py +0 -0
  79. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_network.py +0 -0
  80. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_orderbook_throttle.py +0 -0
  81. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_purge.py +0 -0
  82. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_ratelimit.py +0 -0
  83. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_restart.py +0 -0
  84. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_restore.py +0 -0
  85. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_scheduler_hygiene.py +0 -0
  86. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_storage_extended.py +0 -0
  87. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_stream_end_state.py +0 -0
  88. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_stream_flush.py +0 -0
  89. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_stream_nocapability.py +0 -0
  90. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_transport.py +0 -0
  91. {dccd-3.5.0 → dccd-3.5.2}/dccd/tests/v3/test_ws_subscription_honesty.py +0 -0
  92. {dccd-3.5.0 → dccd-3.5.2}/dccd/transport/__init__.py +0 -0
  93. {dccd-3.5.0 → dccd-3.5.2}/dccd/transport/http.py +0 -0
  94. {dccd-3.5.0 → dccd-3.5.2}/dccd/transport/paginate.py +0 -0
  95. {dccd-3.5.0 → dccd-3.5.2}/dccd/transport/ratelimit.py +0 -0
  96. {dccd-3.5.0 → dccd-3.5.2}/dccd/transport/ws.py +0 -0
  97. {dccd-3.5.0 → dccd-3.5.2}/dccd.egg-info/dependency_links.txt +0 -0
  98. {dccd-3.5.0 → dccd-3.5.2}/dccd.egg-info/entry_points.txt +0 -0
  99. {dccd-3.5.0 → dccd-3.5.2}/dccd.egg-info/requires.txt +0 -0
  100. {dccd-3.5.0 → dccd-3.5.2}/dccd.egg-info/top_level.txt +0 -0
  101. {dccd-3.5.0 → dccd-3.5.2}/setup.cfg +0 -0
@@ -16,6 +16,59 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
16
16
 
17
17
  ### Removed
18
18
 
19
+ ## [3.5.2] - 2026-06-12
20
+
21
+ ### Added
22
+
23
+ - Boot-time runs.db retention (`settings.runs_retention_days`, default 90,
24
+ `0` disables): terminal non-failed runs (`succeeded`/`stale`/`cancelled`)
25
+ older than the window are deleted and the database VACUUMed at daemon
26
+ start, right after the orphan sweep; `failed` runs are kept as the
27
+ long-term error journal. Verified on a copy of the production runs.db:
28
+ 1,770 old rows pruned, file size −67 %, `failed` rows untouched. (#154)
29
+
30
+ ### Fixed
31
+
32
+ - Webhook alerts send a plain-text body with `X-Title: dccd` /
33
+ `X-Priority: high` headers for ntfy-style endpoints — the phone showed a
34
+ raw JSON blob before; Slack webhooks (`hooks.slack.com`) keep the JSON
35
+ `{"text": …}` payload. Verified live: one test message delivered to the
36
+ production ntfy topic (HTTP 200) rendered as plain text. (#155)
37
+ - Manual backfill triggers (`POST /api/backfill`, `/api/jobs/run`,
38
+ `/api/jobs/run-all`) are idempotent: a spec that is already being
39
+ backfilled returns the existing `run_id` (`status: already-running`) —
40
+ run-all skips busy jobs and lists them under `already_running` — instead
41
+ of starting a duplicate concurrent run that wasted exchange requests and
42
+ confused runs/progress. (#153)
43
+ - Off-box sync no longer mirrors deletions: `RemoteStorage` runs
44
+ `rclone copy` instead of `rclone sync`, so locally purged files survive
45
+ on the remote for read-through restore — enabling `min_free_gb` no longer
46
+ risks deleting the only copy of old data. The remote is now an archive
47
+ superset (never deleted automatically; remote cleanup is manual).
48
+ Verified live against a real rclone remote: purge → sync → file survives
49
+ → `restore()` returns byte-identical content. (#152)
50
+
51
+ ## [3.5.1] - 2026-06-12
52
+
53
+ ### Fixed
54
+
55
+ - `dccd start` marked its own just-started stream runs `stale` at boot: the
56
+ orphan sweep (`mark_stale_running`) ran in the FastAPI lifespan *after*
57
+ `cmd_start` had already started the scheduler's stream workers, so their
58
+ fresh `running` rows were swept as "orphaned by daemon restart" and the
59
+ Dashboard "Active now" never showed streams. The sweep now runs in
60
+ `cmd_start` before the scheduler starts; the lifespan only sweeps in
61
+ standalone `dccd ui`. Verified live across two daemon launches: the live
62
+ run stays `running`; a restart stales only the previous one. (#145)
63
+ - OKX OHLC pagination silently dropped the bar at every 100-bar page
64
+ boundary: OKX `before`/`after` cursors are exclusive, so passing
65
+ `before=start_ms` excluded the bar exactly at each window start (observed
66
+ in production as 431 one-minute gaps per OKX pair, spaced exactly
67
+ 100 min). `fetch_ohlc_page` now sends `before=start_ms-1`; regression
68
+ test drives the paginator across a page boundary under faithful exclusive
69
+ semantics. Verified live: a 12 h OKX 1m backfill lands with 0 gaps and
70
+ all 7 boundary bars present. (#144)
71
+
19
72
  ## [3.5.0] - 2026-06-11
20
73
 
21
74
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dccd
3
- Version: 3.5.0
3
+ Version: 3.5.2
4
4
  Summary: Download Crypto Currency Data — hexagonal architecture, async-first.
5
5
  Author-email: Arthur Bernard <arthur.bernard.92@gmail.com>
6
6
  License: MIT
@@ -46,6 +46,9 @@ class SettingsConfig(BaseModel):
46
46
  - ``ui_trusted_proxy`` — trust ``X-Forwarded-For`` as the rate-limit client key.
47
47
  Enable **only** behind a reverse proxy that overwrites the header, else a direct
48
48
  client can forge it and bypass the limit.
49
+ - ``runs_retention_days`` — delete terminal non-failed runs (``succeeded``,
50
+ ``stale``, ``cancelled``) older than this many days at daemon boot. ``0``
51
+ disables the sweep (rows accumulate indefinitely). Default ``90``.
49
52
  """
50
53
  data_path: str = "./data/crypto"
51
54
  timezone: str = "local"
@@ -56,6 +59,7 @@ class SettingsConfig(BaseModel):
56
59
  ui_readonly: bool = False
57
60
  ui_rate_limit: int = 0
58
61
  ui_trusted_proxy: bool = False
62
+ runs_retention_days: int = 90
59
63
 
60
64
  @field_validator("data_path")
61
65
  @classmethod
@@ -69,6 +73,13 @@ class SettingsConfig(BaseModel):
69
73
  raise ValueError("ui_rate_limit must be >= 0")
70
74
  return v
71
75
 
76
+ @field_validator("runs_retention_days")
77
+ @classmethod
78
+ def _non_negative_retention(cls, v: int) -> int:
79
+ if v < 0:
80
+ raise ValueError("runs_retention_days must be >= 0")
81
+ return v
82
+
72
83
  @field_validator("timezone")
73
84
  @classmethod
74
85
  def _validate_tz(cls, v: str) -> str:
@@ -26,6 +26,17 @@ class HealthMonitor:
26
26
  job does not flood a webhook. The count (and cooldown) reset on the first
27
27
  success.
28
28
 
29
+ Webhook format
30
+ --------------
31
+ - **Slack** (hostname is or ends with ``hooks.slack.com``): JSON body
32
+ ``{"text": msg}`` with ``Content-Type: application/json`` — unchanged
33
+ from earlier releases.
34
+ - **All other endpoints** (e.g. ntfy): plain-text body with headers
35
+ ``Content-Type: text/plain``, ``X-Title: dccd``, and
36
+ ``X-Priority: high``. ntfy renders the raw body as the notification
37
+ message; the old JSON blob caused the phone to show ``{"text": "…"}``
38
+ instead of the actual alert.
39
+
29
40
  Parameters
30
41
  ----------
31
42
  runs_store : RunsStore
@@ -77,25 +88,43 @@ class HealthMonitor:
77
88
  self._last_alert_ts.pop(key, None)
78
89
  self._last_webhook_err_ts.pop(key, None)
79
90
 
91
+ def _post_webhook(self, msg: str, run_id: str) -> None:
92
+ """Send *msg* to ``self._webhook``, choosing the right payload format.
93
+
94
+ Slack endpoints (hostname ``hooks.slack.com`` or subdomains) receive
95
+ ``{"text": msg}`` JSON. All other endpoints (ntfy, custom) receive a
96
+ plain-text body so the notification shows the message directly.
97
+ """
98
+ import json
99
+ import urllib.parse
100
+ import urllib.request
101
+
102
+ assert self._webhook is not None # caller (_alert) guards this
103
+ hostname: str = urllib.parse.urlsplit(self._webhook).hostname or ""
104
+ if hostname == "hooks.slack.com" or hostname.endswith(".hooks.slack.com"):
105
+ data = json.dumps({"text": msg}).encode()
106
+ headers: dict[str, str] = {"Content-Type": "application/json"}
107
+ else:
108
+ data = msg.encode()
109
+ headers = {
110
+ "Content-Type": "text/plain",
111
+ "X-Title": "dccd",
112
+ "X-Priority": "high",
113
+ }
114
+ req = urllib.request.Request(self._webhook, data=data, headers=headers)
115
+ try:
116
+ with urllib.request.urlopen(req, timeout=5):
117
+ pass
118
+ except Exception as exc:
119
+ # Log webhook-send failures at most once per cooldown window.
120
+ last_err = self._last_webhook_err_ts.get(run_id, 0.0)
121
+ if time.monotonic() - last_err >= _ALERT_COOLDOWN_S:
122
+ logger.warning("Webhook alert failed: %s", exc)
123
+ self._last_webhook_err_ts[run_id] = time.monotonic()
124
+
80
125
  def _alert(self, run_id: str, count: int) -> None:
81
126
  msg = f"dccd alert: {run_id} failed {count} times consecutively."
82
127
  logger.error(msg)
83
128
  self._last_alert_ts[run_id] = time.monotonic()
84
129
  if self._webhook:
85
- try:
86
- import json
87
- import urllib.request
88
- data = json.dumps({"text": msg}).encode()
89
- req = urllib.request.Request(
90
- self._webhook,
91
- data=data,
92
- headers={"Content-Type": "application/json"},
93
- )
94
- with urllib.request.urlopen(req, timeout=5):
95
- pass
96
- except Exception as exc:
97
- # Log webhook-send failures at most once per cooldown window.
98
- last_err = self._last_webhook_err_ts.get(run_id, 0.0)
99
- if time.monotonic() - last_err >= _ALERT_COOLDOWN_S:
100
- logger.warning("Webhook alert failed: %s", exc)
101
- self._last_webhook_err_ts[run_id] = time.monotonic()
130
+ self._post_webhook(msg, run_id)
@@ -191,12 +191,20 @@ def create_app(
191
191
  app.state.config_path = config_path
192
192
  app.state.store = build_store(cfg.settings.data_path)
193
193
  app.state.runs_store = build_runs_store(cfg.settings.data_path)
194
- # Mark any runs left in 'running' state as 'stale' — they were
195
- # orphaned by the previous daemon crash/SIGKILL and would otherwise
196
- # pollute active_runs() and the Dashboard forever.
197
- _stale_count = app.state.runs_store.mark_stale_running()
198
- if _stale_count:
199
- logger.warning("marked %d orphaned run(s) stale (daemon restarted)", _stale_count)
194
+ # Mark any runs left in 'running' state as 'stale'. For standalone
195
+ # `dccd ui` (scheduler is None) this is the right call site — no
196
+ # scheduler has started any workers yet. For `dccd start` the sweep
197
+ # happens in cmd_start *before* the scheduler starts, so we must skip
198
+ # it here: sweeping at this point would stale-out the stream-worker
199
+ # rows the scheduler just created (the exact misuse warned about in
200
+ # RunsStore.mark_stale_running's docstring).
201
+ if scheduler is None:
202
+ _stale_count = app.state.runs_store.mark_stale_running()
203
+ if _stale_count:
204
+ logger.warning("marked %d orphaned run(s) stale (daemon restarted)", _stale_count)
205
+ _pruned_count = app.state.runs_store.prune_old_runs(cfg.settings.runs_retention_days)
206
+ if _pruned_count:
207
+ logger.info("pruned %d old terminal run(s) from runs.db (retention: %dd)", _pruned_count, cfg.settings.runs_retention_days)
200
208
  app.state.coverage_store = build_coverage_store(cfg.settings.data_path)
201
209
  app.state.event_bus = EventBus()
202
210
  app.state.registry = build_registry()
@@ -443,6 +451,14 @@ def create_app(
443
451
  await request.app.state.scheduler.sync_streams(request.app.state.all_specs)
444
452
  await request.app.state.scheduler.sync_intervals(request.app.state.all_specs)
445
453
 
454
+ async def _active_run_for(request: Request, spec_id: str) -> str | None:
455
+ """Return the run_id of the first active run matching *spec_id*, or None."""
456
+ active = await asyncio.to_thread(_runs(request).active_runs)
457
+ for row in active:
458
+ if row.get("spec_id") == spec_id:
459
+ return str(row["run_id"])
460
+ return None
461
+
446
462
  def _run_backfill_tracked(request: Request, spec: JobSpec, run_id: str) -> None:
447
463
  """Spawn a backfill with a cancellable stop event registered by run_id."""
448
464
  reg = _reg(request)
@@ -580,6 +596,11 @@ def create_app(
580
596
  origin="runtime",
581
597
  )
582
598
 
599
+ # Idempotency guard: if the spec is already running, return the existing run_id.
600
+ existing = await _active_run_for(request, spec.id)
601
+ if existing is not None:
602
+ return {"run_id": existing, "status": "already-running"}
603
+
583
604
  # Generate a URL-safe run_id and pass it into backfill() so both the
584
605
  # API polling endpoint and the RunsStore use the same identifier.
585
606
  # We use a short UUID (no slashes) instead of embedding spec.id which
@@ -683,6 +704,11 @@ def create_app(
683
704
  if spec.operation != "backfill":
684
705
  raise HTTPException(400, "Only backfill jobs can be triggered manually; use /api/streams/start for stream jobs")
685
706
 
707
+ # Idempotency guard: if the spec is already running, return the existing run_id.
708
+ existing = await _active_run_for(request, spec.id)
709
+ if existing is not None:
710
+ return {"run_id": existing, "status": "already-running", "job_id": job_id}
711
+
686
712
  run_id = str(_uuid.uuid4())
687
713
  _run_backfill_tracked(request, spec, run_id)
688
714
 
@@ -696,12 +722,17 @@ def create_app(
696
722
  backfill_specs = [s for s in specs if s.operation == "backfill" and s.enabled]
697
723
 
698
724
  run_ids = []
725
+ already_running = []
699
726
  for spec in backfill_specs:
727
+ existing = await _active_run_for(request, spec.id)
728
+ if existing is not None:
729
+ already_running.append({"run_id": existing, "job_id": spec.id})
730
+ continue
700
731
  run_id = str(_uuid.uuid4())
701
732
  run_ids.append({"run_id": run_id, "job_id": spec.id})
702
733
  _run_backfill_tracked(request, spec, run_id)
703
734
 
704
- return {"started": len(run_ids), "runs": run_ids}
735
+ return {"started": len(run_ids), "runs": run_ids, "already_running": already_running}
705
736
 
706
737
  # -----------------------------------------------------------------------
707
738
  # Config
@@ -160,6 +160,16 @@ def cmd_start(
160
160
  cfg, cfg_path = _load_cfg(config)
161
161
  store = build_store(cfg.settings.data_path)
162
162
  runs_store = build_runs_store(cfg.settings.data_path)
163
+ # Sweep orphaned runs *before* the scheduler starts any stream workers so
164
+ # the lifespan (which opens the same DB) does not stale-out legitimate rows
165
+ # that the scheduler just created. This is the correct boot-path call site
166
+ # prescribed by RunsStore.mark_stale_running's docstring.
167
+ _stale = runs_store.mark_stale_running()
168
+ if _stale > 0:
169
+ typer.echo(f"Marked {_stale} orphaned run(s) stale (daemon restarted)")
170
+ _pruned = runs_store.prune_old_runs(cfg.settings.runs_retention_days)
171
+ if _pruned > 0:
172
+ typer.echo(f"Pruned {_pruned} old terminal run(s) from runs.db (retention: {cfg.settings.runs_retention_days}d)")
163
173
  coverage_store = build_coverage_store(cfg.settings.data_path)
164
174
  registry = build_registry()
165
175
  bus = EventBus()
@@ -102,10 +102,12 @@ class OKXSource(
102
102
  return []
103
103
 
104
104
  pair = self.render_symbol(symbol)
105
+ # OKX `before`/`after` are exclusive bounds. Without `- 1` the bar
106
+ # exactly at each page-window start would be silently dropped.
105
107
  params: dict[str, Any] = {
106
108
  "instId": pair,
107
109
  "bar": bar,
108
- "before": str(start_ns // 1_000_000),
110
+ "before": str(start_ns // 1_000_000 - 1),
109
111
  "after": str(end_ns // 1_000_000),
110
112
  "limit": min(limit, 100),
111
113
  }
@@ -2,9 +2,11 @@
2
2
  pressure.
3
3
 
4
4
  **Safety contract**: this deletes local data that is recoverable only from the
5
- remote, so it must be called **only when the remote mirror is up to date** — in
5
+ remote, so it must be called **only when the remote archive is up to date** — in
6
6
  practice, right after a successful :func:`~dccd.application.operations.sync_remote`
7
- cycle. The coverage manifest (``CoverageStore``) preserves the resume cursor, so a
7
+ cycle. Because :class:`~dccd.storage.remote.RemoteStorage` uses ``rclone copy``
8
+ (never ``rclone sync``), purged files remain on the remote and can be pulled
9
+ back by :meth:`~dccd.storage.remote.RemoteStorage.restore`. The coverage manifest (``CoverageStore``) preserves the resume cursor, so a
8
10
  later ``backfill(start="last")`` still continues from where collection left off;
9
11
  reads of purged ranges return what's local until read-through restore pulls them
10
12
  back.
@@ -41,7 +43,7 @@ def purge_to_free_space(
41
43
  """Delete oldest Parquet files until free space reaches ``min_free_gb``.
42
44
 
43
45
  Files are removed oldest-first (by mtime), so recent data stays local while
44
- old data — already mirrored off-box — is dropped. The ``.dccd`` directory is
46
+ old data — already copied off-box — is dropped. The ``.dccd`` directory is
45
47
  excluded. No-op when ``min_free_gb <= 0`` or free space is already above the
46
48
  threshold.
47
49
 
@@ -1,4 +1,17 @@
1
- """Remote storage sync via rclone."""
1
+ """Remote storage copy via rclone.
2
+
3
+ The remote is an *archive superset* of the local store: files are copied
4
+ off-box but never deleted remotely. Local = hot tier (fast access, space
5
+ pressure managed by the purge subsystem); remote = complete history archive.
6
+ This means:
7
+
8
+ - A local file that is purged to free disk space still exists on the remote
9
+ and can be pulled back by :meth:`RemoteStorage.restore` (read-through
10
+ restore).
11
+ - Remote cleanup (removing datasets you no longer want) is a **manual**
12
+ operation — use ``rclone delete`` or the provider console. dccd will
13
+ never delete from the remote automatically.
14
+ """
2
15
 
3
16
  from __future__ import annotations
4
17
 
@@ -13,12 +26,18 @@ logger = logging.getLogger(__name__)
13
26
 
14
27
 
15
28
  class RemoteStorage:
16
- """Sync local data to one or more rclone remotes.
29
+ """Copy local data to one or more rclone remotes (non-destructive).
30
+
31
+ Each sync cycle runs ``rclone copy`` (not ``rclone sync``) so that files
32
+ present on the remote but absent locally — e.g. files purged from the hot
33
+ tier to reclaim disk — are **never deleted**. The remote grows
34
+ monotonically and acts as a complete off-box archive; :meth:`restore` pulls
35
+ individual dataset directories back on demand.
17
36
 
18
37
  Parameters
19
38
  ----------
20
39
  local_path : str or Path
21
- Local data directory to sync.
40
+ Local data directory to copy from.
22
41
  remotes : list of dicts
23
42
  Each dict has ``provider`` and ``remote`` keys.
24
43
  """
@@ -32,24 +51,30 @@ class RemoteStorage:
32
51
  self._remotes = remotes or []
33
52
 
34
53
  def sync_one(self, remote: str) -> bool:
35
- """Sync to a single rclone remote. Returns True on success."""
54
+ """Copy local store to a single rclone remote. Returns True on success.
55
+
56
+ Uses ``rclone copy`` (not ``rclone sync``) so files that exist on the
57
+ remote but are absent locally are preserved. This guarantees that
58
+ files purged from the local hot tier remain available on the remote for
59
+ read-through :meth:`restore`.
60
+ """
36
61
  try:
37
62
  result = subprocess.run(
38
- ["rclone", "sync", str(self._local), remote, "--quiet"],
63
+ ["rclone", "copy", str(self._local), remote, "--quiet"],
39
64
  capture_output=True,
40
65
  text=True,
41
66
  timeout=300,
42
67
  )
43
68
  if result.returncode != 0:
44
- logger.error("rclone sync to %s failed: %s", remote, result.stderr)
69
+ logger.error("rclone copy to %s failed: %s", remote, result.stderr)
45
70
  return False
46
- logger.info("Synced to %s", remote)
71
+ logger.info("Copied to %s", remote)
47
72
  return True
48
73
  except FileNotFoundError:
49
74
  logger.error("rclone not found in PATH")
50
75
  return False
51
76
  except subprocess.TimeoutExpired:
52
- logger.error("rclone sync to %s timed out", remote)
77
+ logger.error("rclone copy to %s timed out", remote)
53
78
  return False
54
79
 
55
80
  def restore(self, rel_path: str) -> bool:
@@ -178,6 +178,59 @@ class RunsStore:
178
178
  """Runs currently ``running`` or ``reconnecting``."""
179
179
  return self.list_runs(state="running") + self.list_runs(state="reconnecting")
180
180
 
181
+ def prune_old_runs(self, retention_days: int) -> int:
182
+ """Delete terminal non-failed runs older than *retention_days* days.
183
+
184
+ Runs in states ``succeeded``, ``stale``, and ``cancelled`` that started
185
+ more than *retention_days* days ago are removed. ``failed`` rows are
186
+ intentionally kept as the long-term error journal. The database is
187
+ ``VACUUM``-ed after any deletion to reclaim disk space.
188
+
189
+ Parameters
190
+ ----------
191
+ retention_days : int
192
+ Number of days to retain terminal non-failed runs. Pass ``0`` (or
193
+ any value ``<= 0``) to disable pruning; the method returns ``0``
194
+ immediately without touching the database.
195
+
196
+ Returns
197
+ -------
198
+ int
199
+ Number of rows deleted (0 when pruning is disabled or when no rows
200
+ match the cutoff).
201
+
202
+ Notes
203
+ -----
204
+ ``VACUUM`` cannot run inside a transaction. This method opens a
205
+ separate connection (outside the :meth:`_conn` context manager) for the
206
+ ``VACUUM`` statement, which is executed only when at least one row was
207
+ deleted.
208
+
209
+ This method must be called from the daemon boot path *after*
210
+ :meth:`mark_stale_running` so that freshly-staled orphans age normally
211
+ rather than being immediately pruned on the next boot.
212
+ """
213
+ if retention_days <= 0:
214
+ return 0
215
+ import time
216
+ cutoff_ns = int(time.time() * 1_000_000_000) - int(retention_days * 86400 * 1_000_000_000)
217
+ with self._conn() as conn:
218
+ cursor = conn.execute(
219
+ """DELETE FROM runs
220
+ WHERE state IN ('succeeded', 'stale', 'cancelled')
221
+ AND started_at < ?""",
222
+ (cutoff_ns,),
223
+ )
224
+ deleted = cursor.rowcount
225
+ if deleted > 0:
226
+ # VACUUM cannot run inside a transaction — open a plain connection.
227
+ conn2 = sqlite3.connect(str(self._path))
228
+ try:
229
+ conn2.execute("VACUUM")
230
+ finally:
231
+ conn2.close()
232
+ return deleted
233
+
181
234
  def mark_stale_running(self) -> int:
182
235
  """Transition all ``running`` rows to ``stale`` at daemon boot.
183
236
 
@@ -201,9 +254,12 @@ class RunsStore:
201
254
  clearly attributes the state change to a restart rather than a normal
202
255
  completion or a user-visible error.
203
256
 
204
- This method must only be called from the daemon boot path (FastAPI
205
- lifespan startup). Calling it while a daemon is live would incorrectly
206
- stale-out its legitimate active runs.
257
+ This method must only be called from the daemon boot path, before any
258
+ new runs are started: ``cmd_start`` for ``dccd start`` (called before
259
+ the scheduler starts stream workers); the FastAPI lifespan for
260
+ standalone ``dccd ui`` (called before the standalone scheduler is
261
+ created). Calling it while workers are already running would
262
+ incorrectly stale-out their legitimate active runs.
207
263
  """
208
264
  import time
209
265
  now = int(time.time() * 1_000_000_000)