@smilintux/skcapstone 0.10.0 → 0.12.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/.env.example +10 -4
  2. package/.github/workflows/ci.yml +2 -2
  3. package/.github/workflows/publish.yml +9 -2
  4. package/.openclaw-workspace.json +2 -2
  5. package/CLAUDE.md +37 -0
  6. package/MISSION.md +17 -2
  7. package/README.md +282 -3
  8. package/docker/Dockerfile +7 -7
  9. package/docker/compose-templates/dev-team.yml +12 -12
  10. package/docker/compose-templates/mini-team.yml +9 -9
  11. package/docker/compose-templates/ops-team.yml +10 -10
  12. package/docker/compose-templates/research-team.yml +10 -10
  13. package/docker/entrypoint.sh +4 -4
  14. package/docs/ADR-optional-integration-backbone.md +181 -0
  15. package/docs/ARCHITECTURE.md +186 -43
  16. package/docs/BOND_WITH_GROK.md +6 -6
  17. package/docs/CUSTOM_AGENT.md +123 -30
  18. package/docs/DREAMING.md +70 -0
  19. package/docs/GETTING_STARTED.md +7 -7
  20. package/docs/QUICKSTART.md +10 -6
  21. package/docs/SKJOULE_ARCHITECTURE.md +3 -3
  22. package/docs/SOUL_SWAPPER.md +5 -5
  23. package/docs/hammertime-audit.md +402 -0
  24. package/docs/sk-integration-HANDOFF.md +117 -0
  25. package/docs/skscheduler.md +155 -0
  26. package/docs/superpowers/examples/jobs.yaml +31 -0
  27. package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
  28. package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
  29. package/examples/custom-bond-template.json +1 -1
  30. package/examples/grok-feb.json +1 -1
  31. package/examples/queen-ava-feb.json +1 -1
  32. package/launchd/{com.skcapstone.skcomm-heartbeat.plist → com.skcapstone.skcomms-heartbeat.plist} +4 -4
  33. package/launchd/{com.skcapstone.skcomm-queue-drain.plist → com.skcapstone.skcomms-queue-drain.plist} +4 -4
  34. package/launchd/install-launchd.sh +6 -6
  35. package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
  36. package/package.json +1 -1
  37. package/pyproject.toml +16 -10
  38. package/scripts/archive-sessions.sh +7 -0
  39. package/scripts/check-updates.py +4 -4
  40. package/scripts/install-bundle.sh +8 -8
  41. package/scripts/install.ps1 +12 -11
  42. package/scripts/install.sh +159 -5
  43. package/scripts/model-fallback-monitor.sh +102 -0
  44. package/scripts/nvidia-proxy.mjs +78 -26
  45. package/scripts/refresh-anthropic-token.sh +172 -0
  46. package/scripts/release.sh +98 -0
  47. package/scripts/session-to-memory.py +219 -0
  48. package/scripts/skgateway.mjs +3 -3
  49. package/scripts/telegram-catchup-all.sh +12 -1
  50. package/scripts/verify_install.sh +2 -2
  51. package/scripts/wargov-ufo-capture/README.md +43 -0
  52. package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
  53. package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
  54. package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
  55. package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
  56. package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
  57. package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
  58. package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
  59. package/scripts/watch-anthropic-token.sh +212 -0
  60. package/scripts/windows/install-tasks.ps1 +7 -7
  61. package/scripts/windows/skcapstone-task.xml +1 -1
  62. package/src/skcapstone/__init__.py +45 -3
  63. package/src/skcapstone/_cli_monolith.py +20 -15
  64. package/src/skcapstone/activity.py +5 -1
  65. package/src/skcapstone/agent_card.py +3 -2
  66. package/src/skcapstone/api.py +41 -40
  67. package/src/skcapstone/auction.py +14 -11
  68. package/src/skcapstone/backup.py +2 -1
  69. package/src/skcapstone/blueprint_registry.py +4 -3
  70. package/src/skcapstone/brain_first.py +238 -0
  71. package/src/skcapstone/changelog.py +1 -1
  72. package/src/skcapstone/chat.py +22 -17
  73. package/src/skcapstone/cli/__init__.py +9 -1
  74. package/src/skcapstone/cli/_common.py +1 -0
  75. package/src/skcapstone/cli/agents_spawner.py +5 -2
  76. package/src/skcapstone/cli/alerts.py +25 -4
  77. package/src/skcapstone/cli/bench.py +15 -15
  78. package/src/skcapstone/cli/chat.py +7 -4
  79. package/src/skcapstone/cli/consciousness.py +5 -2
  80. package/src/skcapstone/cli/context_cmd.py +18 -4
  81. package/src/skcapstone/cli/daemon.py +11 -7
  82. package/src/skcapstone/cli/gtd.py +26 -1
  83. package/src/skcapstone/cli/housekeeping.py +3 -3
  84. package/src/skcapstone/cli/identity_cmd.py +378 -0
  85. package/src/skcapstone/cli/joule_cmd.py +7 -3
  86. package/src/skcapstone/cli/memory.py +8 -6
  87. package/src/skcapstone/cli/peers_dir.py +1 -1
  88. package/src/skcapstone/cli/register_cmd.py +29 -3
  89. package/src/skcapstone/cli/scheduler_cmd.py +167 -0
  90. package/src/skcapstone/cli/session.py +25 -0
  91. package/src/skcapstone/cli/setup.py +96 -29
  92. package/src/skcapstone/cli/shell_cmd.py +53 -1
  93. package/src/skcapstone/cli/skills_cmd.py +2 -2
  94. package/src/skcapstone/cli/soul.py +8 -5
  95. package/src/skcapstone/cli/status.py +37 -11
  96. package/src/skcapstone/cli/telegram.py +21 -0
  97. package/src/skcapstone/cli/test_cmd.py +5 -5
  98. package/src/skcapstone/cli/test_connection.py +2 -2
  99. package/src/skcapstone/cli/upgrade_cmd.py +23 -14
  100. package/src/skcapstone/cli/version_cmd.py +1 -1
  101. package/src/skcapstone/cli/watch_cmd.py +9 -6
  102. package/src/skcapstone/cloud9_bridge.py +14 -14
  103. package/src/skcapstone/codex_setup.py +255 -0
  104. package/src/skcapstone/config_validator.py +7 -4
  105. package/src/skcapstone/consciousness_config.py +5 -1
  106. package/src/skcapstone/consciousness_loop.py +313 -273
  107. package/src/skcapstone/context_loader.py +121 -0
  108. package/src/skcapstone/coord_federation.py +2 -1
  109. package/src/skcapstone/coordination.py +23 -6
  110. package/src/skcapstone/crush_integration.py +2 -1
  111. package/src/skcapstone/daemon.py +132 -77
  112. package/src/skcapstone/dashboard.py +10 -10
  113. package/src/skcapstone/data/sk-agent-picker.sh +421 -0
  114. package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
  115. package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
  116. package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
  117. package/src/skcapstone/data/systemd/skcapstone.service +37 -0
  118. package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
  119. package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
  120. package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
  121. package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
  122. package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
  123. package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
  124. package/src/skcapstone/defaults/claude/settings.json +74 -0
  125. package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
  126. package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
  127. package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
  128. package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
  129. package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
  130. package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
  131. package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
  132. package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
  133. package/src/skcapstone/defaults/unhinged.json +13 -0
  134. package/src/skcapstone/discovery.py +43 -20
  135. package/src/skcapstone/doctor.py +941 -22
  136. package/src/skcapstone/dreaming.py +1183 -109
  137. package/src/skcapstone/emotion_tracker.py +2 -2
  138. package/src/skcapstone/export.py +4 -3
  139. package/src/skcapstone/fuse_mount.py +14 -12
  140. package/src/skcapstone/gui_installer.py +2 -2
  141. package/src/skcapstone/heartbeat.py +1 -1
  142. package/src/skcapstone/housekeeping.py +14 -14
  143. package/src/skcapstone/install_wizard.py +209 -7
  144. package/src/skcapstone/itil.py +13 -4
  145. package/src/skcapstone/kms_scheduler.py +10 -8
  146. package/src/skcapstone/launchd.py +19 -19
  147. package/src/skcapstone/mcp_launcher.py +15 -1
  148. package/src/skcapstone/mcp_server.py +83 -49
  149. package/src/skcapstone/mcp_tools/__init__.py +2 -0
  150. package/src/skcapstone/mcp_tools/_helpers.py +2 -2
  151. package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
  152. package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
  153. package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
  154. package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
  155. package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
  156. package/src/skcapstone/mcp_tools/did_tools.py +11 -8
  157. package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
  158. package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
  159. package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
  160. package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
  161. package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
  162. package/src/skcapstone/mdns_discovery.py +2 -2
  163. package/src/skcapstone/memory_curator.py +1 -1
  164. package/src/skcapstone/memory_engine.py +10 -3
  165. package/src/skcapstone/metrics.py +30 -16
  166. package/src/skcapstone/migrate_memories.py +4 -3
  167. package/src/skcapstone/migrate_multi_agent.py +8 -7
  168. package/src/skcapstone/models.py +47 -5
  169. package/src/skcapstone/notifications.py +42 -18
  170. package/src/skcapstone/onboard.py +875 -121
  171. package/src/skcapstone/operator_link.py +170 -0
  172. package/src/skcapstone/peer_directory.py +4 -4
  173. package/src/skcapstone/peers.py +19 -19
  174. package/src/skcapstone/pillars/__init__.py +7 -5
  175. package/src/skcapstone/pillars/consciousness.py +191 -0
  176. package/src/skcapstone/pillars/identity.py +51 -7
  177. package/src/skcapstone/pillars/memory.py +9 -3
  178. package/src/skcapstone/pillars/sync.py +2 -2
  179. package/src/skcapstone/preflight.py +3 -3
  180. package/src/skcapstone/providers/docker.py +28 -28
  181. package/src/skcapstone/register.py +6 -6
  182. package/src/skcapstone/registry_client.py +5 -4
  183. package/src/skcapstone/runtime.py +14 -3
  184. package/src/skcapstone/scheduled_tasks.py +254 -19
  185. package/src/skcapstone/scheduler_jobs.py +456 -0
  186. package/src/skcapstone/scheduler_runner.py +239 -0
  187. package/src/skcapstone/scheduler_state.py +162 -0
  188. package/src/skcapstone/sdk.py +310 -0
  189. package/src/skcapstone/service_health.py +279 -39
  190. package/src/skcapstone/session_briefing.py +108 -0
  191. package/src/skcapstone/session_capture.py +1 -1
  192. package/src/skcapstone/shell.py +7 -1
  193. package/src/skcapstone/soul.py +3 -1
  194. package/src/skcapstone/soul_switch.py +3 -1
  195. package/src/skcapstone/summary.py +6 -6
  196. package/src/skcapstone/sync_engine.py +15 -15
  197. package/src/skcapstone/sync_watcher.py +2 -2
  198. package/src/skcapstone/systemd.py +55 -21
  199. package/src/skcapstone/team_comms.py +8 -8
  200. package/src/skcapstone/team_engine.py +1 -1
  201. package/src/skcapstone/testrunner.py +3 -3
  202. package/src/skcapstone/trust_graph.py +40 -5
  203. package/src/skcapstone/unified_search.py +15 -6
  204. package/src/skcapstone/uninstall_wizard.py +11 -3
  205. package/src/skcapstone/version_check.py +8 -4
  206. package/src/skcapstone/warmth_anchor.py +4 -2
  207. package/src/skcapstone/whoami.py +4 -4
  208. package/systemd/skcapstone.service +4 -6
  209. package/systemd/skcapstone@.service +7 -8
  210. package/systemd/skcomms-heartbeat.service +21 -0
  211. package/systemd/skcomms-heartbeat.timer +12 -0
  212. package/systemd/skcomms-queue-drain.service +17 -0
  213. package/systemd/skcomms-queue-drain.timer +12 -0
  214. package/tests/conftest.py +39 -0
  215. package/tests/integration/test_consciousness_e2e.py +39 -39
  216. package/tests/test_agent_card.py +1 -1
  217. package/tests/test_agent_home_scaffold.py +34 -0
  218. package/tests/test_alerts_consumer_topics.py +27 -0
  219. package/tests/test_backup.py +2 -1
  220. package/tests/test_chat.py +6 -6
  221. package/tests/test_claude_md.py +2 -2
  222. package/tests/test_cli_skills.py +10 -10
  223. package/tests/test_cli_test_cmd.py +4 -4
  224. package/tests/test_cli_test_connection.py +1 -1
  225. package/tests/test_cloud9_bridge.py +6 -6
  226. package/tests/test_consciousness_e2e.py +1 -1
  227. package/tests/test_consciousness_loop.py +10 -10
  228. package/tests/test_coordination.py +25 -0
  229. package/tests/test_cross_package.py +21 -21
  230. package/tests/test_daemon.py +4 -4
  231. package/tests/test_daemon_shutdown.py +1 -1
  232. package/tests/test_docker_provider.py +29 -29
  233. package/tests/test_doctor.py +400 -0
  234. package/tests/test_doctor_skscheduler.py +50 -0
  235. package/tests/test_dreaming_engine.py +147 -0
  236. package/tests/test_dreaming_gtd_capture.py +35 -0
  237. package/tests/test_e2e_automated.py +8 -5
  238. package/tests/test_fuse_mount.py +10 -10
  239. package/tests/test_gtd_brief.py +46 -0
  240. package/tests/test_gtd_malformed_tolerance.py +31 -0
  241. package/tests/test_housekeeping.py +15 -15
  242. package/tests/test_identity_migrate.py +251 -0
  243. package/tests/test_integration_backbone.py +598 -0
  244. package/tests/test_itil_gtd_lifecycle.py +37 -0
  245. package/tests/test_jobs_dropins.py +84 -0
  246. package/tests/test_mcp_server.py +82 -37
  247. package/tests/test_models.py +48 -4
  248. package/tests/test_multi_agent.py +31 -29
  249. package/tests/test_notifications.py +122 -32
  250. package/tests/test_onboard.py +63 -75
  251. package/tests/test_operator_link.py +78 -0
  252. package/tests/test_peers.py +14 -14
  253. package/tests/test_pillars.py +98 -0
  254. package/tests/test_preflight.py +3 -3
  255. package/tests/test_runtime.py +21 -0
  256. package/tests/test_scheduled_tasks.py +11 -6
  257. package/tests/test_scheduler_cli.py +47 -0
  258. package/tests/test_scheduler_features.py +133 -0
  259. package/tests/test_scheduler_integration.py +87 -0
  260. package/tests/test_scheduler_jobs.py +155 -0
  261. package/tests/test_scheduler_runner.py +64 -0
  262. package/tests/test_scheduler_state.py +57 -0
  263. package/tests/test_sdk.py +70 -0
  264. package/tests/test_service_health_incidents.py +34 -0
  265. package/tests/test_service_registry.py +52 -0
  266. package/tests/test_session_briefing.py +130 -0
  267. package/tests/test_snapshots.py +4 -4
  268. package/tests/test_sync_pipeline.py +26 -26
  269. package/tests/test_team_comms.py +2 -2
  270. package/tests/test_testrunner.py +2 -2
  271. package/tests/test_trust_graph.py +18 -0
  272. package/tests/test_unified_search.py +2 -2
  273. package/tests/test_version_check.py +10 -0
  274. package/tests/test_version_cmd.py +8 -8
  275. package/tests/test_whoami.py +1 -1
  276. package/systemd/skcomm-heartbeat.service +0 -18
  277. package/systemd/skcomm-queue-drain.service +0 -17
  278. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
  279. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
@@ -0,0 +1,456 @@
1
+ """skscheduler — JobSpec dataclass, YAML loader, node-affinity resolution,
2
+ due-check (cron + interval), and host-alias discovery.
3
+
4
+ This module is the foundation of the unified fleet job scheduler. It is
5
+ intentionally free of I/O side-effects beyond reading config files and the
6
+ environment (``register_job``/``unregister_job`` additionally write and remove ``jobs.d`` fragments); all scheduling state lives elsewhere.
7
+
8
+ Typical usage::
9
+
10
+ from pathlib import Path
11
+ from skcapstone.scheduler_jobs import load_jobs, job_runs_here, is_due, current_host_aliases
12
+
13
+ jobs = load_jobs(Path("~/.skcapstone/config/jobs.yaml").expanduser())
14
+ aliases = current_host_aliases()
15
+ for job in jobs:
16
+ if job.enabled and job_runs_here(job, aliases) and is_due(job, last_run):
17
+ dispatch(job)
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import os
23
+ import re
24
+ import socket
25
+ import warnings
26
+ from dataclasses import dataclass
27
+ from datetime import datetime, timezone
28
+ from pathlib import Path
29
+ from typing import Optional, Union
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Internal helpers
34
+ # ---------------------------------------------------------------------------
35
+
36
# A non-negative decimal number followed by an optional single-letter unit.
_DURATION_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)\s*([smhd]?)\s*$")
# Multiplier (in seconds) for each accepted unit suffix; "" means seconds.
_UNIT_SECONDS: dict[str, float] = {"": 1, "s": 1, "m": 60, "h": 3600, "d": 86400}


def _parse_duration(value: Union[str, int, float]) -> float:
    """Convert a human-readable duration string or plain number to seconds.

    Args:
        value: A string like ``"300s"``, ``"5m"``, ``"1h"``, ``"1d"``, or a
            plain numeric value (int or float treated as seconds already).

    Returns:
        Duration in seconds as a float.

    Raises:
        ValueError: If the string is unparseable (this includes negative
            values and unrecognised suffixes, which the pattern rejects), if
            a numeric value is negative or not finite (NaN / infinity), or if
            a boolean is passed.

    Examples:
        >>> _parse_duration("300s")
        300.0
        >>> _parse_duration("5m")
        300.0
        >>> _parse_duration(600)
        600.0
    """
    import math  # local import keeps the module's import block untouched

    # bool is a subclass of int: without this guard ``True`` would silently
    # become a 1-second duration.
    if isinstance(value, bool):
        raise ValueError(f"invalid duration: {value!r}")

    if isinstance(value, (int, float)):
        seconds = float(value)
        # NaN compares False against everything (so it would slip past the
        # sign check) and an infinite interval can never elapse.
        if not math.isfinite(seconds):
            raise ValueError(f"duration must be finite, got {value!r}")
        if seconds < 0:
            raise ValueError(f"duration must be non-negative, got {value!r}")
        return seconds

    m = _DURATION_RE.match(str(value))
    if not m:
        raise ValueError(f"invalid duration: {value!r}")
    return float(m.group(1)) * _UNIT_SECONDS[m.group(2)]
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Group A — JobSpec dataclass + load_jobs
74
+ # ---------------------------------------------------------------------------
75
+
76
@dataclass
class JobSpec:
    """One scheduled job, as declared under the ``jobs`` mapping of ``jobs.yaml``.

    Attributes:
        name: Unique job identifier (the YAML key).
        type: Job type — ``"python"``, ``"shell"``, or ``"agent"``.
        schedule: Cron expression (mutually exclusive with ``every_seconds``).
        every_seconds: Interval in seconds (mutually exclusive with
            ``schedule``).
        nodes: Node-affinity list of host aliases, or the string ``"all"``.
        agent: Agent name for ``type="agent"`` jobs.
        prompt: Prompt text for ``type="agent"`` jobs.
        command: Shell command for ``type="shell"`` jobs.
        callback: Dotted ``module:function`` path for ``type="python"`` jobs.
        timeout: Hard-kill timeout in seconds.
        enabled: Whether the job is active.
        retries: Extra attempts on failure (``0`` means run once).
        retry_backoff: Seconds between attempts (linear).
        jitter: Maximum random splay, in seconds, applied before dispatch.
        notify: Notification policy — ``off``, ``on_failure``, ``on_success``
            or ``always`` (sk-alert hook).
        notify_level: sk-alert level used for failure notifications.
    """

    # --- identity and kind ---
    name: str
    type: str = "python"

    # --- when to run: a cron expression or a fixed interval ---
    schedule: Optional[str] = None
    every_seconds: Optional[float] = None

    # --- where to run ---
    nodes: Union[str, list[str]] = "all"

    # --- what to run (which field applies depends on ``type``) ---
    agent: Optional[str] = None
    prompt: Optional[str] = None
    command: Optional[str] = None
    callback: Optional[str] = None

    # --- execution limits ---
    timeout: float = 900.0
    enabled: bool = True

    # --- reliability / fleet / observability (added 2026-06-09) ---
    retries: int = 0  # 0 = single attempt, no retry
    retry_backoff: float = 0.0  # linear delay between attempts, in seconds
    # Splay spreads dispatch so a fleet sharing a cron slot does not fire in
    # the same instant (thundering herd).
    jitter: float = 0.0
    notify: str = "off"  # off | on_failure | on_success | always
    notify_level: str = "warn"  # sk-alert level for failure notifications
112
+
113
+
114
def load_jobs(config_path: Path) -> list[JobSpec]:
    """Load job definitions from a ``jobs.yaml`` config file.

    The YAML file must have a top-level ``jobs`` mapping.  Each key becomes
    the ``name`` of the resulting :class:`JobSpec`.  The ``every`` field is
    parsed via :func:`_parse_duration` and stored as ``every_seconds``; the
    raw ``every`` key is consumed and not passed to the dataclass.

    Unrecognised keys in a job definition produce a :class:`UserWarning` but
    do not prevent the job from loading.  No check is made here that
    ``schedule`` and ``every`` are mutually exclusive.

    Args:
        config_path: Path to the ``jobs.yaml`` file.  If the file does not
            exist, an empty list is returned without raising.

    Returns:
        A list of :class:`JobSpec` instances in definition order.

    Raises:
        ValueError: If the document, its ``jobs`` entry, or an individual job
            definition is not a mapping, or if a numeric / duration field
            cannot be converted.  The message names the offending job.

    Example::

        jobs = load_jobs(Path("~/.skcapstone/config/jobs.yaml").expanduser())
    """
    if not config_path.exists():
        return []

    import yaml  # lazy import — pyyaml optional at module level

    with config_path.open(encoding="utf-8") as fh:
        data = yaml.safe_load(fh)

    # An empty document (or any other falsy top level) simply means "no jobs".
    data = data or {}
    if not isinstance(data, dict):
        raise ValueError(
            f"{config_path}: top level must be a mapping, "
            f"got {type(data).__name__}"
        )

    jobs_raw = data.get("jobs") or {}
    if not isinstance(jobs_raw, dict):
        raise ValueError(
            f"{config_path}: 'jobs' must be a mapping of name -> definition, "
            f"got {type(jobs_raw).__name__}"
        )

    known_keys = {
        "type", "schedule", "every", "nodes", "agent", "prompt",
        "command", "callback", "timeout", "enabled",
        "retries", "retry_backoff", "jitter", "notify", "notify_level",
    }

    def _number(job_name, raw_job, key, cast, default):
        """Return ``cast(raw_job[key])``; a missing or null value yields *default*."""
        value = raw_job.get(key)
        if value is None:
            # ``timeout:`` with no value parses as None; fall back to the
            # default instead of crashing in float(None) / int(None).
            return default
        try:
            return cast(value)
        except (TypeError, ValueError) as exc:
            raise ValueError(
                f"job {job_name!r}: invalid {key!r} value {value!r}"
            ) from exc

    result: list[JobSpec] = []

    for name, raw in jobs_raw.items():
        # A bare ``name:`` entry (None) is an all-defaults job; anything else
        # that is not a mapping is a config error worth naming explicitly.
        if raw and not isinstance(raw, dict):
            raise ValueError(
                f"{config_path}: job {name!r} must be a mapping, "
                f"got {type(raw).__name__}"
            )
        raw = dict(raw or {})

        # Warn on unrecognised keys before consuming 'every'
        unknown = set(raw) - known_keys
        if unknown:
            warnings.warn(
                f"Job {name!r} has unrecognised key(s): {sorted(unknown, key=str)}. "
                "Typo in config? Job may not behave as expected.",
                UserWarning,
                stacklevel=2,
            )

        # Convert 'every' → 'every_seconds'
        every_raw = raw.pop("every", None)
        every_seconds: Optional[float] = None
        if every_raw is not None:
            try:
                every_seconds = _parse_duration(every_raw)
            except ValueError as exc:
                # Re-raise with the job name so the bad entry can be found.
                raise ValueError(f"job {name!r}: {exc}") from exc

        result.append(
            JobSpec(
                name=name,
                type=raw.get("type", "python"),
                schedule=raw.get("schedule"),
                every_seconds=every_seconds,
                nodes=raw.get("nodes", "all"),
                agent=raw.get("agent"),
                prompt=raw.get("prompt"),
                command=raw.get("command"),
                callback=raw.get("callback"),
                timeout=_number(name, raw, "timeout", float, 900.0),
                enabled=bool(raw.get("enabled", True)),
                retries=_number(name, raw, "retries", int, 0),
                retry_backoff=_number(name, raw, "retry_backoff", float, 0.0),
                jitter=_number(name, raw, "jitter", float, 0.0),
                notify=str(raw.get("notify", "off")),
                notify_level=str(raw.get("notify_level", "warn")),
            )
        )

    return result
191
+
192
+
193
+ # ---------------------------------------------------------------------------
194
+ # Group A2 — jobs.d/ drop-in registration (added 2026-06-09)
195
+ # ---------------------------------------------------------------------------
196
+
197
def load_jobs_with_dropins(config_path: Path) -> list[JobSpec]:
    """Return the jobs from ``jobs.yaml`` merged with every ``jobs.d/*.yaml``.

    The base file is read first; each fragment found in the sibling
    ``jobs.d`` directory (``config_path.parent / "jobs.d"``) is then applied
    in filename order.  A fragment that redefines an existing job *name*
    replaces the earlier definition and triggers a :class:`UserWarning`.
    This conf.d-style layering lets other services contribute scheduled work
    without editing the shared ``jobs.yaml``.

    Args:
        config_path: Path to the base ``jobs.yaml``.  Neither it nor the
            ``jobs.d`` directory has to exist; absent sources contribute
            nothing, so the result may be an empty list.

    Returns:
        The merged :class:`JobSpec` list.  Jobs keep the position of their
        first definition; jobs that only appear in drop-ins follow the base
        jobs.

    Example::

        jobs = load_jobs_with_dropins(
            Path("~/.skcapstone/config/jobs.yaml").expanduser()
        )
    """
    # Insertion-ordered mapping: re-assigning a key keeps its original slot.
    by_name: dict[str, JobSpec] = {
        job.name: job for job in load_jobs(config_path)
    }

    fragments_root = config_path.parent / "jobs.d"
    if not fragments_root.is_dir():
        return list(by_name.values())

    for fragment in sorted(fragments_root.glob("*.yaml")):
        for job in load_jobs(fragment):
            already_defined = job.name in by_name
            if already_defined:
                warnings.warn(
                    f"Job {job.name!r} in drop-in {fragment.name!r} "
                    f"overrides an earlier definition.",
                    UserWarning,
                    stacklevel=2,
                )
            by_name[job.name] = job

    return list(by_name.values())
242
+
243
+
244
def _dropin_dir(home: Optional[Path] = None) -> Path:
    """Locate the ``config/jobs.d`` drop-in directory.

    Args:
        home: Root directory to resolve against.  When omitted, the package's
            ``shared_home()`` is used (documented upstream as honouring the
            ``SKCAPSTONE_HOME`` environment variable); if that lookup fails
            for any reason, ``~/.skcapstone`` is used instead.

    Returns:
        ``<root>/config/jobs.d``.  The directory is not created here.
    """
    if home is None:
        try:
            from . import shared_home

            root = shared_home()
        except Exception:
            # Best-effort: fall back to the conventional per-user location.
            root = Path("~/.skcapstone").expanduser()
    else:
        root = Path(home)

    return root / "config" / "jobs.d"
261
+
262
+
263
def register_job(spec: dict, home: Optional[Path] = None) -> Path:
    """Register a scheduled job by writing a ``jobs.d/<name>.yaml`` fragment.

    This is the programmatic counterpart to hand-editing ``jobs.yaml`` — it
    lets a service own its own scheduler entry.  The fragment is written to a
    temporary file and then moved into place; calling again with the same
    ``name`` overwrites it (idempotent re-registration on every service start
    is the intended pattern).

    Args:
        spec: A single job definition.  Must contain ``name`` and at least
            one of ``schedule`` or ``every`` (supplying both is not rejected
            here).  Remaining keys mirror the ``jobs.yaml`` schema
            (``type``, ``command``/``callback``/``agent``, ``nodes``,
            ``timeout``, ``retries``, ``notify`` …).  The caller's dict is
            not modified.
        home: skcapstone root; when omitted the default drop-in location is
            used (see ``_dropin_dir``).

    Returns:
        Path to the written ``jobs.d/<name>.yaml`` fragment.

    Raises:
        ValueError: If ``name`` is missing, if it contains a path separator
            (it is used as a filename), or if neither ``schedule`` nor
            ``every`` is present.
    """
    spec = dict(spec)
    name = spec.pop("name", None)
    if not name:
        raise ValueError("register_job: spec must include a 'name'")
    # The name is interpolated into a filename; a separator would let the
    # fragment land outside jobs.d (e.g. "../../x").
    if any(ch in str(name) for ch in ("/", "\\", "\0")):
        raise ValueError(
            f"register_job: job name {name!r} must not contain path separators"
        )
    if "schedule" not in spec and "every" not in spec:
        raise ValueError(
            f"register_job: job {name!r} must define 'schedule' or 'every'"
        )

    import yaml  # lazy — pyyaml optional at module level

    dropin = _dropin_dir(home)
    dropin.mkdir(parents=True, exist_ok=True)

    final = dropin / f"{name}.yaml"
    # The ".tmp" suffix keeps the partial file out of the "*.yaml" glob used
    # when drop-ins are loaded.
    tmp = dropin / f".{name}.yaml.tmp"
    try:
        tmp.write_text(
            yaml.safe_dump({"jobs": {name: spec}}, sort_keys=False),
            encoding="utf-8",
        )
        # replace() overwrites an existing target on every platform, whereas
        # rename() fails on Windows when the destination already exists.
        tmp.replace(final)
    except Exception:
        # Do not leave a stray temp file behind on failure.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
    return final
307
+
308
+
309
def unregister_job(name: str, home: Optional[Path] = None) -> bool:
    """Remove a previously registered ``jobs.d/<name>.yaml`` fragment.

    Args:
        name: The job name used at registration.
        home: skcapstone root; when omitted the default drop-in location is
            used (see ``_dropin_dir``).

    Returns:
        ``True`` if a fragment existed and was removed, ``False`` otherwise
        (including when *name* contains a path separator and therefore
        cannot refer to a fragment directly inside ``jobs.d``).
    """
    # Never let a crafted name ("../../x") delete a file outside jobs.d.
    if any(ch in str(name) for ch in ("/", "\\", "\0")):
        return False

    fragment = _dropin_dir(home) / f"{name}.yaml"
    # EAFP: an exists()-then-unlink() pair can raise if another process
    # removes the fragment between the two calls.
    try:
        fragment.unlink()
    except FileNotFoundError:
        return False
    return True
324
+
325
+
326
+ # ---------------------------------------------------------------------------
327
+ # Group B — node affinity
328
+ # ---------------------------------------------------------------------------
329
+
330
def job_runs_here(job: JobSpec, host_aliases: set[str]) -> bool:
    """Decide whether *job* is allowed to fire on this node.

    Args:
        job: The :class:`JobSpec` whose ``nodes`` affinity is checked.
        host_aliases: Aliases identifying the current host
            (see :func:`current_host_aliases`).

    Returns:
        ``True`` if ``job.nodes`` is the string ``"all"`` or shares at least
        one entry with *host_aliases*; ``False`` otherwise.  A ``nodes``
        value that is not a list is treated as a single alias.

    Example::

        aliases = current_host_aliases()
        if job_runs_here(job, aliases):
            dispatch(job)
    """
    wanted = job.nodes
    if wanted == "all":
        return True

    # Normalise a lone alias to a one-element list before intersecting.
    if not isinstance(wanted, list):
        wanted = [wanted]

    overlap = set(wanted) & host_aliases
    return len(overlap) > 0
352
+
353
+
354
+ # ---------------------------------------------------------------------------
355
+ # Group C — due-check cron + interval with misfire catch-up
356
+ # ---------------------------------------------------------------------------
357
+
358
def is_due(
    job: JobSpec,
    last_run: Optional[datetime],
    now: Optional[datetime] = None,
) -> bool:
    """Report whether *job* should be dispatched at *now*.

    Rules, in order:

    * A job with neither ``every_seconds`` nor ``schedule`` is never due.
    * A job that has never run (``last_run is None``) is due immediately.
    * Interval jobs (``every_seconds`` set — this takes precedence when both
      fields are present) are due once ``now - last_run >= every_seconds``.
    * Cron jobs (``schedule`` set) are due when ``last_run`` lies before the
      most recent cron slot preceding *now*.  Several missed slots therefore
      result in a single catch-up fire, not one per slot.

    Naive datetimes (*now*, *last_run*, and the slot returned by croniter)
    are interpreted as UTC.

    Args:
        job: The :class:`JobSpec` to evaluate.
        last_run: Datetime of the last run, or ``None`` if the job has never
            run.
        now: Reference "current" time; defaults to
            ``datetime.now(timezone.utc)``.

    Returns:
        ``True`` if the job should be dispatched now.

    Example::

        if is_due(job, state.last_run):
            dispatch(job)
    """
    reference = datetime.now(timezone.utc) if now is None else now
    if reference.tzinfo is None:
        reference = reference.replace(tzinfo=timezone.utc)

    # Nothing to schedule against → never due.
    if job.every_seconds is None and job.schedule is None:
        return False

    # First run: fire right away for both interval and cron jobs.
    if last_run is None:
        return True

    if last_run.tzinfo:
        previous = last_run
    else:
        previous = last_run.replace(tzinfo=timezone.utc)

    # --- Interval ---
    if job.every_seconds is not None:
        waited = (reference - previous).total_seconds()
        return waited >= job.every_seconds

    # --- Cron ---
    from croniter import croniter  # lazy import

    # Latest slot preceding the reference time.
    # NOTE(review): whether a slot exactly equal to `reference` is returned
    # depends on croniter's boundary semantics — confirm against its docs.
    slot: datetime = croniter(job.schedule, reference).get_prev(datetime)
    if slot.tzinfo is None:
        slot = slot.replace(tzinfo=timezone.utc)

    return previous < slot
427
+
428
+
429
+ # ---------------------------------------------------------------------------
430
+ # Group D — host alias discovery
431
+ # ---------------------------------------------------------------------------
432
+
433
def current_host_aliases() -> set[str]:
    """Collect every alias under which the current host is known.

    Sources:
        - ``socket.gethostname()`` — the OS hostname.
        - The ``SK_NODE_ALIAS`` environment variable, read as a
          comma-separated list; each entry is whitespace-stripped and empty
          entries are dropped.

    Returns:
        A :class:`set` of strings usable for node-affinity matching.

    Example::

        # With SK_NODE_ALIAS=".41" set in the environment:
        aliases = current_host_aliases()
        # e.g. {'my-host', '.41'} — hostname + SK_NODE_ALIAS token
    """
    hostname = socket.gethostname()
    configured = os.environ.get("SK_NODE_ALIAS", "")

    extra = {piece.strip() for piece in configured.split(",")}
    extra.discard("")  # blank tokens come from empty or trailing commas

    return {hostname} | extra
@@ -0,0 +1,239 @@
1
+ """Executes JobSpecs by type (python | shell | agent) with overlap locking.
2
+
3
+ This module is the execution layer for the unified fleet job scheduler. It
4
+ is intentionally free of scheduling logic — callers decide *when* to run a
5
+ job; this module handles the *how*.
6
+
7
+ Typical usage::
8
+
9
+ from pathlib import Path
10
+ from skcapstone.scheduler_jobs import JobSpec
11
+ from skcapstone.scheduler_runner import JobRunner
12
+
13
+ runner = JobRunner(log_dir=Path("~/.skcapstone/logs").expanduser())
14
+ with runner.lock(job) as acquired:
15
+ if acquired:
16
+ result = runner.run(job)
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import contextlib
21
+ import importlib
22
+ import logging
23
+ import os
24
+ import shlex
25
+ import subprocess
26
+ from dataclasses import dataclass
27
+ from datetime import datetime, timezone
28
+ from pathlib import Path
29
+ from typing import Generator
30
+
31
+ from .scheduler_jobs import JobSpec
32
+
33
+ logger = logging.getLogger("skcapstone.scheduler_runner")
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Public result type
38
+ # ---------------------------------------------------------------------------
39
+
40
+
41
@dataclass
class JobResult:
    """Outcome record for one execution of a scheduled job.

    Only ``ok`` is required; the remaining fields default to the values that
    describe a clean, output-free success.

    Attributes:
        ok: Whether the run succeeded.
        exit_code: Exit status of the run (defaults to ``0``).
        output: Text captured from the run (defaults to the empty string).
        error: Failure description; the empty string when there is none.
    """

    # Success flag — the only field without a default.
    ok: bool
    # Exit status; stays 0 unless the producer sets something else.
    exit_code: int = 0
    # Captured text output, if any.
    output: str = ""
    # Human-readable failure message, empty on success.
    error: str = ""
60
+
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # Runner
64
+ # ---------------------------------------------------------------------------
65
+
66
+
67
class JobRunner:
    """Executes :class:`~skcapstone.scheduler_jobs.JobSpec` instances.

    Each runner owns a ``log_dir`` directory where per-run log files and
    per-job lock files are written.

    Args:
        log_dir: Directory for run logs and overlap-lock files.  Created
            automatically if it does not exist.
    """

    def __init__(self, log_dir: Path) -> None:
        """Initialise the runner with a log directory.

        Args:
            log_dir: Writable directory for logs and lock files.  Will be
                created (with parents) on first use.
        """
        self.log_dir = Path(log_dir)

    # ------------------------------------------------------------------
    # Overlap lock
    # ------------------------------------------------------------------

    @contextlib.contextmanager
    def lock(self, job: JobSpec) -> Generator[bool, None, None]:
        """Acquire an exclusive per-job overlap lock.

        Uses an ``O_CREAT | O_EXCL`` open on a ``<job.name>.lock`` file as
        an atomic test-and-set.  The lock is always released when the
        context exits, even if the body raises.

        Args:
            job: The job whose lock should be acquired.

        Yields:
            ``True`` if the lock was acquired; ``False`` if another instance
            already holds it (the caller should skip this run).

        Example::

            with runner.lock(job) as acquired:
                if acquired:
                    result = runner.run(job)
                else:
                    logger.info("job %s already running, skipping", job.name)
        """
        self.log_dir.mkdir(parents=True, exist_ok=True)
        lock_path = self.log_dir / f"{job.name}.lock"
        try:
            fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            yield False
            return
        try:
            # Close the descriptor even if writing the PID fails, so a write
            # error cannot leak an open fd into the long-lived scheduler.
            try:
                os.write(fd, str(os.getpid()).encode())
            finally:
                os.close(fd)
            yield True
        finally:
            # NOTE: if the process is SIGKILL'd or the host crashes, this unlink
            # never runs and the lockfile blocks the job until removed.  The PID
            # written above is the hook for a future staleness check (compare to
            # /proc/<pid> and unlink if the process is gone); v1 relies on
            # operators clearing stale locks on restart.
            with contextlib.suppress(OSError):
                lock_path.unlink()

    # ------------------------------------------------------------------
    # Dispatch
    # ------------------------------------------------------------------

    def run(self, job: JobSpec) -> JobResult:
        """Execute a job and return a :class:`JobResult`.

        Dispatches to the appropriate backend based on ``job.type``:

        - ``"python"`` — imports ``module`` and calls ``fn()`` from
          ``job.callback`` (format: ``"module.path:function_name"``).
        - ``"shell"`` — runs ``job.command`` via :mod:`subprocess` after
          splitting with :func:`shlex.split`.
        - ``"agent"`` — runs ``claude -p "<prompt>"`` optionally with
          ``--agent <name>``.

        Jobs *never* raise — all failures are returned as a
        :class:`JobResult` with ``ok=False``.  That includes a shell command
        that cannot be tokenised (e.g. an unbalanced quote) or is empty.

        Args:
            job: The job specification to execute.

        Returns:
            A :class:`JobResult` describing the outcome.
        """
        if job.type == "python":
            return self._run_python(job)
        if job.type == "shell":
            try:
                cmd = shlex.split(job.command or "")
            except ValueError as exc:
                # shlex raises ValueError on malformed quoting; report it as a
                # failed job rather than letting it escape into the scheduler.
                logger.error("job %r has an unparseable command: %s", job.name, exc)
                return JobResult(
                    ok=False, exit_code=-1, error=f"invalid command: {exc}"
                )
            return self._run_subprocess(job, cmd)
        if job.type == "agent":
            cmd = ["claude", "-p", job.prompt or ""]
            if job.agent:
                cmd += ["--agent", job.agent]
            return self._run_subprocess(job, cmd)
        return JobResult(ok=False, error=f"unknown job type: {job.type!r}")

    # ------------------------------------------------------------------
    # Private backends
    # ------------------------------------------------------------------

    def _run_python(self, job: JobSpec) -> JobResult:
        """Import and call a ``module:function`` callback.

        Args:
            job: A python-type :class:`~skcapstone.scheduler_jobs.JobSpec`
                whose ``callback`` field is ``"module.path:fn_name"``.

        Returns:
            :class:`JobResult` with ``ok=True`` on success, or ``ok=False``
            with ``error`` set to the exception message on any failure.
        """
        try:
            mod_name, _, fn_name = (job.callback or "").partition(":")
            if not mod_name or not fn_name:
                return JobResult(
                    ok=False,
                    error=f"invalid callback {job.callback!r} — expected 'module:fn'",
                )
            module = importlib.import_module(mod_name)
            fn = getattr(module, fn_name)
            fn()
            return JobResult(ok=True)
        except Exception as exc:  # noqa: BLE001 — jobs must never crash the scheduler loop
            logger.error("python job %r failed: %s", job.name, exc, exc_info=True)
            return JobResult(ok=False, error=str(exc))

    def _run_subprocess(self, job: JobSpec, cmd: list[str]) -> JobResult:
        """Run *cmd* as a subprocess, capturing output to a timestamped log.

        Log writing is best-effort: a failure to create ``log_dir`` or to
        write the log file is logged as a warning and does not change the
        reported outcome of the job itself.

        Args:
            job: The originating :class:`~skcapstone.scheduler_jobs.JobSpec`
                (used for log file naming and timeout).
            cmd: Argument list passed directly to :func:`subprocess.run`.

        Returns:
            :class:`JobResult` with:

            - ``ok=True`` and ``exit_code=0`` on success.
            - ``ok=False`` and ``exit_code=<n>`` on nonzero exit.
            - ``ok=False`` and ``exit_code=-1`` on timeout, OS error, or an
              empty argument list.  On timeout, any output captured before
              the deadline is written to the log and returned in ``output``.
        """
        if not cmd:
            # An empty argv is rejected up front: on POSIX subprocess would
            # fail with IndexError, which the handlers below do not cover.
            logger.error("job %r has an empty command", job.name)
            return JobResult(ok=False, exit_code=-1, error="empty command")

        ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        log_path = self.log_dir / f"{job.name}-{ts}.log"
        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=job.timeout,
            )
        except subprocess.TimeoutExpired as exc:
            logger.error("job %r timed out after %ss", job.name, job.timeout)
            # Keep whatever the process printed before it was killed.
            partial = self._as_text(exc.stdout) + self._as_text(exc.stderr)
            self._write_log(job, log_path, partial)
            return JobResult(
                ok=False,
                exit_code=-1,
                output=partial,
                error=f"timeout after {job.timeout}s",
            )
        except (OSError, ValueError) as exc:
            logger.error("job %r subprocess error: %s", job.name, exc)
            return JobResult(ok=False, exit_code=-1, error=str(exc))

        out = (proc.stdout or "") + (proc.stderr or "")
        self._write_log(job, log_path, out)
        ok = proc.returncode == 0
        return JobResult(
            ok=ok,
            exit_code=proc.returncode,
            output=out,
            error="" if ok else out[-500:],
        )

    def _write_log(self, job: JobSpec, log_path: Path, text: str) -> None:
        """Write *text* to *log_path*, creating ``log_dir`` if needed.

        Never raises: a logging failure must not turn a finished job into a
        reported failure, nor escape into the scheduler loop.
        """
        try:
            self.log_dir.mkdir(parents=True, exist_ok=True)
            log_path.write_text(text, encoding="utf-8")
        except (OSError, ValueError) as exc:
            logger.warning(
                "job %r: could not write log %s: %s", job.name, log_path, exc
            )

    @staticmethod
    def _as_text(data: bytes | str | None) -> str:
        """Normalise captured output to ``str``.

        Output attached to :class:`subprocess.TimeoutExpired` can be ``None``
        or ``bytes`` even when text mode was requested, so decode defensively.
        """
        if data is None:
            return ""
        if isinstance(data, bytes):
            return data.decode("utf-8", errors="replace")
        return data