@smilintux/skcapstone 0.10.0 → 0.12.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +10 -4
- package/.github/workflows/ci.yml +2 -2
- package/.github/workflows/publish.yml +9 -2
- package/.openclaw-workspace.json +2 -2
- package/CLAUDE.md +37 -0
- package/MISSION.md +17 -2
- package/README.md +282 -3
- package/docker/Dockerfile +7 -7
- package/docker/compose-templates/dev-team.yml +12 -12
- package/docker/compose-templates/mini-team.yml +9 -9
- package/docker/compose-templates/ops-team.yml +10 -10
- package/docker/compose-templates/research-team.yml +10 -10
- package/docker/entrypoint.sh +4 -4
- package/docs/ADR-optional-integration-backbone.md +181 -0
- package/docs/ARCHITECTURE.md +186 -43
- package/docs/BOND_WITH_GROK.md +6 -6
- package/docs/CUSTOM_AGENT.md +123 -30
- package/docs/DREAMING.md +70 -0
- package/docs/GETTING_STARTED.md +7 -7
- package/docs/QUICKSTART.md +10 -6
- package/docs/SKJOULE_ARCHITECTURE.md +3 -3
- package/docs/SOUL_SWAPPER.md +5 -5
- package/docs/hammertime-audit.md +402 -0
- package/docs/sk-integration-HANDOFF.md +117 -0
- package/docs/skscheduler.md +155 -0
- package/docs/superpowers/examples/jobs.yaml +31 -0
- package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
- package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
- package/examples/custom-bond-template.json +1 -1
- package/examples/grok-feb.json +1 -1
- package/examples/queen-ava-feb.json +1 -1
- package/launchd/{com.skcapstone.skcomm-heartbeat.plist → com.skcapstone.skcomms-heartbeat.plist} +4 -4
- package/launchd/{com.skcapstone.skcomm-queue-drain.plist → com.skcapstone.skcomms-queue-drain.plist} +4 -4
- package/launchd/install-launchd.sh +6 -6
- package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
- package/package.json +1 -1
- package/pyproject.toml +16 -10
- package/scripts/archive-sessions.sh +7 -0
- package/scripts/check-updates.py +4 -4
- package/scripts/install-bundle.sh +8 -8
- package/scripts/install.ps1 +12 -11
- package/scripts/install.sh +159 -5
- package/scripts/model-fallback-monitor.sh +102 -0
- package/scripts/nvidia-proxy.mjs +78 -26
- package/scripts/refresh-anthropic-token.sh +172 -0
- package/scripts/release.sh +98 -0
- package/scripts/session-to-memory.py +219 -0
- package/scripts/skgateway.mjs +3 -3
- package/scripts/telegram-catchup-all.sh +12 -1
- package/scripts/verify_install.sh +2 -2
- package/scripts/wargov-ufo-capture/README.md +43 -0
- package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
- package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
- package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
- package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
- package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
- package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
- package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
- package/scripts/watch-anthropic-token.sh +212 -0
- package/scripts/windows/install-tasks.ps1 +7 -7
- package/scripts/windows/skcapstone-task.xml +1 -1
- package/src/skcapstone/__init__.py +45 -3
- package/src/skcapstone/_cli_monolith.py +20 -15
- package/src/skcapstone/activity.py +5 -1
- package/src/skcapstone/agent_card.py +3 -2
- package/src/skcapstone/api.py +41 -40
- package/src/skcapstone/auction.py +14 -11
- package/src/skcapstone/backup.py +2 -1
- package/src/skcapstone/blueprint_registry.py +4 -3
- package/src/skcapstone/brain_first.py +238 -0
- package/src/skcapstone/changelog.py +1 -1
- package/src/skcapstone/chat.py +22 -17
- package/src/skcapstone/cli/__init__.py +9 -1
- package/src/skcapstone/cli/_common.py +1 -0
- package/src/skcapstone/cli/agents_spawner.py +5 -2
- package/src/skcapstone/cli/alerts.py +25 -4
- package/src/skcapstone/cli/bench.py +15 -15
- package/src/skcapstone/cli/chat.py +7 -4
- package/src/skcapstone/cli/consciousness.py +5 -2
- package/src/skcapstone/cli/context_cmd.py +18 -4
- package/src/skcapstone/cli/daemon.py +11 -7
- package/src/skcapstone/cli/gtd.py +26 -1
- package/src/skcapstone/cli/housekeeping.py +3 -3
- package/src/skcapstone/cli/identity_cmd.py +378 -0
- package/src/skcapstone/cli/joule_cmd.py +7 -3
- package/src/skcapstone/cli/memory.py +8 -6
- package/src/skcapstone/cli/peers_dir.py +1 -1
- package/src/skcapstone/cli/register_cmd.py +29 -3
- package/src/skcapstone/cli/scheduler_cmd.py +167 -0
- package/src/skcapstone/cli/session.py +25 -0
- package/src/skcapstone/cli/setup.py +96 -29
- package/src/skcapstone/cli/shell_cmd.py +53 -1
- package/src/skcapstone/cli/skills_cmd.py +2 -2
- package/src/skcapstone/cli/soul.py +8 -5
- package/src/skcapstone/cli/status.py +37 -11
- package/src/skcapstone/cli/telegram.py +21 -0
- package/src/skcapstone/cli/test_cmd.py +5 -5
- package/src/skcapstone/cli/test_connection.py +2 -2
- package/src/skcapstone/cli/upgrade_cmd.py +23 -14
- package/src/skcapstone/cli/version_cmd.py +1 -1
- package/src/skcapstone/cli/watch_cmd.py +9 -6
- package/src/skcapstone/cloud9_bridge.py +14 -14
- package/src/skcapstone/codex_setup.py +255 -0
- package/src/skcapstone/config_validator.py +7 -4
- package/src/skcapstone/consciousness_config.py +5 -1
- package/src/skcapstone/consciousness_loop.py +313 -273
- package/src/skcapstone/context_loader.py +121 -0
- package/src/skcapstone/coord_federation.py +2 -1
- package/src/skcapstone/coordination.py +23 -6
- package/src/skcapstone/crush_integration.py +2 -1
- package/src/skcapstone/daemon.py +132 -77
- package/src/skcapstone/dashboard.py +10 -10
- package/src/skcapstone/data/sk-agent-picker.sh +421 -0
- package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
- package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
- package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
- package/src/skcapstone/data/systemd/skcapstone.service +37 -0
- package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
- package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
- package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
- package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
- package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
- package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
- package/src/skcapstone/defaults/claude/settings.json +74 -0
- package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
- package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
- package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
- package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
- package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
- package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
- package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
- package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
- package/src/skcapstone/defaults/unhinged.json +13 -0
- package/src/skcapstone/discovery.py +43 -20
- package/src/skcapstone/doctor.py +941 -22
- package/src/skcapstone/dreaming.py +1183 -109
- package/src/skcapstone/emotion_tracker.py +2 -2
- package/src/skcapstone/export.py +4 -3
- package/src/skcapstone/fuse_mount.py +14 -12
- package/src/skcapstone/gui_installer.py +2 -2
- package/src/skcapstone/heartbeat.py +1 -1
- package/src/skcapstone/housekeeping.py +14 -14
- package/src/skcapstone/install_wizard.py +209 -7
- package/src/skcapstone/itil.py +13 -4
- package/src/skcapstone/kms_scheduler.py +10 -8
- package/src/skcapstone/launchd.py +19 -19
- package/src/skcapstone/mcp_launcher.py +15 -1
- package/src/skcapstone/mcp_server.py +83 -49
- package/src/skcapstone/mcp_tools/__init__.py +2 -0
- package/src/skcapstone/mcp_tools/_helpers.py +2 -2
- package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
- package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
- package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
- package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
- package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
- package/src/skcapstone/mcp_tools/did_tools.py +11 -8
- package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
- package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
- package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
- package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
- package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
- package/src/skcapstone/mdns_discovery.py +2 -2
- package/src/skcapstone/memory_curator.py +1 -1
- package/src/skcapstone/memory_engine.py +10 -3
- package/src/skcapstone/metrics.py +30 -16
- package/src/skcapstone/migrate_memories.py +4 -3
- package/src/skcapstone/migrate_multi_agent.py +8 -7
- package/src/skcapstone/models.py +47 -5
- package/src/skcapstone/notifications.py +42 -18
- package/src/skcapstone/onboard.py +875 -121
- package/src/skcapstone/operator_link.py +170 -0
- package/src/skcapstone/peer_directory.py +4 -4
- package/src/skcapstone/peers.py +19 -19
- package/src/skcapstone/pillars/__init__.py +7 -5
- package/src/skcapstone/pillars/consciousness.py +191 -0
- package/src/skcapstone/pillars/identity.py +51 -7
- package/src/skcapstone/pillars/memory.py +9 -3
- package/src/skcapstone/pillars/sync.py +2 -2
- package/src/skcapstone/preflight.py +3 -3
- package/src/skcapstone/providers/docker.py +28 -28
- package/src/skcapstone/register.py +6 -6
- package/src/skcapstone/registry_client.py +5 -4
- package/src/skcapstone/runtime.py +14 -3
- package/src/skcapstone/scheduled_tasks.py +254 -19
- package/src/skcapstone/scheduler_jobs.py +456 -0
- package/src/skcapstone/scheduler_runner.py +239 -0
- package/src/skcapstone/scheduler_state.py +162 -0
- package/src/skcapstone/sdk.py +310 -0
- package/src/skcapstone/service_health.py +279 -39
- package/src/skcapstone/session_briefing.py +108 -0
- package/src/skcapstone/session_capture.py +1 -1
- package/src/skcapstone/shell.py +7 -1
- package/src/skcapstone/soul.py +3 -1
- package/src/skcapstone/soul_switch.py +3 -1
- package/src/skcapstone/summary.py +6 -6
- package/src/skcapstone/sync_engine.py +15 -15
- package/src/skcapstone/sync_watcher.py +2 -2
- package/src/skcapstone/systemd.py +55 -21
- package/src/skcapstone/team_comms.py +8 -8
- package/src/skcapstone/team_engine.py +1 -1
- package/src/skcapstone/testrunner.py +3 -3
- package/src/skcapstone/trust_graph.py +40 -5
- package/src/skcapstone/unified_search.py +15 -6
- package/src/skcapstone/uninstall_wizard.py +11 -3
- package/src/skcapstone/version_check.py +8 -4
- package/src/skcapstone/warmth_anchor.py +4 -2
- package/src/skcapstone/whoami.py +4 -4
- package/systemd/skcapstone.service +4 -6
- package/systemd/skcapstone@.service +7 -8
- package/systemd/skcomms-heartbeat.service +21 -0
- package/systemd/skcomms-heartbeat.timer +12 -0
- package/systemd/skcomms-queue-drain.service +17 -0
- package/systemd/skcomms-queue-drain.timer +12 -0
- package/tests/conftest.py +39 -0
- package/tests/integration/test_consciousness_e2e.py +39 -39
- package/tests/test_agent_card.py +1 -1
- package/tests/test_agent_home_scaffold.py +34 -0
- package/tests/test_alerts_consumer_topics.py +27 -0
- package/tests/test_backup.py +2 -1
- package/tests/test_chat.py +6 -6
- package/tests/test_claude_md.py +2 -2
- package/tests/test_cli_skills.py +10 -10
- package/tests/test_cli_test_cmd.py +4 -4
- package/tests/test_cli_test_connection.py +1 -1
- package/tests/test_cloud9_bridge.py +6 -6
- package/tests/test_consciousness_e2e.py +1 -1
- package/tests/test_consciousness_loop.py +10 -10
- package/tests/test_coordination.py +25 -0
- package/tests/test_cross_package.py +21 -21
- package/tests/test_daemon.py +4 -4
- package/tests/test_daemon_shutdown.py +1 -1
- package/tests/test_docker_provider.py +29 -29
- package/tests/test_doctor.py +400 -0
- package/tests/test_doctor_skscheduler.py +50 -0
- package/tests/test_dreaming_engine.py +147 -0
- package/tests/test_dreaming_gtd_capture.py +35 -0
- package/tests/test_e2e_automated.py +8 -5
- package/tests/test_fuse_mount.py +10 -10
- package/tests/test_gtd_brief.py +46 -0
- package/tests/test_gtd_malformed_tolerance.py +31 -0
- package/tests/test_housekeeping.py +15 -15
- package/tests/test_identity_migrate.py +251 -0
- package/tests/test_integration_backbone.py +598 -0
- package/tests/test_itil_gtd_lifecycle.py +37 -0
- package/tests/test_jobs_dropins.py +84 -0
- package/tests/test_mcp_server.py +82 -37
- package/tests/test_models.py +48 -4
- package/tests/test_multi_agent.py +31 -29
- package/tests/test_notifications.py +122 -32
- package/tests/test_onboard.py +63 -75
- package/tests/test_operator_link.py +78 -0
- package/tests/test_peers.py +14 -14
- package/tests/test_pillars.py +98 -0
- package/tests/test_preflight.py +3 -3
- package/tests/test_runtime.py +21 -0
- package/tests/test_scheduled_tasks.py +11 -6
- package/tests/test_scheduler_cli.py +47 -0
- package/tests/test_scheduler_features.py +133 -0
- package/tests/test_scheduler_integration.py +87 -0
- package/tests/test_scheduler_jobs.py +155 -0
- package/tests/test_scheduler_runner.py +64 -0
- package/tests/test_scheduler_state.py +57 -0
- package/tests/test_sdk.py +70 -0
- package/tests/test_service_health_incidents.py +34 -0
- package/tests/test_service_registry.py +52 -0
- package/tests/test_session_briefing.py +130 -0
- package/tests/test_snapshots.py +4 -4
- package/tests/test_sync_pipeline.py +26 -26
- package/tests/test_team_comms.py +2 -2
- package/tests/test_testrunner.py +2 -2
- package/tests/test_trust_graph.py +18 -0
- package/tests/test_unified_search.py +2 -2
- package/tests/test_version_check.py +10 -0
- package/tests/test_version_cmd.py +8 -8
- package/tests/test_whoami.py +1 -1
- package/systemd/skcomm-heartbeat.service +0 -18
- package/systemd/skcomm-queue-drain.service +0 -17
- /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
- /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
"""skscheduler — JobSpec dataclass, YAML loader, node-affinity resolution,
|
|
2
|
+
due-check (cron + interval), and host-alias discovery.
|
|
3
|
+
|
|
4
|
+
This module is the foundation of the unified fleet job scheduler. It is
|
|
5
|
+
intentionally free of I/O side-effects beyond reading config files and the
|
|
6
|
+
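environment (the exception being ``register_job`` / ``unregister_job``, which write and remove ``jobs.d`` fragments); all scheduling state lives elsewhere.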
|
|
7
|
+
|
|
8
|
+
Typical usage::
|
|
9
|
+
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from skcapstone.scheduler_jobs import load_jobs, job_runs_here, is_due, current_host_aliases
|
|
12
|
+
|
|
13
|
+
jobs = load_jobs(Path("~/.skcapstone/config/jobs.yaml").expanduser())
|
|
14
|
+
aliases = current_host_aliases()
|
|
15
|
+
for job in jobs:
|
|
16
|
+
if job.enabled and job_runs_here(job, aliases) and is_due(job, last_run):
|
|
17
|
+
dispatch(job)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import os
|
|
23
|
+
import re
|
|
24
|
+
import socket
|
|
25
|
+
import warnings
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from datetime import datetime, timezone
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Optional, Union
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Internal helpers
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
_DURATION_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)\s*([smhd]?)\s*$")
_UNIT_SECONDS: dict[str, float] = {"": 1, "s": 1, "m": 60, "h": 3600, "d": 86400}


def _parse_duration(value: Union[str, int, float]) -> float:
    """Convert a human-readable duration string or plain number to seconds.

    Args:
        value: A string like ``"300s"``, ``"5m"``, ``"1h"``, ``"1d"``, or a
            plain numeric value (int or float treated as seconds already).
            A string without a suffix is also read as seconds.

    Returns:
        Duration in seconds as a float.

    Raises:
        ValueError: If the string is unparseable, contains a negative value,
            or has an unrecognised suffix; if a number is negative, NaN or
            infinite; or if *value* is a ``bool``.

    Examples:
        >>> _parse_duration("300s")
        300.0
        >>> _parse_duration("5m")
        300.0
        >>> _parse_duration(600)
        600.0
    """
    # bool is a subclass of int, so without this guard ``True`` (e.g. from a
    # YAML ``every: yes``) would silently be accepted as a 1-second interval.
    if isinstance(value, bool):
        raise ValueError(f"invalid duration: {value!r}")
    if isinstance(value, (int, float)):
        # The chained comparison is False for negatives, +inf and NaN alike
        # (every comparison involving NaN is False).
        if not 0 <= value < float("inf"):
            raise ValueError(
                f"duration must be non-negative and finite, got {value!r}"
            )
        return float(value)
    m = _DURATION_RE.match(str(value))
    if not m:
        raise ValueError(f"invalid duration: {value!r}")
    # group(1) is the numeric part, group(2) the (possibly empty) unit suffix.
    return float(m.group(1)) * _UNIT_SECONDS[m.group(2)]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Group A — JobSpec dataclass + load_jobs
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
@dataclass
class JobSpec:
    """Describes a single scheduled job as loaded from ``jobs.yaml``.

    Field order and defaults are part of the constructor interface; only
    ``name`` is required.

    Attributes:
        name: Unique job identifier (the YAML key).
        type: Job type — ``"python"``, ``"shell"``, or ``"agent"``.
        schedule: Cron expression (mutually exclusive with ``every_seconds``).
        every_seconds: Interval in seconds (mutually exclusive with ``schedule``).
        nodes: Node-affinity list of host aliases, or the string ``"all"``.
        agent: Agent name for ``type="agent"`` jobs.
        prompt: Prompt text for ``type="agent"`` jobs.
        command: Shell command for ``type="shell"`` jobs.
        callback: Dotted ``module:function`` path for ``type="python"`` jobs.
        timeout: Hard-kill timeout in seconds.
        enabled: Whether the job is active.
        retries: Extra attempts on failure (``0`` means run once).
        retry_backoff: Seconds between attempts (linear).
        jitter: Maximum random splay in seconds applied before dispatch.
        notify: One of ``off``, ``on_failure``, ``on_success``, ``always``.
        notify_level: sk-alert level used for failure notifications.
    """

    name: str
    type: str = "python"
    schedule: Optional[str] = None
    every_seconds: Optional[float] = None
    nodes: Union[str, list[str]] = "all"
    agent: Optional[str] = None
    prompt: Optional[str] = None
    command: Optional[str] = None
    callback: Optional[str] = None
    timeout: float = 900.0
    enabled: bool = True
    # --- reliability / fleet / observability (added 2026-06-09) ---
    retries: int = 0  # extra attempts on failure (0 = run once)
    retry_backoff: float = 0.0  # seconds between attempts (linear)
    jitter: float = 0.0  # max random splay (s) before dispatch — avoids
    # fleet thundering-herd on shared cron slots
    notify: str = "off"  # off | on_failure | on_success | always (sk-alert hook)
    notify_level: str = "warn"  # sk-alert level for failure notifications
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _coerce_field(job_name, raw: dict, key: str, default, cast):
    """Return ``cast(raw[key])``, or *default* when the key is absent or null."""
    found = raw.get(key)
    if found is None:
        return default
    try:
        return cast(found)
    except (TypeError, ValueError) as exc:
        # Re-raise with the job name so a bad value can be located in the file.
        raise ValueError(
            f"Job {job_name!r}: invalid value for {key!r}: {found!r}"
        ) from exc


def load_jobs(config_path: Path) -> list[JobSpec]:
    """Load job definitions from a ``jobs.yaml`` config file.

    The YAML file must have a top-level ``jobs`` mapping.  Each key becomes
    the ``name`` of the resulting :class:`JobSpec`.  The ``every`` field is
    parsed via :func:`_parse_duration` and stored as ``every_seconds``; the
    raw ``every`` key is consumed and not passed to the dataclass.

    An explicit YAML null for ``timeout``, ``retries``, ``retry_backoff``,
    ``jitter``, ``notify`` or ``notify_level`` is treated like an absent key
    and falls back to the field default.  Keys outside the known schema only
    produce a :class:`UserWarning`; they are otherwise ignored.

    Args:
        config_path: Path to the ``jobs.yaml`` file.  If the file does not
            exist, an empty list is returned without raising.

    Returns:
        A list of :class:`JobSpec` instances in definition order.

    Raises:
        ValueError: If the document or its ``jobs`` entry is not a mapping,
            if a job body is not a mapping, or if a field value cannot be
            converted (the message names the offending job).

    Example::

        jobs = load_jobs(Path("~/.skcapstone/config/jobs.yaml").expanduser())
    """
    if not config_path.exists():
        return []

    import yaml  # lazy import — pyyaml optional at module level

    with config_path.open(encoding="utf-8") as fh:
        data = yaml.safe_load(fh)

    # An empty document (or any falsy top level) means "no jobs".
    if not data:
        return []
    if not isinstance(data, dict):
        raise ValueError(
            f"{config_path}: top level must be a mapping, "
            f"got {type(data).__name__}"
        )
    jobs_raw = data.get("jobs") or {}
    if not isinstance(jobs_raw, dict):
        raise ValueError(
            f"{config_path}: 'jobs' must be a mapping of name -> definition, "
            f"got {type(jobs_raw).__name__}"
        )

    known_keys = {
        "type", "schedule", "every", "nodes", "agent", "prompt",
        "command", "callback", "timeout", "enabled",
        "retries", "retry_backoff", "jitter", "notify", "notify_level",
    }
    result: list[JobSpec] = []

    for name, raw in jobs_raw.items():
        if not raw:
            # ``name:`` with no body — every field takes its default.
            raw = {}
        elif not isinstance(raw, dict):
            raise ValueError(
                f"Job {name!r} in {config_path} must be a mapping, "
                f"got {type(raw).__name__}"
            )
        else:
            raw = dict(raw)  # copy: 'every' is popped below

        # Warn on unrecognised keys before consuming 'every'
        unknown = set(raw.keys()) - known_keys
        if unknown:
            warnings.warn(
                f"Job {name!r} has unrecognised key(s): {sorted(unknown)}. "
                "Typo in config? Job may not behave as expected.",
                UserWarning,
                stacklevel=2,
            )

        # Convert 'every' → 'every_seconds'
        every_raw = raw.pop("every", None)
        every_seconds: Optional[float] = None
        if every_raw is not None:
            try:
                every_seconds = _parse_duration(every_raw)
            except ValueError as exc:
                raise ValueError(f"Job {name!r}: {exc}") from exc

        # PyYAML resolves an unquoted ``off`` to the boolean False (YAML 1.1),
        # which str() would turn into "False" instead of the intended "off".
        notify_raw = raw.get("notify")
        if notify_raw is None or notify_raw is False:
            notify = "off"
        else:
            notify = str(notify_raw)

        result.append(
            JobSpec(
                name=name,
                type=raw.get("type", "python"),
                schedule=raw.get("schedule"),
                every_seconds=every_seconds,
                nodes=raw.get("nodes", "all"),
                agent=raw.get("agent"),
                prompt=raw.get("prompt"),
                command=raw.get("command"),
                callback=raw.get("callback"),
                timeout=_coerce_field(name, raw, "timeout", 900.0, float),
                enabled=bool(raw.get("enabled", True)),
                retries=_coerce_field(name, raw, "retries", 0, int),
                retry_backoff=_coerce_field(name, raw, "retry_backoff", 0.0, float),
                jitter=_coerce_field(name, raw, "jitter", 0.0, float),
                notify=notify,
                notify_level=_coerce_field(name, raw, "notify_level", "warn", str),
            )
        )

    return result
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
# Group A2 — jobs.d/ drop-in registration (added 2026-06-09)
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
def load_jobs_with_dropins(config_path: Path) -> list[JobSpec]:
    """Return the jobs from ``jobs.yaml`` overlaid with ``jobs.d/*.yaml``.

    The base file is read first.  Each fragment in the sibling ``jobs.d``
    directory (``config_path.parent / "jobs.d"``) is then applied in sorted
    filename order.  A fragment that redefines an already-known job *name*
    replaces the earlier definition and triggers a :class:`UserWarning`; the
    job keeps the list position of its first definition.

    Args:
        config_path: Location of the base ``jobs.yaml``.  The base file and
            the drop-in directory are both optional; whichever is missing
            contributes nothing.

    Returns:
        The merged :class:`JobSpec` list — base jobs in definition order,
        then jobs that only exist in drop-ins.

    Example::

        jobs = load_jobs_with_dropins(
            Path("~/.skcapstone/config/jobs.yaml").expanduser()
        )
    """
    # Keyed by job name; dict insertion order gives the final ordering.
    by_name: dict[str, JobSpec] = {
        base_spec.name: base_spec for base_spec in load_jobs(config_path)
    }

    fragments_root = config_path.parent / "jobs.d"
    if not fragments_root.is_dir():
        return list(by_name.values())

    for fragment in sorted(fragments_root.glob("*.yaml")):
        for spec in load_jobs(fragment):
            if spec.name in by_name:
                warnings.warn(
                    f"Job {spec.name!r} in drop-in {fragment.name!r} "
                    f"overrides an earlier definition.",
                    UserWarning,
                    stacklevel=2,
                )
            by_name[spec.name] = spec

    return list(by_name.values())
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _dropin_dir(home: Optional[Path] = None) -> Path:
    """Locate the ``config/jobs.d`` drop-in directory.

    With an explicit *home* the result is ``<home>/config/jobs.d``.  Without
    one, the package-level ``shared_home()`` supplies the root; if importing
    or calling it fails for any reason, ``~/.skcapstone`` is used instead.

    NOTE(review): the original notes say ``shared_home()`` honours the
    ``SKCAPSTONE_HOME`` environment variable — not visible from this module.
    """
    if home is None:
        try:
            from . import shared_home

            root = shared_home()
        except Exception:
            # Best-effort fallback to the conventional per-user location.
            root = Path("~/.skcapstone").expanduser()
    else:
        root = Path(home)
    return root / "config" / "jobs.d"
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def register_job(spec: dict, home: Optional[Path] = None) -> Path:
    """Register a scheduled job by writing a ``jobs.d/<name>.yaml`` fragment.

    This is the programmatic counterpart to hand-editing ``jobs.yaml`` — it
    lets a service own its own scheduler entry.  The fragment is written to a
    temporary file and then moved into place; calling again with the same
    ``name`` overwrites it (idempotent re-registration on every service start
    is the intended pattern).

    Args:
        spec: A single job definition.  Must contain ``name`` and at least
            one of ``schedule`` or ``every``.  Remaining keys mirror the
            ``jobs.yaml`` schema (``type``, ``command``/``callback``/``agent``,
            ``nodes``, ``timeout``, ``retries``, ``notify`` …).  The caller's
            dict is not modified.
        home: skcapstone root.  When omitted, the root chosen by
            :func:`_dropin_dir` is used.

    Returns:
        Path to the written ``jobs.d/<name>.yaml`` fragment.

    Raises:
        ValueError: If ``name`` is missing or empty, if ``name`` contains a
            path separator, or if neither ``schedule`` nor ``every`` is
            present.
    """
    spec = dict(spec)
    name = spec.pop("name", None)
    if not name:
        raise ValueError("register_job: spec must include a 'name'")
    # The name becomes a filename; a separator would let the fragment land
    # outside jobs.d (or in a subdirectory the loader never scans).
    if any(sep in str(name) for sep in ("/", "\\")):
        raise ValueError(
            f"register_job: job name {name!r} must not contain path separators"
        )
    if "schedule" not in spec and "every" not in spec:
        raise ValueError(
            f"register_job: job {name!r} must define 'schedule' or 'every'"
        )

    import yaml  # lazy — pyyaml optional at module level

    dropin = _dropin_dir(home)
    dropin.mkdir(parents=True, exist_ok=True)

    final = dropin / f"{name}.yaml"
    tmp = dropin / f".{name}.yaml.tmp"
    try:
        tmp.write_text(
            yaml.safe_dump({"jobs": {name: spec}}, sort_keys=False),
            encoding="utf-8",
        )
        # Path.replace overwrites an existing target on every platform;
        # Path.rename raises FileExistsError on Windows when re-registering.
        tmp.replace(final)
    except Exception:
        # Do not leave a half-written temp file behind; the original error
        # is what the caller needs to see, so cleanup failures are ignored.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
    return final
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def unregister_job(name: str, home: Optional[Path] = None) -> bool:
    """Remove a previously registered ``jobs.d/<name>.yaml`` fragment.

    Args:
        name: The job name used at registration.
        home: skcapstone root.  When omitted, the root chosen by
            :func:`_dropin_dir` is used.

    Returns:
        ``True`` if a fragment existed and was removed, ``False`` otherwise
        (including when *name* contains a path separator and therefore cannot
        name a fragment directly inside ``jobs.d``).
    """
    # Never let a crafted name address a file outside the drop-in directory.
    if any(sep in str(name) for sep in ("/", "\\")):
        return False
    fragment = _dropin_dir(home) / f"{name}.yaml"
    try:
        # Unlink directly instead of exists()-then-unlink(): another process
        # may remove the fragment between the check and the delete.
        fragment.unlink()
    except (FileNotFoundError, NotADirectoryError):
        return False
    return True
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
# ---------------------------------------------------------------------------
|
|
327
|
+
# Group B — node affinity
|
|
328
|
+
# ---------------------------------------------------------------------------
|
|
329
|
+
|
|
330
|
+
def job_runs_here(job: JobSpec, host_aliases: set[str]) -> bool:
    """Return ``True`` if *job* should fire on the current node.

    Args:
        job: The :class:`JobSpec` to evaluate.  Only ``job.nodes`` is read.
        host_aliases: The aliases that identify the current host
            (see :func:`current_host_aliases`).  Any iterable is accepted.

    Returns:
        ``True`` when ``job.nodes == "all"`` or when any alias in
        ``job.nodes`` is present in *host_aliases*.  ``job.nodes`` may be a
        single alias or a list/tuple/set of aliases; ``None`` matches nothing.

    Example::

        aliases = current_host_aliases()
        if job_runs_here(job, aliases):
            dispatch(job)
    """
    nodes = job.nodes
    if nodes == "all":
        return True
    if nodes is None:
        return False
    if isinstance(nodes, (list, tuple, set, frozenset)):
        # Programmatically built specs may carry a tuple or set; wrapping
        # those as a single element would make the job silently never match.
        wanted = set(nodes)
    else:
        wanted = {nodes}
    # isdisjoint accepts any iterable, so a list of aliases works too.
    return not wanted.isdisjoint(host_aliases)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
# ---------------------------------------------------------------------------
|
|
355
|
+
# Group C — due-check cron + interval with misfire catch-up
|
|
356
|
+
# ---------------------------------------------------------------------------
|
|
357
|
+
|
|
358
|
+
def is_due(
    job: JobSpec,
    last_run: Optional[datetime],
    now: Optional[datetime] = None,
) -> bool:
    """Decide whether *job* should be dispatched at *now*.

    The interval setting takes precedence when both kinds of schedule are set.

    * No ``every_seconds`` and no ``schedule`` → never due.
    * A job that has never run (``last_run is None``) → due immediately.
    * Interval job → due once ``now - last_run >= every_seconds``.
    * Cron job → due when ``last_run`` is earlier than the slot returned by
      ``croniter(...).get_prev`` for *now*.  Several missed slots therefore
      cause a single catch-up run, not one run per slot.

    Naive datetimes (*now*, *last_run*, and the slot croniter returns) are
    interpreted as UTC.

    Args:
        job: The :class:`JobSpec` to evaluate.
        last_run: When the job last ran, or ``None`` if it never has.
        now: Reference time; defaults to ``datetime.now(timezone.utc)``.

    Returns:
        ``True`` if the job should be dispatched now.

    Example::

        if is_due(job, state.last_run):
            dispatch(job)
    """

    def _as_utc(moment: datetime) -> datetime:
        # Attach UTC to naive values; aware values pass through untouched.
        return moment if moment.tzinfo else moment.replace(tzinfo=timezone.utc)

    reference = datetime.now(timezone.utc) if now is None else now
    if reference.tzinfo is None:
        reference = reference.replace(tzinfo=timezone.utc)

    interval = job.every_seconds
    if interval is None and job.schedule is None:
        # Nothing to schedule against.
        return False
    if last_run is None:
        # First run for either schedule kind.
        return True

    if interval is not None:
        return (reference - _as_utc(last_run)).total_seconds() >= interval

    from croniter import croniter  # lazy import

    latest_slot: datetime = croniter(job.schedule, reference).get_prev(datetime)
    if latest_slot.tzinfo is None:
        latest_slot = latest_slot.replace(tzinfo=timezone.utc)
    return _as_utc(last_run) < latest_slot
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
# ---------------------------------------------------------------------------
|
|
430
|
+
# Group D — host alias discovery
|
|
431
|
+
# ---------------------------------------------------------------------------
|
|
432
|
+
|
|
433
|
+
def current_host_aliases() -> set[str]:
    """Collect every name under which the current host may be addressed.

    Two sources are merged:

    - the OS hostname as reported by ``socket.gethostname()``;
    - the ``SK_NODE_ALIAS`` environment variable, read as a comma-separated
      list.  Each entry is whitespace-stripped and empty entries are dropped.

    Returns:
        A :class:`set` of alias strings suitable for node-affinity matching.

    Example::

        # With SK_NODE_ALIAS=".41" set in the environment:
        aliases = current_host_aliases()
        # e.g. {'my-host', '.41'} — hostname + SK_NODE_ALIAS token
    """
    raw_aliases = os.environ.get("SK_NODE_ALIAS", "")
    # Strip every comma-separated entry, then drop the blank ones produced by
    # an unset variable, trailing commas, or whitespace-only entries.
    env_tokens = {part.strip() for part in raw_aliases.split(",")}
    env_tokens.discard("")
    return {socket.gethostname(), *env_tokens}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"""Executes JobSpecs by type (python | shell | agent) with overlap locking.
|
|
2
|
+
|
|
3
|
+
This module is the execution layer for the unified fleet job scheduler. It
|
|
4
|
+
is intentionally free of scheduling logic — callers decide *when* to run a
|
|
5
|
+
job; this module handles the *how*.
|
|
6
|
+
|
|
7
|
+
Typical usage::
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from skcapstone.scheduler_jobs import JobSpec
|
|
11
|
+
from skcapstone.scheduler_runner import JobRunner
|
|
12
|
+
|
|
13
|
+
runner = JobRunner(log_dir=Path("~/.skcapstone/logs").expanduser())
|
|
14
|
+
with runner.lock(job) as acquired:
|
|
15
|
+
if acquired:
|
|
16
|
+
result = runner.run(job)
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import contextlib
|
|
21
|
+
import importlib
|
|
22
|
+
import logging
|
|
23
|
+
import os
|
|
24
|
+
import shlex
|
|
25
|
+
import subprocess
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from datetime import datetime, timezone
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Generator
|
|
30
|
+
|
|
31
|
+
from .scheduler_jobs import JobSpec
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger("skcapstone.scheduler_runner")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Public result type
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class JobResult:
    """Outcome record for one execution of a job.

    Only ``ok`` is required; the remaining fields default to the values used
    for a clean success (``0`` / empty strings).

    Attributes:
        ok: ``True`` if the job succeeded — a zero exit status for
            subprocess-based jobs, or no exception for python callbacks.
        exit_code: Exit status of the child process for subprocess-based
            jobs.  Successful python jobs report ``0``; timeouts and OS
            errors report ``-1``.
        output: stdout followed by stderr as captured from a subprocess
            job.  Left empty for python-callback jobs.
        error: Human-readable failure description; the empty string when
            the job succeeded.
    """

    ok: bool
    exit_code: int = 0
    output: str = ""
    error: str = ""
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
# Runner
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class JobRunner:
    """Executes :class:`~skcapstone.scheduler_jobs.JobSpec` instances.

    Each runner owns a ``log_dir`` directory where per-run log files and
    per-job lock files are written.

    Args:
        log_dir: Directory for run logs and overlap-lock files.  Created
            automatically if it does not exist.
    """

    def __init__(self, log_dir: Path) -> None:
        """Initialise the runner with a log directory.

        Args:
            log_dir: Writable directory for logs and lock files.  Will be
                created (with parents) on first use.
        """
        self.log_dir = Path(log_dir)

    # ------------------------------------------------------------------
    # Overlap lock
    # ------------------------------------------------------------------

    @contextlib.contextmanager
    def lock(self, job: JobSpec) -> Generator[bool, None, None]:
        """Acquire an exclusive per-job overlap lock.

        Uses an ``O_CREAT | O_EXCL`` open on a ``<job.name>.lock`` file as
        an atomic test-and-set.  When the lock was acquired it is always
        released on context exit, even if the body raises.

        Args:
            job: The job whose lock should be acquired.

        Yields:
            ``True`` if the lock was acquired; ``False`` if the lock file
            already exists (the caller should skip this run).

        Raises:
            OSError: If the log directory or the lock file cannot be
                created for a reason other than the lock already existing.

        Example::

            with runner.lock(job) as acquired:
                if acquired:
                    result = runner.run(job)
                else:
                    logger.info("job %s already running, skipping", job.name)
        """
        self.log_dir.mkdir(parents=True, exist_ok=True)
        lock_path = self.log_dir / f"{job.name}.lock"
        try:
            fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            # Somebody else holds the lock; we must NOT unlink their file.
            yield False
            return
        try:
            # Close the descriptor even if recording the PID fails (e.g. disk
            # full) so a failed acquisition does not leak a file descriptor.
            try:
                os.write(fd, str(os.getpid()).encode())
            finally:
                os.close(fd)
            yield True
        finally:
            # NOTE: if the process is SIGKILL'd or the host crashes, this unlink
            # never runs and the lockfile blocks the job until removed.  The PID
            # written above is the hook for a future staleness check (compare to
            # /proc/<pid> and unlink if the process is gone); v1 relies on
            # operators clearing stale locks on restart.
            with contextlib.suppress(OSError):
                lock_path.unlink()

    # ------------------------------------------------------------------
    # Dispatch
    # ------------------------------------------------------------------

    def run(self, job: JobSpec) -> JobResult:
        """Execute a job and return a :class:`JobResult`.

        Dispatches to the appropriate backend based on ``job.type``:

        - ``"python"`` — imports ``module`` and calls ``fn()`` from
          ``job.callback`` (format: ``"module.path:function_name"``).
        - ``"shell"`` — runs ``job.command`` via :mod:`subprocess` after
          splitting with :func:`shlex.split`.
        - ``"agent"`` — runs ``claude -p "<prompt>"`` optionally with
          ``--agent <name>``.

        Jobs *never* raise — all failures, including a shell command that
        cannot be tokenised or is empty, are returned as a
        :class:`JobResult` with ``ok=False``.

        Args:
            job: The job specification to execute.

        Returns:
            A :class:`JobResult` describing the outcome.
        """
        if job.type == "python":
            return self._run_python(job)
        if job.type == "shell":
            try:
                argv = shlex.split(job.command or "")
            except ValueError as exc:
                # shlex rejects malformed input such as an unclosed quote;
                # report it instead of letting it escape into the scheduler.
                logger.error("job %r has an unparsable command: %s", job.name, exc)
                return JobResult(ok=False, exit_code=-1, error=f"invalid command: {exc}")
            return self._run_subprocess(job, argv)
        if job.type == "agent":
            cmd = ["claude", "-p", job.prompt or ""]
            if job.agent:
                cmd += ["--agent", job.agent]
            return self._run_subprocess(job, cmd)
        return JobResult(ok=False, error=f"unknown job type: {job.type!r}")

    # ------------------------------------------------------------------
    # Private backends
    # ------------------------------------------------------------------

    def _run_python(self, job: JobSpec) -> JobResult:
        """Import and call a ``module:function`` callback.

        Args:
            job: A python-type :class:`~skcapstone.scheduler_jobs.JobSpec`
                whose ``callback`` field is ``"module.path:fn_name"``.

        Returns:
            :class:`JobResult` with ``ok=True`` on success, or ``ok=False``
            with ``error`` set to the exception message on any failure
            (malformed callback string, import error, missing attribute, or
            an exception raised by the callback itself).
        """
        try:
            mod_name, _, fn_name = (job.callback or "").partition(":")
            if not mod_name or not fn_name:
                return JobResult(
                    ok=False,
                    error=f"invalid callback {job.callback!r} — expected 'module:fn'",
                )
            module = importlib.import_module(mod_name)
            fn = getattr(module, fn_name)
            fn()
            return JobResult(ok=True)
        except Exception as exc:  # noqa: BLE001 — jobs must never crash the scheduler loop
            logger.error("python job %r failed: %s", job.name, exc, exc_info=True)
            return JobResult(ok=False, error=str(exc))

    def _run_subprocess(self, job: JobSpec, cmd: list[str]) -> JobResult:
        """Run *cmd* as a subprocess, capturing output to a timestamped log.

        The log file is ``<log_dir>/<job.name>-<UTC timestamp>.log``.  Log
        writing is best-effort: a failure to create the directory or write
        the file is logged as a warning and does not change the returned
        result.

        Args:
            job: The originating :class:`~skcapstone.scheduler_jobs.JobSpec`
                (used for log file naming and timeout).
            cmd: Argument list passed directly to :func:`subprocess.run`.

        Returns:
            :class:`JobResult` with:

            - ``ok=True`` and ``exit_code=0`` on success.
            - ``ok=False`` and ``exit_code=<n>`` on nonzero exit.
            - ``ok=False`` and ``exit_code=-1`` on timeout (``output`` holds
              whatever was captured before the timeout), OS error, or an
              empty *cmd*.
        """
        if not cmd:
            # subprocess cannot execute an empty argument list; on POSIX it
            # fails with IndexError, which the handlers below would not catch.
            logger.error("job %r has an empty command", job.name)
            return JobResult(ok=False, exit_code=-1, error="empty command")

        ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        log_path = self.log_dir / f"{job.name}-{ts}.log"
        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=job.timeout,
            )
        except subprocess.TimeoutExpired as exc:
            # Keep whatever the child produced before it was killed; the
            # output of a hung job is what an operator needs most.
            partial = self._as_text(exc.stdout) + self._as_text(exc.stderr)
            self._write_log(log_path, partial)
            logger.error("job %r timed out after %ss", job.name, job.timeout)
            return JobResult(
                ok=False,
                exit_code=-1,
                output=partial,
                error=f"timeout after {job.timeout}s",
            )
        except (OSError, ValueError) as exc:
            logger.error("job %r subprocess error: %s", job.name, exc)
            return JobResult(ok=False, exit_code=-1, error=str(exc))

        out = (proc.stdout or "") + (proc.stderr or "")
        self._write_log(log_path, out)
        ok = proc.returncode == 0
        return JobResult(
            ok=ok,
            exit_code=proc.returncode,
            output=out,
            # Only the tail of the output is surfaced as the error summary.
            error="" if ok else out[-500:],
        )

    def _write_log(self, log_path: Path, text: str) -> None:
        """Best-effort write of *text* to *log_path*; never raises on I/O errors."""
        try:
            self.log_dir.mkdir(parents=True, exist_ok=True)
            log_path.write_text(text, encoding="utf-8")
        except (OSError, ValueError) as exc:
            # A missing log must not mask the job's real outcome.
            logger.warning("could not write job log %s: %s", log_path, exc)

    @staticmethod
    def _as_text(data: str | bytes | None) -> str:
        """Normalise captured output that may be ``None``, ``bytes`` or ``str``."""
        if data is None:
            return ""
        if isinstance(data, bytes):
            # Partial output attached to TimeoutExpired may be raw bytes even
            # though text mode was requested.
            return data.decode("utf-8", errors="replace")
        return data
|