@smilintux/skcapstone 0.9.0 → 0.12.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +10 -4
- package/.github/workflows/ci.yml +2 -2
- package/.github/workflows/publish.yml +9 -2
- package/.openclaw-workspace.json +2 -2
- package/CLAUDE.md +37 -0
- package/MISSION.md +17 -2
- package/README.md +282 -3
- package/docker/Dockerfile +7 -7
- package/docker/compose-templates/dev-team.yml +12 -12
- package/docker/compose-templates/mini-team.yml +9 -9
- package/docker/compose-templates/ops-team.yml +10 -10
- package/docker/compose-templates/research-team.yml +10 -10
- package/docker/entrypoint.sh +4 -4
- package/docs/ADR-optional-integration-backbone.md +181 -0
- package/docs/ARCHITECTURE.md +186 -43
- package/docs/BOND_WITH_GROK.md +6 -6
- package/docs/CUSTOM_AGENT.md +278 -1
- package/docs/DREAMING.md +70 -0
- package/docs/GETTING_STARTED.md +10 -7
- package/docs/QUICKSTART.md +10 -6
- package/docs/SKJOULE_ARCHITECTURE.md +3 -3
- package/docs/SOUL_SWAPPER.md +5 -5
- package/docs/hammertime-audit.md +402 -0
- package/docs/sk-integration-HANDOFF.md +117 -0
- package/docs/skscheduler.md +155 -0
- package/docs/superpowers/examples/jobs.yaml +31 -0
- package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
- package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
- package/examples/custom-bond-template.json +1 -1
- package/examples/grok-feb.json +1 -1
- package/examples/queen-ava-feb.json +1 -1
- package/launchd/com.skcapstone.daemon.plist +52 -0
- package/launchd/com.skcapstone.memory-compress.plist +45 -0
- package/launchd/com.skcapstone.skcomms-heartbeat.plist +33 -0
- package/launchd/com.skcapstone.skcomms-queue-drain.plist +34 -0
- package/launchd/install-launchd.sh +156 -0
- package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
- package/package.json +1 -1
- package/pyproject.toml +16 -10
- package/scripts/archive-sessions.sh +95 -0
- package/scripts/check-updates.py +4 -4
- package/scripts/install-bundle.sh +8 -8
- package/scripts/install.ps1 +12 -11
- package/scripts/install.sh +196 -11
- package/scripts/model-fallback-monitor.sh +102 -0
- package/scripts/notion-api.py +259 -0
- package/scripts/nvidia-proxy.mjs +908 -0
- package/scripts/proxy-monitor.sh +89 -0
- package/scripts/refresh-anthropic-token.sh +172 -0
- package/scripts/release.sh +98 -0
- package/scripts/session-to-memory.py +219 -0
- package/scripts/skgateway.mjs +856 -0
- package/scripts/telegram-catchup-all.sh +147 -0
- package/scripts/verify_install.sh +2 -2
- package/scripts/wargov-ufo-capture/README.md +43 -0
- package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
- package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
- package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
- package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
- package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
- package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
- package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
- package/scripts/watch-anthropic-token.sh +212 -0
- package/scripts/windows/install-tasks.ps1 +7 -7
- package/scripts/windows/skcapstone-task.xml +1 -1
- package/src/skcapstone/__init__.py +45 -3
- package/src/skcapstone/_cli_monolith.py +20 -15
- package/src/skcapstone/activity.py +5 -1
- package/src/skcapstone/agent_card.py +3 -2
- package/src/skcapstone/api.py +41 -40
- package/src/skcapstone/auction.py +14 -11
- package/src/skcapstone/backup.py +2 -1
- package/src/skcapstone/blueprint_registry.py +4 -3
- package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
- package/src/skcapstone/brain_first.py +238 -0
- package/src/skcapstone/changelog.py +1 -1
- package/src/skcapstone/chat.py +22 -17
- package/src/skcapstone/cli/__init__.py +9 -1
- package/src/skcapstone/cli/_common.py +1 -0
- package/src/skcapstone/cli/agents_spawner.py +5 -2
- package/src/skcapstone/cli/alerts.py +25 -4
- package/src/skcapstone/cli/bench.py +15 -15
- package/src/skcapstone/cli/chat.py +7 -4
- package/src/skcapstone/cli/consciousness.py +5 -2
- package/src/skcapstone/cli/context_cmd.py +18 -4
- package/src/skcapstone/cli/daemon.py +121 -42
- package/src/skcapstone/cli/gtd.py +26 -1
- package/src/skcapstone/cli/housekeeping.py +3 -3
- package/src/skcapstone/cli/identity_cmd.py +378 -0
- package/src/skcapstone/cli/joule_cmd.py +7 -3
- package/src/skcapstone/cli/memory.py +8 -6
- package/src/skcapstone/cli/peers_dir.py +1 -1
- package/src/skcapstone/cli/register_cmd.py +29 -3
- package/src/skcapstone/cli/scheduler_cmd.py +167 -0
- package/src/skcapstone/cli/session.py +25 -0
- package/src/skcapstone/cli/setup.py +96 -29
- package/src/skcapstone/cli/shell_cmd.py +53 -1
- package/src/skcapstone/cli/skills_cmd.py +2 -2
- package/src/skcapstone/cli/soul.py +8 -5
- package/src/skcapstone/cli/status.py +37 -11
- package/src/skcapstone/cli/telegram.py +21 -0
- package/src/skcapstone/cli/test_cmd.py +5 -5
- package/src/skcapstone/cli/test_connection.py +2 -2
- package/src/skcapstone/cli/upgrade_cmd.py +23 -14
- package/src/skcapstone/cli/version_cmd.py +1 -1
- package/src/skcapstone/cli/watch_cmd.py +9 -6
- package/src/skcapstone/cloud9_bridge.py +14 -14
- package/src/skcapstone/codex_setup.py +255 -0
- package/src/skcapstone/config_validator.py +7 -4
- package/src/skcapstone/consciousness_config.py +5 -1
- package/src/skcapstone/consciousness_loop.py +313 -273
- package/src/skcapstone/context_loader.py +121 -0
- package/src/skcapstone/coord_federation.py +2 -1
- package/src/skcapstone/coordination.py +23 -6
- package/src/skcapstone/crush_integration.py +2 -1
- package/src/skcapstone/daemon.py +151 -88
- package/src/skcapstone/dashboard.py +10 -10
- package/src/skcapstone/data/sk-agent-picker.sh +421 -0
- package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
- package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
- package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
- package/src/skcapstone/data/systemd/skcapstone.service +37 -0
- package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
- package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
- package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
- package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
- package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
- package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
- package/src/skcapstone/defaults/claude/settings.json +74 -0
- package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
- package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
- package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
- package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
- package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
- package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
- package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
- package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
- package/src/skcapstone/defaults/unhinged.json +13 -0
- package/src/skcapstone/discovery.py +43 -20
- package/src/skcapstone/doctor.py +941 -22
- package/src/skcapstone/dreaming.py +1183 -109
- package/src/skcapstone/emotion_tracker.py +2 -2
- package/src/skcapstone/export.py +4 -3
- package/src/skcapstone/fuse_mount.py +35 -25
- package/src/skcapstone/gui_installer.py +2 -2
- package/src/skcapstone/heartbeat.py +34 -30
- package/src/skcapstone/housekeeping.py +14 -14
- package/src/skcapstone/install_wizard.py +209 -7
- package/src/skcapstone/itil.py +13 -4
- package/src/skcapstone/kms_scheduler.py +10 -8
- package/src/skcapstone/launchd.py +426 -0
- package/src/skcapstone/mcp_launcher.py +15 -1
- package/src/skcapstone/mcp_server.py +341 -49
- package/src/skcapstone/mcp_tools/__init__.py +2 -0
- package/src/skcapstone/mcp_tools/_helpers.py +2 -2
- package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
- package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
- package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
- package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
- package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
- package/src/skcapstone/mcp_tools/did_tools.py +11 -8
- package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
- package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
- package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
- package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
- package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
- package/src/skcapstone/mdns_discovery.py +2 -2
- package/src/skcapstone/memory_curator.py +1 -1
- package/src/skcapstone/memory_engine.py +10 -3
- package/src/skcapstone/metrics.py +30 -16
- package/src/skcapstone/migrate_memories.py +4 -3
- package/src/skcapstone/migrate_multi_agent.py +8 -7
- package/src/skcapstone/models.py +47 -5
- package/src/skcapstone/notifications.py +42 -18
- package/src/skcapstone/onboard.py +1000 -126
- package/src/skcapstone/operator_link.py +170 -0
- package/src/skcapstone/peer_directory.py +4 -4
- package/src/skcapstone/peers.py +19 -19
- package/src/skcapstone/pillars/__init__.py +7 -5
- package/src/skcapstone/pillars/consciousness.py +191 -0
- package/src/skcapstone/pillars/identity.py +51 -7
- package/src/skcapstone/pillars/memory.py +9 -3
- package/src/skcapstone/pillars/sync.py +2 -2
- package/src/skcapstone/preflight.py +3 -3
- package/src/skcapstone/providers/docker.py +28 -28
- package/src/skcapstone/register.py +6 -6
- package/src/skcapstone/registry_client.py +5 -4
- package/src/skcapstone/runtime.py +14 -3
- package/src/skcapstone/scheduled_tasks.py +254 -19
- package/src/skcapstone/scheduler_jobs.py +456 -0
- package/src/skcapstone/scheduler_runner.py +239 -0
- package/src/skcapstone/scheduler_state.py +162 -0
- package/src/skcapstone/sdk.py +310 -0
- package/src/skcapstone/service_health.py +279 -39
- package/src/skcapstone/session_briefing.py +108 -0
- package/src/skcapstone/session_capture.py +1 -1
- package/src/skcapstone/shell.py +7 -1
- package/src/skcapstone/soul.py +3 -1
- package/src/skcapstone/soul_switch.py +3 -1
- package/src/skcapstone/summary.py +6 -6
- package/src/skcapstone/sync_engine.py +15 -15
- package/src/skcapstone/sync_watcher.py +2 -2
- package/src/skcapstone/systemd.py +72 -21
- package/src/skcapstone/team_comms.py +8 -8
- package/src/skcapstone/team_engine.py +1 -1
- package/src/skcapstone/testrunner.py +3 -3
- package/src/skcapstone/trust_graph.py +40 -5
- package/src/skcapstone/unified_search.py +15 -6
- package/src/skcapstone/uninstall_wizard.py +11 -3
- package/src/skcapstone/version_check.py +8 -4
- package/src/skcapstone/warmth_anchor.py +4 -2
- package/src/skcapstone/whoami.py +4 -4
- package/systemd/skcapstone.service +4 -6
- package/systemd/skcapstone@.service +7 -8
- package/systemd/skcomms-heartbeat.service +21 -0
- package/systemd/skcomms-heartbeat.timer +12 -0
- package/systemd/skcomms-queue-drain.service +17 -0
- package/systemd/skcomms-queue-drain.timer +12 -0
- package/tests/conftest.py +39 -0
- package/tests/integration/test_consciousness_e2e.py +39 -39
- package/tests/test_agent_card.py +1 -1
- package/tests/test_agent_home_scaffold.py +34 -0
- package/tests/test_alerts_consumer_topics.py +27 -0
- package/tests/test_backup.py +2 -1
- package/tests/test_chat.py +6 -6
- package/tests/test_claude_md.py +2 -2
- package/tests/test_cli_skills.py +10 -10
- package/tests/test_cli_test_cmd.py +4 -4
- package/tests/test_cli_test_connection.py +1 -1
- package/tests/test_cloud9_bridge.py +6 -6
- package/tests/test_consciousness_e2e.py +1 -1
- package/tests/test_consciousness_loop.py +10 -10
- package/tests/test_coordination.py +25 -0
- package/tests/test_cross_package.py +21 -21
- package/tests/test_daemon.py +4 -4
- package/tests/test_daemon_shutdown.py +1 -1
- package/tests/test_docker_provider.py +29 -29
- package/tests/test_doctor.py +400 -0
- package/tests/test_doctor_skscheduler.py +50 -0
- package/tests/test_dreaming_engine.py +147 -0
- package/tests/test_dreaming_gtd_capture.py +35 -0
- package/tests/test_e2e_automated.py +8 -5
- package/tests/test_fuse_mount.py +10 -10
- package/tests/test_gtd_brief.py +46 -0
- package/tests/test_gtd_malformed_tolerance.py +31 -0
- package/tests/test_housekeeping.py +15 -15
- package/tests/test_identity_migrate.py +251 -0
- package/tests/test_integration_backbone.py +598 -0
- package/tests/test_itil_gtd_lifecycle.py +37 -0
- package/tests/test_jobs_dropins.py +84 -0
- package/tests/test_mcp_server.py +82 -37
- package/tests/test_models.py +48 -4
- package/tests/test_multi_agent.py +31 -29
- package/tests/test_notifications.py +122 -32
- package/tests/test_onboard.py +63 -75
- package/tests/test_operator_link.py +78 -0
- package/tests/test_peers.py +14 -14
- package/tests/test_pillars.py +98 -0
- package/tests/test_preflight.py +3 -3
- package/tests/test_runtime.py +21 -0
- package/tests/test_scheduled_tasks.py +11 -6
- package/tests/test_scheduler_cli.py +47 -0
- package/tests/test_scheduler_features.py +133 -0
- package/tests/test_scheduler_integration.py +87 -0
- package/tests/test_scheduler_jobs.py +155 -0
- package/tests/test_scheduler_runner.py +64 -0
- package/tests/test_scheduler_state.py +57 -0
- package/tests/test_sdk.py +70 -0
- package/tests/test_service_health_incidents.py +34 -0
- package/tests/test_service_registry.py +52 -0
- package/tests/test_session_briefing.py +130 -0
- package/tests/test_snapshots.py +4 -4
- package/tests/test_sync_pipeline.py +26 -26
- package/tests/test_team_comms.py +2 -2
- package/tests/test_testrunner.py +2 -2
- package/tests/test_trust_graph.py +18 -0
- package/tests/test_unified_search.py +2 -2
- package/tests/test_version_check.py +10 -0
- package/tests/test_version_cmd.py +8 -8
- package/tests/test_whoami.py +1 -1
- package/systemd/skcomm-heartbeat.service +0 -18
- package/systemd/skcomm-queue-drain.service +0 -17
- /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
- /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
|
@@ -20,12 +20,21 @@ Usage:
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
22
|
import logging
|
|
23
|
+
import os
|
|
24
|
+
import random
|
|
25
|
+
import shutil
|
|
26
|
+
import subprocess
|
|
23
27
|
import threading
|
|
28
|
+
import time
|
|
24
29
|
from dataclasses import dataclass, field
|
|
25
30
|
from datetime import datetime, timedelta, timezone
|
|
26
31
|
from pathlib import Path
|
|
27
32
|
from typing import Callable, Optional
|
|
28
33
|
|
|
34
|
+
from .scheduler_jobs import JobSpec, is_due, job_runs_here
|
|
35
|
+
from .scheduler_runner import JobRunner
|
|
36
|
+
from .scheduler_state import SchedulerState
|
|
37
|
+
|
|
29
38
|
logger = logging.getLogger("skcapstone.scheduled_tasks")
|
|
30
39
|
|
|
31
40
|
|
|
@@ -55,16 +64,25 @@ class ScheduledTask:
|
|
|
55
64
|
last_error: Optional[str] = None
|
|
56
65
|
run_count: int = 0
|
|
57
66
|
error_count: int = 0
|
|
67
|
+
delay_first_run: float = 0.0
|
|
58
68
|
|
|
59
69
|
def is_due(self, now: Optional[datetime] = None) -> bool:
|
|
60
70
|
"""Return True if the task interval has elapsed since last_run.
|
|
61
71
|
|
|
62
|
-
A task with no prior run is always considered due
|
|
72
|
+
A task with no prior run is always considered due, unless
|
|
73
|
+
``delay_first_run`` is set — in that case the first run is
|
|
74
|
+
deferred by that many seconds from process start.
|
|
63
75
|
|
|
64
76
|
Args:
|
|
65
77
|
now: Reference time for the check (defaults to UTC now).
|
|
66
78
|
"""
|
|
67
79
|
if self.last_run is None:
|
|
80
|
+
if self.delay_first_run > 0:
|
|
81
|
+
if not hasattr(self, "_created_at"):
|
|
82
|
+
object.__setattr__(self, "_created_at", datetime.now(timezone.utc))
|
|
83
|
+
reference = now or datetime.now(timezone.utc)
|
|
84
|
+
elapsed = (reference - self._created_at).total_seconds()
|
|
85
|
+
return elapsed >= self.delay_first_run
|
|
68
86
|
return True
|
|
69
87
|
reference = now or datetime.now(timezone.utc)
|
|
70
88
|
elapsed = (reference - self.last_run).total_seconds()
|
|
@@ -122,6 +140,10 @@ class TaskScheduler:
|
|
|
122
140
|
self._tasks: list[ScheduledTask] = []
|
|
123
141
|
self._lock = threading.Lock()
|
|
124
142
|
self._thread: Optional[threading.Thread] = None
|
|
143
|
+
self._config_jobs: list[JobSpec] = []
|
|
144
|
+
self._host_aliases: set[str] = set()
|
|
145
|
+
self._state: Optional[SchedulerState] = None
|
|
146
|
+
self._job_runner: Optional[JobRunner] = None
|
|
125
147
|
|
|
126
148
|
# ------------------------------------------------------------------
|
|
127
149
|
# Public API
|
|
@@ -132,6 +154,7 @@ class TaskScheduler:
|
|
|
132
154
|
name: str,
|
|
133
155
|
interval_seconds: float,
|
|
134
156
|
callback: Callable[[], None],
|
|
157
|
+
delay_first_run: float = 0.0,
|
|
135
158
|
) -> ScheduledTask:
|
|
136
159
|
"""Register a recurring task.
|
|
137
160
|
|
|
@@ -139,11 +162,12 @@ class TaskScheduler:
|
|
|
139
162
|
name: Unique task name (used in logs and status output).
|
|
140
163
|
interval_seconds: Minimum seconds between executions.
|
|
141
164
|
callback: Zero-argument callable to invoke.
|
|
165
|
+
delay_first_run: Seconds to wait before first execution (default 0 = immediate).
|
|
142
166
|
|
|
143
167
|
Returns:
|
|
144
168
|
The created ScheduledTask (caller may inspect it at runtime).
|
|
145
169
|
"""
|
|
146
|
-
task = ScheduledTask(name=name, interval_seconds=interval_seconds, callback=callback)
|
|
170
|
+
task = ScheduledTask(name=name, interval_seconds=interval_seconds, callback=callback, delay_first_run=delay_first_run)
|
|
147
171
|
with self._lock:
|
|
148
172
|
self._tasks.append(task)
|
|
149
173
|
logger.debug("Registered scheduled task '%s' every %.0fs", name, interval_seconds)
|
|
@@ -188,6 +212,181 @@ class TaskScheduler:
|
|
|
188
212
|
for t in self._tasks
|
|
189
213
|
]
|
|
190
214
|
|
|
215
|
+
def load_config_jobs(
    self,
    jobs: list[JobSpec],
    hostname: str,
    host_aliases: set[str],
    state_root: Path,
) -> None:
    """Install the config-driven jobs that apply to this host.

    Builds the per-host :class:`SchedulerState` and :class:`JobRunner`,
    then keeps only the jobs that are enabled and for which
    :func:`~skcapstone.scheduler_jobs.job_runs_here` accepts
    *host_aliases*.

    This method takes no lock while assigning ``_host_aliases``,
    ``_state``, ``_job_runner`` and ``_config_jobs``, so it is meant to be
    called before :meth:`start` (``build_scheduler`` does so).

    Args:
        jobs: Candidate job specs; disabled or non-matching entries are
            dropped.
        hostname: Primary identifier of the current host. It is handed to
            :class:`SchedulerState` and used as a path component of the
            runner's log directory.
        host_aliases: Aliases of the current host used for the
            node-affinity check. Stored by reference, not copied.
        state_root: Root directory handed to :class:`SchedulerState`; job
            logs go to ``<state_root>/scheduler/<hostname>/logs``.
    """
    self._host_aliases = host_aliases
    self._state = SchedulerState(root=state_root, hostname=hostname)

    logs_path = state_root / "scheduler" / hostname / "logs"
    self._job_runner = JobRunner(log_dir=logs_path)

    # Keep only the jobs that are switched on and pinned to this node.
    runnable: list[JobSpec] = []
    for spec in jobs:
        if spec.enabled and job_runs_here(spec, host_aliases):
            runnable.append(spec)
    self._config_jobs = runnable

    logger.info("Loaded %d config job(s) for host %s", len(runnable), hostname)
|
|
259
|
+
|
|
260
|
+
def tick_config_jobs(self, now: Optional[datetime] = None) -> None:
    """Fire any config-driven jobs that are due at *now*.

    Returns immediately when no config jobs are loaded or when the state
    store / job runner have not been initialised (i.e.
    :meth:`load_config_jobs` has not been called).

    Each due job is handed to its own daemon thread running
    :meth:`_run_config_job`, so this method does not wait for any job to
    finish. The due-check itself runs in the calling thread.

    Because the run is recorded by the worker thread, a later tick may
    still see the same job as due while it is running;
    :meth:`_run_config_job` skips the run when it cannot obtain the
    per-job lock.

    Failures are isolated per job: an exception raised while checking or
    dispatching one job is logged and the remaining jobs are still
    evaluated in the same tick.

    Args:
        now: Reference timestamp for the due-checks. Defaults to
            ``datetime.now(timezone.utc)`` when not provided.
    """
    if not self._config_jobs or self._state is None or self._job_runner is None:
        return
    now = now or datetime.now(timezone.utc)
    for job in self._config_jobs:
        try:
            if not is_due(job, self._state.last_run(job.name), now):
                continue
            threading.Thread(
                target=self._run_config_job,
                args=(job, now),
                name=f"skjob-{job.name}",
                daemon=True,
            ).start()
        except Exception:  # noqa: BLE001 — one bad job must not starve the rest
            logger.exception(
                "config job '%s' could not be dispatched",
                getattr(job, "name", "?"),
            )
|
|
298
|
+
|
|
299
|
+
def _run_config_job(self, job: JobSpec, fire_time: datetime) -> None:
|
|
300
|
+
"""Run a single config job in its own thread: lock, execute, record.
|
|
301
|
+
|
|
302
|
+
This method is the body of the per-job daemon thread spawned by
|
|
303
|
+
:meth:`tick_config_jobs`. It acquires the per-job overlap lock,
|
|
304
|
+
runs the job via the configured :class:`~skcapstone.scheduler_runner.JobRunner`,
|
|
305
|
+
then records the result via
|
|
306
|
+
:class:`~skcapstone.scheduler_state.SchedulerState`.
|
|
307
|
+
|
|
308
|
+
If the lock cannot be obtained the method returns immediately without
|
|
309
|
+
running or recording — this is the safe path when the previous run is
|
|
310
|
+
still in progress (which can happen if a job's execution time exceeds
|
|
311
|
+
one tick interval).
|
|
312
|
+
|
|
313
|
+
Args:
|
|
314
|
+
job: The :class:`~skcapstone.scheduler_jobs.JobSpec` to execute.
|
|
315
|
+
fire_time: The UTC timestamp at which this job was determined to be
|
|
316
|
+
due (propagated to :meth:`~skcapstone.scheduler_state.SchedulerState.record_run`
|
|
317
|
+
so state timestamps reflect the scheduled fire time rather than
|
|
318
|
+
the wall-clock time of completion).
|
|
319
|
+
"""
|
|
320
|
+
with self._job_runner.lock(job) as got:
|
|
321
|
+
if not got:
|
|
322
|
+
logger.debug("job '%s' still running — skip", job.name)
|
|
323
|
+
return
|
|
324
|
+
# Jitter: random splay before dispatch so fleet nodes sharing a cron
|
|
325
|
+
# slot don't stampede a shared resource (LLM endpoint, registry, etc).
|
|
326
|
+
if getattr(job, "jitter", 0.0) > 0:
|
|
327
|
+
time.sleep(random.uniform(0.0, float(job.jitter)))
|
|
328
|
+
# Run with retries + linear backoff for transient infra failures.
|
|
329
|
+
attempts = max(1, int(getattr(job, "retries", 0)) + 1)
|
|
330
|
+
result = None
|
|
331
|
+
for i in range(attempts):
|
|
332
|
+
result = self._job_runner.run(job)
|
|
333
|
+
if result.ok:
|
|
334
|
+
break
|
|
335
|
+
if i < attempts - 1:
|
|
336
|
+
logger.warning(
|
|
337
|
+
"job '%s' attempt %d/%d failed: %s — retrying",
|
|
338
|
+
job.name, i + 1, attempts, result.error,
|
|
339
|
+
)
|
|
340
|
+
backoff = float(getattr(job, "retry_backoff", 0.0))
|
|
341
|
+
if backoff > 0:
|
|
342
|
+
time.sleep(backoff)
|
|
343
|
+
self._state.record_run(
|
|
344
|
+
job.name, now=fire_time, ok=result.ok, error=result.error
|
|
345
|
+
)
|
|
346
|
+
if not result.ok:
|
|
347
|
+
logger.warning(
|
|
348
|
+
"job '%s' failed after %d attempt(s): %s",
|
|
349
|
+
job.name, attempts, result.error,
|
|
350
|
+
)
|
|
351
|
+
self._maybe_notify(job, result, attempts)
|
|
352
|
+
|
|
353
|
+
@staticmethod
|
|
354
|
+
def _maybe_notify(job: JobSpec, result, attempts: int) -> None:
|
|
355
|
+
"""Fire an sk-alert per the job's ``notify`` policy.
|
|
356
|
+
|
|
357
|
+
Policy values: ``off`` (default), ``on_failure``, ``on_success``,
|
|
358
|
+
``always``. Sends the job name, status, attempt count, and a tail of
|
|
359
|
+
the captured output to Chef's Telegram via the ``sk-alert`` primitive.
|
|
360
|
+
Never raises — notification failure must not break the scheduler.
|
|
361
|
+
|
|
362
|
+
Args:
|
|
363
|
+
job: The job that ran.
|
|
364
|
+
result: The :class:`~skcapstone.scheduler_runner.JobResult`.
|
|
365
|
+
attempts: Number of attempts made (for the message).
|
|
366
|
+
"""
|
|
367
|
+
mode = getattr(job, "notify", "off")
|
|
368
|
+
if mode == "off":
|
|
369
|
+
return
|
|
370
|
+
want = (
|
|
371
|
+
mode == "always"
|
|
372
|
+
or (mode == "on_failure" and not result.ok)
|
|
373
|
+
or (mode == "on_success" and result.ok)
|
|
374
|
+
)
|
|
375
|
+
if not want:
|
|
376
|
+
return
|
|
377
|
+
status = "✅ ok" if result.ok else "❌ FAILED"
|
|
378
|
+
suffix = f" (after {attempts} attempts)" if attempts > 1 else ""
|
|
379
|
+
tail = "\n".join((result.output or result.error or "").strip().splitlines()[-12:])
|
|
380
|
+
msg = f"🗓️ skscheduler · {job.name} · {status}{suffix}"
|
|
381
|
+
if tail:
|
|
382
|
+
msg += "\n" + tail
|
|
383
|
+
level = "info" if result.ok else getattr(job, "notify_level", "warn")
|
|
384
|
+
alert = shutil.which("sk-alert") or os.path.expanduser("~/.skenv/bin/sk-alert")
|
|
385
|
+
try:
|
|
386
|
+
subprocess.run([alert, "-l", level, msg], timeout=30, check=False)
|
|
387
|
+
except Exception as exc: # noqa: BLE001 — notify must never break the loop
|
|
388
|
+
logger.warning("notify failed for job '%s': %s", job.name, exc)
|
|
389
|
+
|
|
191
390
|
# ------------------------------------------------------------------
|
|
192
391
|
# Internal
|
|
193
392
|
# ------------------------------------------------------------------
|
|
@@ -203,6 +402,8 @@ class TaskScheduler:
|
|
|
203
402
|
if task.is_due(now):
|
|
204
403
|
task.run()
|
|
205
404
|
|
|
405
|
+
self.tick_config_jobs(now)
|
|
406
|
+
|
|
206
407
|
self._stop_event.wait(timeout=self._tick_interval)
|
|
207
408
|
|
|
208
409
|
|
|
@@ -214,29 +415,50 @@ class TaskScheduler:
|
|
|
214
415
|
def make_memory_promotion_task(home: Path) -> Callable[[], None]:
    """Return a callback that starts a background memory promotion sweep.

    The returned callable does not perform the sweep itself: it starts a
    daemon thread named ``memory-promotion-sweep`` and returns, so the
    caller is never blocked by the sweep. A ``threading.Event`` acts as a
    gate: while a sweep is in flight, further calls log a debug message
    and return without starting another one.

    Each sweep calls ``PromotionEngine(home).sweep(limit=50)``. Errors
    raised inside the sweep are logged and not propagated.

    If the worker thread cannot be started, the gate is released again
    before the error is re-raised, so later calls can still start a sweep.

    Args:
        home: Agent home directory passed to ``PromotionEngine``.

    Returns:
        A zero-argument callable suitable for scheduler registration.
    """
    _running = threading.Event()

    def _sweep() -> None:
        # Worker-thread body; always reopens the gate when it finishes.
        try:
            from .memory_promoter import PromotionEngine

            engine = PromotionEngine(home)
            result = engine.sweep(limit=50)
            if result.promoted:
                logger.info(
                    "Memory promotion sweep: %d promoted of %d scanned",
                    len(result.promoted),
                    result.scanned,
                )
            else:
                logger.debug(
                    "Memory promotion sweep: %d scanned, 0 promoted",
                    result.scanned,
                )
        except Exception as exc:
            logger.error("Memory promotion sweep error: %s", exc)
        finally:
            _running.clear()

    def _run() -> None:
        if _running.is_set():
            logger.debug("Memory promotion sweep already running — skipping")
            return
        _running.set()
        try:
            threading.Thread(
                target=_sweep,
                name="memory-promotion-sweep",
                daemon=True,
            ).start()
        except Exception:
            # The worker never ran, so its ``finally`` will not reopen the
            # gate; do it here or every later sweep would be skipped.
            _running.clear()
            raise

    return _run
|
|
242
464
|
|
|
@@ -498,6 +720,7 @@ def build_scheduler(
|
|
|
498
720
|
name="memory_promotion_sweep",
|
|
499
721
|
interval_seconds=3600, # 1 hour
|
|
500
722
|
callback=make_memory_promotion_task(home),
|
|
723
|
+
delay_first_run=120, # let daemon stabilize before first sweep
|
|
501
724
|
)
|
|
502
725
|
|
|
503
726
|
scheduler.register(
|
|
@@ -543,4 +766,16 @@ def build_scheduler(
|
|
|
543
766
|
except Exception:
|
|
544
767
|
logger.debug("ITIL scheduled tasks not available — skipped")
|
|
545
768
|
|
|
769
|
+
from .scheduler_jobs import load_jobs_with_dropins, current_host_aliases
|
|
770
|
+
import socket
|
|
771
|
+
jobs_path = Path(home) / "config" / "jobs.yaml"
|
|
772
|
+
jobs = load_jobs_with_dropins(jobs_path)
|
|
773
|
+
if jobs:
|
|
774
|
+
scheduler.load_config_jobs(
|
|
775
|
+
jobs=jobs,
|
|
776
|
+
hostname=socket.gethostname(),
|
|
777
|
+
host_aliases=current_host_aliases(),
|
|
778
|
+
state_root=Path(home),
|
|
779
|
+
)
|
|
780
|
+
|
|
546
781
|
return scheduler
|