PyPI - hermeskill - Versions diffs - 0.1.0a1__py3-none-any.whl - Mend

hermeskill 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

hermeskill/__init__.py +57 -0
hermeskill/_version.py +1 -0
hermeskill/apoptosis.py +342 -0
hermeskill/calibration.py +235 -0
hermeskill/certificate.py +87 -0
hermeskill/checks.py +292 -0
hermeskill/cli.py +769 -0
hermeskill/client.py +433 -0
hermeskill/config.py +120 -0
hermeskill/exceptions.py +19 -0
hermeskill/policies.py +128 -0
hermeskill/pricing.py +82 -0
hermeskill/py.typed +0 -0
hermeskill/supervisor.py +257 -0
hermeskill/types/__init__.py +69 -0
hermeskill/types/agents.py +33 -0
hermeskill/types/calibration.py +66 -0
hermeskill/types/enums.py +70 -0
hermeskill/types/events.py +46 -0
hermeskill/types/feedback.py +29 -0
hermeskill/types/grants.py +75 -0
hermeskill/types/heartbeats.py +26 -0
hermeskill/types/kills.py +117 -0
hermeskill/types/policy.py +51 -0
hermeskill/watcher.py +661 -0
hermeskill-0.1.0a1.dist-info/METADATA +23 -0
hermeskill-0.1.0a1.dist-info/RECORD +29 -0
hermeskill-0.1.0a1.dist-info/WHEEL +4 -0
hermeskill-0.1.0a1.dist-info/entry_points.txt +2 -0

hermeskill/__init__.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""Hermeskill SDK — apoptosis protocol core for AI agent supervision.
+Framework-agnostic core: WatcherState, symptom checks, death certificates,
+kill-event client, operator CLI. Install a framework adapter on top:
+    pip install hermeskill-hermes         # Hermes Agent plugin (recommended)
+The bare `hermeskill` package imports with no third-party agent-framework
+dependencies.
+Public exceptions:
+    from hermeskill import HermeskillTerminated
+    # Raised by framework adapters and `checkpoint()` when the agent is
+    # killed by Hermeskill. Catch at your top-level run loop if you need
+    # cleanup before exit.
+`checkpoint()` is a cooperative termination point for custom run loops;
+raises HermeskillTerminated if a kill directive is pending.
+"""
+from hermeskill._version import __version__
+from hermeskill.calibration import LabeledKill, build_calibration_report
+from hermeskill.exceptions import HermeskillError, HermeskillTerminated
+from hermeskill.supervisor import Heartbeat, ProcessSupervisor, SupervisorResult
+__all__ = [
+    "Heartbeat",
+    "HermeskillError",
+    "HermeskillTerminated",
+    "LabeledKill",
+    "ProcessSupervisor",
+    "SupervisorResult",
+    "__version__",
+    "build_calibration_report",
+    "checkpoint",
+]
+def checkpoint() -> None:
+    """Cooperative kill point: raise if a watcher has been told to terminate.
+    Scans the watchers yielded by `all_watchers()` in order and raises
+    `HermeskillTerminated` for the first one whose `terminate_requested`
+    flag is set. The exception carries that watcher's `terminate_reason`
+    (or "terminated" when the reason is empty) and its
+    `terminate_kill_event_id`. Returns `None` when no watcher is flagged,
+    which includes the case where `all_watchers()` yields nothing.
+    """
+    # Function-scope imports: resolved at call time, not at package import.
+    from hermeskill.exceptions import HermeskillTerminated
+    from hermeskill.watcher import all_watchers
+    flagged = next(
+        (watcher for watcher in all_watchers() if watcher.terminate_requested),
+        None,
+    )
+    if flagged is None:
+        return
+    reason = flagged.terminate_reason or "terminated"
+    raise HermeskillTerminated(
+        reason,
+        kill_event_id=flagged.terminate_kill_event_id,
+    )

hermeskill/_version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.1.0a1"

hermeskill/apoptosis.py ADDED Viewed

@@ -0,0 +1,342 @@
+"""L2 forced-termination watchdog.
+L1 (cooperative termination) is handled by the framework adapter — the
+kill stub or checkpoint raise at tool/chain boundaries. It works as long
+as the agent's event loop is alive and reaching await points. When it
+isn't — agent is wedged inside a sync tool, or stubbornly ignoring the
+cooperative signal — we need an out-of-band path that can cancel from
+outside the loop.
+That's L2: **one daemon `threading.Thread` per watched agent**, holding a
+reference to the agent's asyncio loop and main `Task`. The thread sleeps
+on `state._terminate_event`. When apoptosis fires, it waits the policy's
+`cooperative_grace_seconds`, checks whether the task finished on its own
+(L1 worked → no escalation), and if not, calls
+`loop.call_soon_threadsafe(task.cancel)` — scheduling cancellation from
+*outside* the loop, which is the part that defeats the wedged-loop case.
+**Why a thread, not an asyncio task.** If we scheduled the L2 timer with
+`asyncio.create_task(...)` in the agent's own loop, it would queue
+behind whatever's blocking that loop — i.e. behind the very thing it's
+trying to interrupt. Same-loop scheduling defeats the entire purpose.
+Run as a thread, run outside the loop. *Do not* refactor this back into
+the loop in a future cleanup pass — leave this comment as ballast.
+**Honest limitation.** `task.cancel()` raises CancelledError at the next
+*await point*. If an agent is wedged in pure-Python CPU code (`while
+True: pass` inside a sync tool with no awaits anywhere reachable), the
+cancellation will not fire — Python provides no portable way to
+interrupt a thread mid-bytecode. The watchdog logs the escalation
+attempt; in that case the only real recourse is killing the OS process
+(operator escalation, M3 webhook fires, M5 grants document the case).
+The watchdog still handles the realistic case (async tool wedged on a
+slow network call ignoring cooperative shutdown) — which is what the
+plan's "blocked-loop test" intends to exercise.
+Public surface: `Watchdog(state, grace_seconds)`, `.arm(loop, task)`,
+`.stop()`. Idempotent arming — call from `on_chain_start` every time;
+the first call starts the thread, later calls just refresh the captured
+loop + task in case a new invocation runs in a different task.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+import threading
+import time
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING
+from hermeskill.types import (
+    DeathCertificate,
+    KillEventIn,
+    ShutdownLogEntry,
+    TriggerType,
+)
+if TYPE_CHECKING:
+    from hermeskill.watcher import WatcherState
+logger = logging.getLogger("hermeskill.apoptosis")
+class Watchdog:
+    """L2 forced-termination thread. One per `WatcherState`.
+    Lifecycle: `arm(loop, task)` records what to cancel and, on first use,
+    starts a daemon thread. The thread waits for the state's terminate
+    event, allows `grace_seconds` for the task to finish on its own, and
+    otherwise schedules `task.cancel` on the captured loop from outside
+    it. `stop()` asks the thread to exit without cancelling anything.
+    """
+    # Polling cadence for the thread's main wait + grace-period loops.
+    # Trades responsiveness against wakeup cost; 100ms is plenty fast for
+    # human-perceptible kill latency without burning CPU on idle agents.
+    _POLL_SECONDS = 0.1
+    def __init__(self, state: WatcherState, *, grace_seconds: float) -> None:
+        """Bind the watchdog to `state`; no thread is started until `arm()`."""
+        self.state = state
+        self.grace_seconds = grace_seconds
+        self._loop: asyncio.AbstractEventLoop | None = None
+        self._task: asyncio.Task[object] | None = None
+        self._thread: threading.Thread | None = None
+        self._stop = threading.Event()
+        # Guards the loop/task slots. Cheap — only touched on arm() + on
+        # transitions inside _run().
+        self._lock = threading.Lock()
+        # True iff we've already issued a call_soon_threadsafe(task.cancel)
+        # for this kill; prevents double-cancel on long-grace policies.
+        self._escalated = False
+    # --- public API -------------------------------------------------------
+    def arm(
+        self,
+        loop: asyncio.AbstractEventLoop,
+        task: asyncio.Task[object],
+    ) -> None:
+        """Capture the loop + task to watch. Idempotent.
+        On first call: starts the daemon thread.
+        On later calls: refreshes the loop/task slots (a new ainvoke may
+        run in a different task than the previous one).
+        The slots and the thread handle are updated under `_lock`. A
+        thread that has already exited is not restarted, because the
+        handle stays non-None.
+        """
+        with self._lock:
+            self._loop = loop
+            self._task = task
+            if self._thread is None:
+                self._thread = threading.Thread(
+                    target=self._run,
+                    daemon=True,
+                    name=f"hermeskill-watchdog-{self.state.agent_id}",
+                )
+                self._thread.start()
+    def stop(self, *, join_timeout: float = 2.0) -> None:
+        """Signal the thread to exit. Does NOT force-cancel the task.
+        Called on agent unregister / process shutdown. The thread joins
+        within `join_timeout`; if it doesn't, we log and move on (daemon
+        threads die with the process anyway).
+        """
+        self._stop.set()
+        # Poke the terminate_event so a thread blocked on it wakes up to
+        # observe the stop flag. (We can't `notify` a threading.Event the
+        # same way as a Condition — set() is the only signal mechanism.)
+        # NOTE(review): this leaves the shared terminate event set even
+        # though no kill was requested. `_run` already waits with a
+        # timeout, so the poke only saves up to one poll interval; confirm
+        # nothing else treats a set event as a kill.
+        self.state._terminate_event.set()
+        with self._lock:
+            thread = self._thread
+        if thread is not None and thread.is_alive():
+            thread.join(timeout=join_timeout)
+            if thread.is_alive():
+                logger.warning(
+                    "hermeskill L2 watchdog: thread %s did not join within %.1fs",
+                    thread.name,
+                    join_timeout,
+                )
+    # --- thread body ------------------------------------------------------
+    def _run(self) -> None:
+        """The daemon thread: wait for kill, give grace, escalate.
+        Loop structure:
+          1. Wait on `_terminate_event` (with timeout so we can poll
+             `_stop` and the flag).
+          2. When triggered, wait `grace_seconds` for cooperative
+             termination — checking `task.done()` periodically to bail
+             out early when L1 wins.
+          3. If task still alive after grace: escalate via
+             `loop.call_soon_threadsafe(task.cancel)`.
+          4. Exit. One watchdog = one kill — no re-arm on the same state.
+        A `stop()` observed at any point before step 3 ends the thread
+        without escalating. Any exception is logged, not propagated.
+        """
+        logger.debug(
+            "hermeskill L2 watchdog armed for agent %s (grace=%.1fs)",
+            self.state.agent_id,
+            self.grace_seconds,
+        )
+        try:
+            # --- step 1: wait for kill signal -----------------------
+            while not self._stop.is_set():
+                triggered = self.state._terminate_event.wait(timeout=self._POLL_SECONDS)
+                if self._stop.is_set():
+                    return
+                # Defensive: also check the flag in case a caller wrote
+                # it directly without going through request_termination.
+                if triggered or self.state.terminate_requested:
+                    break
+            else:
+                return  # stopped before any kill
+            # --- step 2: cooperative-grace window -------------------
+            deadline = time.monotonic() + self.grace_seconds
+            while True:
+                # Stop is checked before the deadline so that a stop()
+                # always wins, even with a zero or already-expired grace.
+                if self._stop.is_set():
+                    return
+                remaining = deadline - time.monotonic()
+                if remaining <= 0:
+                    break
+                with self._lock:
+                    task = self._task
+                if task is not None and task.done():
+                    logger.debug(
+                        "hermeskill L2 watchdog: agent %s cooperated, no escalation",
+                        self.state.agent_id,
+                    )
+                    return
+                # Sleep on the stop event instead of time.sleep(): a stop()
+                # during the pause ends the thread at once, and the pause
+                # never runs past the end of the grace window.
+                if self._stop.wait(timeout=min(self._POLL_SECONDS, remaining)):
+                    return
+            # --- step 3: escalate -----------------------------------
+            self._escalate()
+        except Exception:
+            logger.exception("hermeskill L2 watchdog crashed for agent %s", self.state.agent_id)
+    def _escalate(self) -> None:
+        """Schedule `task.cancel` on the captured loop, at most once.
+        Snapshots loop, task and the `_escalated` flag under `_lock`,
+        marking the watchdog escalated in the same critical section.
+        Returns without acting if it already escalated, if no loop/task
+        was captured (logged as a warning), or if the task is already
+        done. Otherwise it logs, schedules the cancel via
+        `call_soon_threadsafe`, and records a "watchdog_escalated"
+        lifecycle event and shutdown step on the state. Recording
+        failures are logged and swallowed.
+        """
+        with self._lock:
+            loop = self._loop
+            task = self._task
+            already = self._escalated
+            self._escalated = True
+        if already:
+            return
+        if loop is None or task is None:
+            logger.warning(
+                "hermeskill L2 watchdog: no loop/task captured for agent %s; "
+                "cannot escalate (this is the case the docstring's 'honest "
+                "limitation' note describes — operator must kill the process)",
+                self.state.agent_id,
+            )
+            return
+        if task.done():
+            return  # narrowly raced with cooperative completion
+        logger.warning(
+            "hermeskill L2 watchdog: cooperative grace (%.1fs) expired for "
+            "agent %s; forcing task cancellation",
+            self.grace_seconds,
+            self.state.agent_id,
+        )
+        try:
+            loop.call_soon_threadsafe(task.cancel)
+        except RuntimeError:
+            # Loop already closed — nothing left to cancel against.
+            logger.debug(
+                "hermeskill L2 watchdog: loop already closed for agent %s",
+                self.state.agent_id,
+            )
+        # Record a lifecycle event AND a shutdown-log step so the death
+        # cert shows the watchdog fired and audit can correlate timings.
+        try:
+            self.state.record_lifecycle(
+                "watchdog_escalated",
+                grace_seconds=self.grace_seconds,
+            )
+            self.state.record_shutdown_step(
+                "watchdog_escalated",
+                grace_seconds=self.grace_seconds,
+            )
+        except Exception:
+            logger.exception("watchdog: failed to record escalation lifecycle")
+# --- death certificate builder + posting ----------------------------------
+def build_death_certificate(
+    state: WatcherState,
+    *,
+    terminated_at: datetime | None = None,
+) -> DeathCertificate:
+    """Freeze `state` into a `DeathCertificate`.
+    Where each field comes from:
+      * `terminated_at` — the argument, or `datetime.now(UTC)` if omitted.
+      * `triggered_at` — `state.terminate_requested_at`, falling back to
+        the `terminated_at` value when the state recorded none.
+      * `trigger_reason` — `state.terminate_reason`, or "unknown".
+      * `trigger_type`, `operator`, `operator_reason` — MANUAL with the
+        "operator" / "operator_reason" entries of `state.manual_kill`
+        when that mapping is present; AUTO with both set to `None`
+        otherwise.
+      * `symptoms_log` — a shallow list copy of `state.symptoms_log`.
+      * `shutdown_log` — every `state.shutdown_log` entry passed through
+        `_normalize_step`.
+      * `final_state` — always an empty dict here.
+    No customer, policy or feedback-URL field is set by this function.
+    """
+    death_time = terminated_at or datetime.now(UTC)
+    decided_at = state.terminate_requested_at or death_time
+    cause = state.terminate_reason or "unknown"
+    # The presence of `manual_kill`, not the reason text, selects MANUAL.
+    operator_info = state.manual_kill
+    was_manual = operator_info is not None
+    return DeathCertificate(
+        agent_id=state.agent_id,
+        triggered_at=decided_at,
+        terminated_at=death_time,
+        trigger_type=TriggerType.MANUAL if was_manual else TriggerType.AUTO,
+        trigger_reason=cause,
+        symptoms_log=list(state.symptoms_log),
+        final_state={},  # v2 / cleanup-hook hookpoint
+        shutdown_log=[_normalize_step(entry) for entry in state.shutdown_log],
+        operator=operator_info.get("operator") if was_manual else None,
+        operator_reason=operator_info.get("operator_reason") if was_manual else None,
+    )
+def build_kill_event_payload(
+    state: WatcherState,
+    *,
+    terminated_at: datetime | None = None,
+) -> KillEventIn:
+    """Build the `POST /agents/{id}/kill_events` body for `state`.
+    Builds the death certificate once, then returns a `KillEventIn` that
+    embeds it and repeats its trigger type, trigger reason, both
+    timestamps and shutdown log as top-level fields.
+    """
+    certificate = build_death_certificate(state, terminated_at=terminated_at)
+    # Top-level fields that are copied straight off the certificate.
+    mirrored = (
+        "trigger_type",
+        "trigger_reason",
+        "triggered_at",
+        "terminated_at",
+        "shutdown_log",
+    )
+    envelope = {name: getattr(certificate, name) for name in mirrored}
+    return KillEventIn(death_certificate=certificate, **envelope)
+def _normalize_step(raw: dict[str, object]) -> ShutdownLogEntry:
+    """Coerce a `record_shutdown_step()`-format dict into the typed model.
+    Steps are appended to `state.shutdown_log` as plain dicts (cheap
+    write path); we type-validate them only when the cert is built.
+    Coercion is best-effort, so one malformed entry cannot abort the
+    certificate:
+      * `at` — a `datetime` is used as-is and an ISO-8601 string is
+        parsed. Anything else, including a string that does not parse,
+        becomes `datetime.now(UTC)`.
+      * `duration_ms` — converted with `float()`; missing or
+        non-numeric values become `None`.
+      * `detail` — kept only when it is a dict, otherwise `{}`.
+      * `step` — `str()` of the value, or "unknown" when missing.
+    """
+    at_value = raw.get("at")
+    at: datetime | None = None
+    if isinstance(at_value, datetime):
+        at = at_value
+    elif isinstance(at_value, str):
+        try:
+            at = datetime.fromisoformat(at_value)
+        except ValueError:
+            # Unparsable timestamp: handled like a missing one below.
+            at = None
+    if at is None:
+        at = datetime.now(UTC)
+    duration_raw = raw.get("duration_ms")
+    duration_ms: float | None
+    try:
+        duration_ms = (
+            None if duration_raw is None else float(duration_raw)  # type: ignore[arg-type]
+        )
+    except (TypeError, ValueError):
+        # Non-numeric duration: drop it; the step itself is still recorded.
+        duration_ms = None
+    detail = raw.get("detail") or {}
+    if not isinstance(detail, dict):
+        detail = {}
+    step_raw = raw.get("step")
+    step = str(step_raw) if step_raw is not None else "unknown"
+    return ShutdownLogEntry(
+        step=step,
+        at=at,
+        duration_ms=duration_ms,
+        detail=detail,
+    )

hermeskill/calibration.py ADDED Viewed

@@ -0,0 +1,235 @@
+"""Feedback-driven threshold calibration (Phase 4).
+The control plane already collects an operator's verdict on every kill via the
+one-click feedback link baked into each death certificate
+(`kill_events.feedback_label`). Until now those labels just sat in the database.
+This module turns them into a **transparent, advisory** calibration report: per
+symptom, how often did kills under a given policy get labeled false-positive,
+and — if that rate is high enough on a large enough sample — what looser
+threshold should a human *consider* setting.
+Design constraints (these are deliberate, and they are the point):
+  * **Suggest-only.** We never mutate a policy. Policies are SDK-defined
+    constants (`hermeskill.policies`); the "suggestion" is literally "edit that
+    constant." Auto-tuning limits from agent-influenced feedback would be both
+    an overclaim and a genuine safety hole.
+  * **No learning / no ML.** It's a rate, a sample-size gate, and one fixed
+    conservative step. A reviewer can read the whole rule in a minute and
+    trust it precisely because there's no black box.
+  * **Evidence over precision.** The suggested number is a conservative nudge
+    (`* 1.5`, rounded to something readable). What should actually drive the
+    decision is the evidence we lead with: the false-positive rate and n.
+  * **False positives only.** See `hermeskill.types.calibration` — the data can't
+    speak to kills that never fired, so we never recommend tightening.
+"""
+from __future__ import annotations
+import math
+from collections import Counter
+from collections.abc import Iterable
+from dataclasses import dataclass
+from hermeskill.types import (
+    CalibrationReport,
+    FeedbackLabel,
+    Policy,
+    SymptomCalibration,
+    SymptomType,
+)
+# --- tunables (transparent, documented) ----------------------------------
+#: Below this many labeled kills for a symptom, we report stats but make no
+#: suggestion — a 1-of-1 false positive is noise, not a signal.
+MIN_SAMPLES_PER_SYMPTOM = 5
+#: Only suggest loosening when at least this fraction of a symptom's labeled
+#: kills were false positives. 30% wrong is a real calibration problem.
+FALSE_POSITIVE_ACTION_THRESHOLD = 0.30
+#: The fixed conservative step. We loosen by half, then round to a readable
+#: value. Intentionally *not* derived from the false-positive rate — a
+#: rate-scaled number ("3 → 4.2") reads as false precision; a flat nudge plus
+#: the evidence reads as honest.
+LOOSEN_FACTOR = 1.5
+@dataclass(frozen=True)
+class _Knob:
+    """A symptom's single numeric threshold, and how to round a suggestion."""
+    # Attribute name read from `policy.thresholds` (via getattr) to obtain
+    # the threshold's current value.
+    field: str
+    # Unit tag: selects the rounding rule in `_loosen` and the rendering in
+    # `_fmt`.
+    kind: str  # "int" | "seconds" | "usd"
+#: Symptoms that map to exactly one numeric threshold worth suggesting. The
+#: others (tool_scope_violation → allowlist, heartbeat_stale → liveness,
+#: manual_kill → operator) have no single knob and are reported stats-only.
+_SYMPTOM_KNOB: dict[SymptomType, _Knob] = {
+    SymptomType.LOOP: _Knob("max_loop_repeats", "int"),
+    SymptomType.TOKEN_RUNAWAY: _Knob("max_cost_usd", "usd"),
+    SymptomType.WALL_CLOCK: _Knob("max_runtime_seconds", "seconds"),
+}
+@dataclass(frozen=True)
+class LabeledKill:
+    """One past kill plus the operator's verdict on it.
+    The minimal input the calibrator needs. The control plane builds these
+    from `kill_events` rows (symptom extracted from the death cert's terminal
+    `symptoms_log` entry, label from `feedback_label`); tests build them
+    directly. Unlabeled kills are simply not passed in.
+    """
+    # Symptom the kill is attributed to; `build_calibration_report` groups
+    # kills by this value.
+    symptom: SymptomType
+    # Operator verdict; tallied per symptom into good / false-positive /
+    # missed / other counts.
+    label: FeedbackLabel
+def _loosen(current: float, kind: str) -> float:
+    """Apply the fixed conservative step and round to a readable value.
+    The raw suggestion is `current * LOOSEN_FACTOR`. It is then rounded by
+    `kind`: "int" rounds up to a whole number, "seconds" rounds to the
+    nearest minute, and anything else (usd) rounds to two decimals.
+    Rounding is cosmetic and must not undo the loosening. If the rounded
+    value would be at or below `current` while the raw value is above it
+    (for example 20s rounding to 0s, or $0.01 rounding back to $0.01),
+    the unrounded raw value is returned instead.
+    """
+    raw = current * LOOSEN_FACTOR
+    if kind == "int":
+        rounded = float(math.ceil(raw))
+    elif kind == "seconds":
+        # Nearest minute reads better than 450.0s.
+        rounded = float(round(raw / 60) * 60)
+    else:
+        # usd
+        rounded = round(raw, 2)
+    if rounded <= current < raw:
+        # Readability lost to correctness: keep the real, looser number.
+        return raw
+    return rounded
+def _confidence_for(n: int) -> str:
+    """Map a labeled-kill count to "high" (30+), "medium" (10-29) or "low"."""
+    # Tiers are listed from the highest floor down; the first floor met wins.
+    for floor, tier in ((30, "high"), (10, "medium")):
+        if n >= floor:
+            return tier
+    return "low"
+def _pct(rate: float) -> str:
+    """Render a 0-1 fraction as a whole-number percentage string, e.g. "30%"."""
+    percent = rate * 100
+    return format(percent, ".0f") + "%"
+def _calibrate_symptom(
+    symptom: SymptomType, labels: list[FeedbackLabel], policy: Policy
+) -> SymptomCalibration:
+    """Summarise one symptom's labels and decide whether to suggest loosening.
+    The per-label counts and the false-positive rate (false positives
+    over all labels; 0.0 for an empty list) are always reported. The
+    first matching case below sets the remaining fields:
+      1. Fewer than `MIN_SAMPLES_PER_SYMPTOM` labels — confidence stays
+         "insufficient_data"; no threshold fields.
+      2. Symptom has no entry in `_SYMPTOM_KNOB` — confidence tier only.
+      3. Rate below `FALSE_POSITIVE_ACTION_THRESHOLD` — threshold field
+         and current value reported, no suggestion.
+      4. Otherwise — as case 3 plus a `suggested_value` from `_loosen`.
+    """
+    n = len(labels)
+    tally = Counter(labels)
+    false_positives = tally[FeedbackLabel.FALSE_POSITIVE]
+    rate = false_positives / n if n else 0.0
+    stats = SymptomCalibration(
+        symptom=symptom,
+        total_labeled=n,
+        good_kills=tally[FeedbackLabel.GOOD_KILL],
+        false_positives=false_positives,
+        missed_kills=tally[FeedbackLabel.MISSED_KILL],
+        other=tally[FeedbackLabel.OTHER],
+        false_positive_rate=rate,
+        confidence="insufficient_data",
+        rationale="",
+    )
+    def finish(**changes: object) -> SymptomCalibration:
+        # Every outcome is the shared stats plus a few overridden fields.
+        return stats.model_copy(update=changes)
+    # Case 1: sample too small to say anything.
+    if n < MIN_SAMPLES_PER_SYMPTOM:
+        return finish(
+            rationale=(
+                f"n={n} labeled kill(s); need "
+                f"{MIN_SAMPLES_PER_SYMPTOM}+ before suggesting a change."
+            ),
+        )
+    tier = _confidence_for(n)
+    knob = _SYMPTOM_KNOB.get(symptom)
+    # Case 2: no single numeric threshold to point at.
+    if knob is None:
+        return finish(
+            confidence=tier,
+            rationale=(
+                f"{_pct(rate)} false-positive (n={n}). No single "
+                f"numeric threshold maps to {symptom.value}; review the "
+                f"tool allowlist / liveness settings by hand."
+            ),
+        )
+    present = float(getattr(policy.thresholds, knob.field))
+    # Case 3: false-positive rate is within tolerance.
+    if rate < FALSE_POSITIVE_ACTION_THRESHOLD:
+        return finish(
+            threshold_field=knob.field,
+            current_value=present,
+            confidence=tier,
+            rationale=(
+                f"{_pct(rate)} false-positive (n={n}) — within "
+                f"tolerance ({_pct(FALSE_POSITIVE_ACTION_THRESHOLD)}); "
+                f"no change suggested."
+            ),
+        )
+    # Case 4: too many false positives, so propose the looser value.
+    proposal = _loosen(present, knob.kind)
+    return finish(
+        threshold_field=knob.field,
+        current_value=present,
+        suggested_value=proposal,
+        confidence=tier,
+        rationale=(
+            f"{_pct(rate)} of {symptom.value} kills under "
+            f"'{policy.name}' were labeled false-positive (n={n}). "
+            f"Consider raising {knob.field} "
+            f"{_fmt(present, knob.kind)}→{_fmt(proposal, knob.kind)}."
+        ),
+    )
+def _fmt(value: float, kind: str) -> str:
+    """Render a threshold value the way a human writes it in the policy.
+    "usd" values get a leading "$", "seconds" values a trailing "s", and
+    any other kind is rendered as a bare number. The number is written in
+    fixed-point notation with trailing zeros removed, so whole values
+    print without a decimal point ("450s") and large or precise values
+    are neither switched to scientific notation nor cut to six
+    significant digits ("$12345.67", "1500000s").
+    """
+    number = f"{value:.6f}"
+    if "." in number:
+        # Strip padding zeros first, then a dot left dangling by that.
+        number = number.rstrip("0").rstrip(".")
+    if kind == "usd":
+        return f"${number}"
+    if kind == "seconds":
+        return f"{number}s"
+    return number
+def build_calibration_report(
+    policy: Policy, labeled_kills: Iterable[LabeledKill]
+) -> CalibrationReport:
+    """Turn labeled kills into an advisory, per-symptom calibration report.
+    Kills are grouped by symptom and each group is summarised by
+    `_calibrate_symptom`. Entries follow `SymptomType` iteration order,
+    and a symptom with no labeled kill is left out. The report's total
+    is the sum of the per-symptom label counts. Performs no I/O.
+    """
+    grouped: dict[SymptomType, list[FeedbackLabel]] = {}
+    for kill in labeled_kills:
+        bucket = grouped.get(kill.symptom)
+        if bucket is None:
+            grouped[kill.symptom] = [kill.label]
+        else:
+            bucket.append(kill.label)
+    rows = []
+    grand_total = 0
+    # Walk the enum, not the dict, so output order is independent of input order.
+    for symptom in SymptomType:
+        labels = grouped.get(symptom)
+        if labels is None:
+            continue
+        row = _calibrate_symptom(symptom, labels, policy)
+        rows.append(row)
+        grand_total += row.total_labeled
+    return CalibrationReport(
+        policy_name=policy.name,
+        total_labeled_kills=grand_total,
+        symptoms=rows,
+    )