npm - loki-mode - Versions diffs - 7.19.0 → 7.19.2 - Mend

loki-mode 7.19.0 → 7.19.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/completion-council.sh +483 -0
package/autonomy/config.example.yaml +26 -0
package/autonomy/run.sh +103 -3
package/dashboard/__init__.py +1 -1
package/dashboard/server.py +40 -11
package/dashboard/static/index.html +543 -497
package/docs/INSTALLATION.md +1 -1
package/docs/UNCERTAINTY-ESCALATION-PLAN.md +396 -0
package/docs/VERIFIED-COMPLETION-PLAN.md +462 -0
package/loki-ts/dist/loki.js +2 -2
package/mcp/__init__.py +1 -1
package/package.json +1 -1
package/skills/quality-gates.md +115 -0

package/autonomy/run.sh CHANGED

@@ -124,6 +124,26 @@
 #   LOKI_NOTIFICATIONS   - Enable desktop notifications (default: true)
 #   LOKI_NOTIFICATION_SOUND - Play sound with notifications (default: true)
 #
+# Uncertainty-Gated Escalation (v7.19.2, default-on):
+#   LOKI_UNCERTAINTY_ESCALATION  - Master on/off for proactive stuck-escalation (default: 1; set 0 to
+#                                  disable; byte-identical when off). Decision lives in
+#                                  completion-council.sh (uncertainty_should_escalate); action in run.sh.
+#                                  NOTE: AUTONOMY_MODE defaults to "perpetual"; in perpetual mode PAUSE
+#                                  is auto-cleared by check_human_intervention, so escalation degrades
+#                                  to notify-only (notification fires, run does NOT halt).
+#   LOKI_UNCERTAINTY_ROUNDS      - Consecutive rounds where >=2 of 3 proxies must co-occur before
+#                                  escalating (default: 2; recommended range 2-3). Debounces transient
+#                                  noise: a single hot proxy never escalates alone.
+#   LOKI_UNCERTAINTY_NOCHANGE_MIN - Proxy 1 threshold: consecutive_no_change value that marks p1 hot.
+#                                  (default: COUNCIL_STAGNATION_LIMIT - 1, i.e. one below the circuit-
+#                                  breaker limit so escalation fires before the breaker ends the run).
+#                                  Floored at 1 at runtime.
+#   LOKI_UNCERTAINTY_SPLIT_ROUNDS - Proxy 3 threshold: number of consecutive trailing council verdicts
+#                                  that must be REJECTED-with-approver (split) to mark p3 hot
+#                                  (default: 2). Between council votes p3 may be stale; it is always
+#                                  fresh when proxy 1 is hot because proxy 1 hot forces a circuit-
+#                                  breaker vote that refreshes verdicts.
+#
 # Human Intervention (Auto-Claude pattern):
 #   PAUSE file:          touch .loki/PAUSE - pauses after current session
 #   HUMAN_INPUT.md:      echo "instructions" > .loki/HUMAN_INPUT.md
@@ -286,6 +306,10 @@ parse_simple_yaml() {
     set_from_yaml "$file" "completion.council.check_interval" "LOKI_COUNCIL_CHECK_INTERVAL"
     set_from_yaml "$file" "completion.council.min_iterations" "LOKI_COUNCIL_MIN_ITERATIONS"
     set_from_yaml "$file" "completion.council.stagnation_limit" "LOKI_COUNCIL_STAGNATION_LIMIT"
+    set_from_yaml "$file" "completion.uncertainty.escalation" "LOKI_UNCERTAINTY_ESCALATION"
+    set_from_yaml "$file" "completion.uncertainty.rounds" "LOKI_UNCERTAINTY_ROUNDS"
+    set_from_yaml "$file" "completion.uncertainty.nochange_min" "LOKI_UNCERTAINTY_NOCHANGE_MIN"
+    set_from_yaml "$file" "completion.uncertainty.split_rounds" "LOKI_UNCERTAINTY_SPLIT_ROUNDS"
     # Model
     set_from_yaml "$file" "model.prompt_repetition" "LOKI_PROMPT_REPETITION"
@@ -428,6 +452,10 @@ parse_yaml_with_yq() {
         "completion.council.check_interval:LOKI_COUNCIL_CHECK_INTERVAL"
         "completion.council.min_iterations:LOKI_COUNCIL_MIN_ITERATIONS"
         "completion.council.stagnation_limit:LOKI_COUNCIL_STAGNATION_LIMIT"
+        "completion.uncertainty.escalation:LOKI_UNCERTAINTY_ESCALATION"
+        "completion.uncertainty.rounds:LOKI_UNCERTAINTY_ROUNDS"
+        "completion.uncertainty.nochange_min:LOKI_UNCERTAINTY_NOCHANGE_MIN"
+        "completion.uncertainty.split_rounds:LOKI_UNCERTAINTY_SPLIT_ROUNDS"
         "model.prompt_repetition:LOKI_PROMPT_REPETITION"
         "model.confidence_routing:LOKI_CONFIDENCE_ROUTING"
         "model.autonomy_mode:LOKI_AUTONOMY_MODE"
@@ -9833,10 +9861,24 @@ except (json.JSONDecodeError, KeyError, TypeError, OSError):
             # BUG-RUN-003: Restore ITERATION_COUNT from persisted state
             ITERATION_COUNT=$(python3 -c "import json; print(json.load(open('.loki/autonomy-state.json')).get('iterationCount', 0))" 2>/dev/null || echo "0")
-            # Reset retry count if previous session ended in a terminal state
-            # This allows new sessions to start fresh after failures
+            # Reset retry count + iteration count if previous session ended in a
+            # terminal state. A fresh `loki start` after a terminal run is a NEW
+            # run and must start from a fresh baseline. This matters for the
+            # verified-completion evidence gate (v7.19.1): the run-start SHA
+            # recapture in run_autonomous is gated on ITERATION_COUNT==0, so a
+            # stale count here would leave the gate diffing against the PRIOR
+            # run's start SHA (toothless). Terminal states covered:
+            #   - failure terminals: failed|max_iterations_reached|
+            #     max_retries_exceeded|exited
+            #   - success terminals: council_approved|council_force_approved|
+            #     completion_promise_fulfilled (the run finished; a re-run is new)
+            #   - running: previous process died mid-run (crash); nothing resumes
+            #     from "running" (paused/interrupted are the explicit resume
+            #     signals), so this closes the crash-rerun toothless-gate path.
+            # Deliberately NOT reset (genuine resume / user re-run expecting to
+            # continue): paused, interrupted, budget_exceeded, stopped.
             case "$prev_status" in
-                failed|max_iterations_reached|max_retries_exceeded|exited)
+                failed|max_iterations_reached|max_retries_exceeded|exited|council_approved|council_force_approved|completion_promise_fulfilled|running)
                     log_info "Previous session ended with status: $prev_status. Resetting for new session."
                     RETRY_COUNT=0
                     ITERATION_COUNT=0
@@ -11412,6 +11454,21 @@ run_autonomous() {
     load_state
     local retry=$RETRY_COUNT
+    # Capture run-start SHA for the evidence hard gate (v7.19.1).
+    # Fresh-run-aware: recapture HEAD when ITERATION_COUNT==0 (fresh invocation,
+    # reset, or corrupted/missing baseline); preserve only on a genuine resume
+    # (ITERATION_COUNT>0) so the diff window is not moved mid-run. A naive
+    # set-if-absent would leave a stale first-run baseline on every later run,
+    # making the gate toothless. Non-git or zero-commit repos write an empty
+    # file, which the gate treats as inconclusive (pass-through).
+    local _start_sha_file=".loki/state/start-sha"
+    mkdir -p ".loki/state"
+    if [ "${ITERATION_COUNT:-0}" -eq 0 ] || [ ! -s "$_start_sha_file" ]; then
+        (cd "${TARGET_DIR:-.}" && git rev-parse HEAD 2>/dev/null) > "$_start_sha_file" 2>/dev/null || true
+    fi
+    _LOKI_RUN_START_SHA="$(cat "$_start_sha_file" 2>/dev/null || echo "")"
+    export _LOKI_RUN_START_SHA
     # Notify dashboard of active project directory (for AI Chat cross-directory usage)
     if command -v curl &>/dev/null; then
         local project_cwd
@@ -12361,6 +12418,33 @@ if __name__ == "__main__":
             council_track_iteration "$log_file"
         fi
+        # Uncertainty-gated escalation (v7.19.2, Slice B action).
+        # The decision lives in completion-council.sh:uncertainty_should_escalate
+        # (pure, debounced once-per-stuck-episode, knob-first on
+        # LOKI_UNCERTAINTY_ESCALATION). This block only ACTS when the function
+        # returns rc 0. The type guard keeps it a silent no-op if the decision
+        # function is not present (byte-identical when the feature is absent/off).
+        if type uncertainty_should_escalate &>/dev/null && uncertainty_should_escalate; then
+            log_error "[Uncertainty] Escalating to human: >=2 of 3 stuck-signals co-occurred for N rounds (no-change / oscillation / council-split). PAUSE written; handoff saved."
+            log_warn  "[Uncertainty] To opt out of proactive escalation: set LOKI_UNCERTAINTY_ESCALATION=0"
+            # Structured handoff doc before the bare PAUSE (mirrors GATE precedent).
+            write_structured_handoff "uncertainty_escalation"
+            notify_intervention_needed "Uncertainty escalation: >=2 of 3 stuck-signals co-occurred for N rounds"
+            # Marker file for dashboard / external consumers. Empty touch has no
+            # partial-write window, so atomic temp+mv is not required here.
+            mkdir -p "${TARGET_DIR:-.}/.loki/signals"
+            touch "${TARGET_DIR:-.}/.loki/signals/UNCERTAINTY_ESCALATION"
+            # PAUSE is consumed by check_human_intervention: it halts in
+            # non-perpetual mode; in perpetual mode it auto-clears + notifies.
+            # That degrade is free; we add no consumer logic here.
+            touch "${TARGET_DIR:-.}/.loki/PAUSE"
+            # Perpetual-mode honesty: detect with the SAME vars the existing PAUSE
+            # consumer uses (run.sh check_human_intervention), print-only.
+            if [ "$AUTONOMY_MODE" = "perpetual" ] || [ "$PERPETUAL_MODE" = "true" ]; then
+                log_warn "[Uncertainty] Perpetual mode: PAUSE will be auto-cleared; this is notify-only and will NOT halt the run."
+            fi
+        fi
         # Check for success - ONLY stop on explicit completion promise
         # There's never a "complete" product - always improvements, bugs, features
         if [ $exit_code -eq 0 ]; then
@@ -12413,6 +12497,20 @@ if __name__ == "__main__":
                 log_warn "  Review details under .loki/quality/reviews/ ; gate_failures=${gate_failures}"
                 _gate_block_for_completion=""
                 # Fall through; the gate-failed loop continues normally
+            # v7.19.1: the verified-completion evidence gate must also guard the
+            # DEFAULT completion route (a completion claim via loki_complete_task
+            # / the completion-promise text), not only the interval-gated council
+            # path. Otherwise an agent can self-assert "done" with an empty diff
+            # and red tests and exit as completion_promise_fulfilled, bypassing
+            # the gate entirely -- exactly the fabrication this feature prevents.
+            # Mirrors the code_review block above (B-17). Opt-out: the gate's own
+            # LOKI_EVIDENCE_GATE=0 (council_evidence_gate returns 0 immediately
+            # when disabled, so this branch never fires). Gate output (reason +
+            # opt-out hint) is printed by council_evidence_gate itself.
+            elif check_completion_promise "$iter_output" && type council_evidence_gate &>/dev/null && ! council_evidence_gate; then
+                log_warn "Completion claim rejected: evidence gate found no proof of completion (empty diff vs run-start SHA, or red tests)."
+                log_warn "  Details under .loki/council/evidence-block.json ; opt out with LOKI_EVIDENCE_GATE=0"
+                # Fall through; keep iterating until there is real evidence.
             elif check_completion_promise "$iter_output"; then
                 echo ""
                 if [ -n "$COMPLETION_PROMISE" ]; then
@@ -12765,6 +12863,8 @@ check_human_intervention() {
         rm -f "$loki_dir/signals/COUNCIL_REVIEW_REQUESTED"
         if type council_checklist_gate &>/dev/null && ! council_checklist_gate; then
             log_info "Council force-review: blocked by checklist hard gate"
+        elif type council_evidence_gate &>/dev/null && ! council_evidence_gate; then
+            log_info "Council force-review: blocked by evidence hard gate"
         elif type council_vote &>/dev/null && council_vote; then
             log_header "COMPLETION COUNCIL: FORCE REVIEW - PROJECT COMPLETE"
             # BUG #17 fix: Write COMPLETED marker, generate council report, and

package/dashboard/__init__.py CHANGED

@@ -7,7 +7,7 @@ Modules:
     control: Session control API (start/stop/pause/resume)
 """
-__version__ = "7.19.0"
+__version__ = "7.19.2"
 # Expose the control app for easy import
 try:

package/dashboard/server.py CHANGED

@@ -6569,17 +6569,46 @@ _DEFAULT_QUALITY_GATES = [
 @app.get("/api/council/gate")
 async def get_council_gate():
-    """Get council hard gate status."""
-    gate_file = _get_loki_dir() / "council" / "gate-block.json"
-    if not gate_file.exists():
-        return {"blocked": False, "gates": _DEFAULT_QUALITY_GATES}
-    try:
-        data = json.loads(gate_file.read_text())
-        if "gates" not in data:
-            data["gates"] = _DEFAULT_QUALITY_GATES
-        return data
-    except (json.JSONDecodeError, IOError):
-        return {"blocked": False, "gates": _DEFAULT_QUALITY_GATES, "error": "Failed to read gate file"}
+    """Get council hard gate status.
+    Surfaces TWO independent hard gates, both written to .loki/council/:
+      - gate-block.json:     the legacy quality hard gate
+      - evidence-block.json: the verified-completion evidence gate (v7.19.1),
+                             which blocks STOP unless there is real evidence
+                             (nonzero diff vs run-start SHA AND green tests).
+    Either being present means completion is blocked. The response keeps the
+    legacy top-level shape (blocked/gates) for backward compatibility and adds
+    an `evidence` key so the UI can show WHY a verified-completion block fired.
+    """
+    council_dir = _get_loki_dir() / "council"
+    gate_file = council_dir / "gate-block.json"
+    evidence_file = council_dir / "evidence-block.json"
+    # Legacy quality gate (backward-compatible top level).
+    if gate_file.exists():
+        try:
+            data = json.loads(gate_file.read_text())
+            if "gates" not in data:
+                data["gates"] = _DEFAULT_QUALITY_GATES
+        except (json.JSONDecodeError, IOError):
+            data = {"blocked": False, "gates": _DEFAULT_QUALITY_GATES, "error": "Failed to read gate file"}
+    else:
+        data = {"blocked": False, "gates": _DEFAULT_QUALITY_GATES}
+    # Verified-completion evidence gate (additive).
+    if evidence_file.exists():
+        try:
+            evidence = json.loads(evidence_file.read_text())
+        except (json.JSONDecodeError, IOError):
+            evidence = {"blocked": True, "error": "Failed to read evidence-block file"}
+        data["evidence"] = evidence
+        # If either gate blocks, the overall status is blocked.
+        if evidence.get("blocked"):
+            data["blocked"] = True
+    else:
+        data["evidence"] = {"blocked": False}
+    return data
 # =============================================================================