@codyswann/lisa 2.130.6 → 2.132.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/codex/scripts/block-no-verify.sh +12 -3
- package/package.json +1 -1
- package/plugins/lisa/.claude-plugin/plugin.json +37 -1
- package/plugins/lisa/.codex-plugin/hooks.json +40 -0
- package/plugins/lisa/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa/hooks/block-no-verify.agy.sh +14 -8
- package/plugins/lisa/hooks/block-no-verify.sh +26 -12
- package/plugins/lisa/hooks/enforce-verification-gate.sh +222 -0
- package/plugins/lisa/skills/implement/SKILL.md +28 -5
- package/plugins/lisa-agy/hooks/block-no-verify.agy.sh +14 -8
- package/plugins/lisa-agy/plugin.json +1 -1
- package/plugins/lisa-agy/skills/implement/SKILL.md +28 -5
- package/plugins/lisa-cdk/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-cdk/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa-cdk-agy/plugin.json +1 -1
- package/plugins/lisa-cdk-copilot/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-cdk-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-copilot/.claude-plugin/plugin.json +32 -1
- package/plugins/lisa-copilot/hooks/block-no-verify.sh +26 -12
- package/plugins/lisa-copilot/hooks/enforce-verification-gate.sh +222 -0
- package/plugins/lisa-copilot/skills/implement/SKILL.md +28 -5
- package/plugins/lisa-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-cursor/hooks/block-no-verify.sh +26 -12
- package/plugins/lisa-cursor/hooks/enforce-verification-gate.sh +222 -0
- package/plugins/lisa-cursor/hooks/hooks.json +18 -0
- package/plugins/lisa-cursor/skills/implement/SKILL.md +28 -5
- package/plugins/lisa-expo/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-expo/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa-expo-agy/plugin.json +1 -1
- package/plugins/lisa-expo-copilot/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-expo-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-harper-fabric/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-harper-fabric/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa-harper-fabric-agy/plugin.json +1 -1
- package/plugins/lisa-harper-fabric-copilot/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-harper-fabric-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-nestjs/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-nestjs/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa-nestjs-agy/plugin.json +1 -1
- package/plugins/lisa-nestjs-copilot/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-nestjs-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-openclaw/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-openclaw/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa-openclaw-agy/plugin.json +1 -1
- package/plugins/lisa-openclaw-copilot/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-openclaw-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-rails/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-rails/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa-rails-agy/plugin.json +1 -1
- package/plugins/lisa-rails-copilot/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-rails-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-typescript/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-typescript/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa-typescript-agy/plugin.json +1 -1
- package/plugins/lisa-typescript-copilot/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-typescript-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-wiki/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-wiki/.codex-plugin/plugin.json +1 -1
- package/plugins/lisa-wiki-agy/plugin.json +1 -1
- package/plugins/lisa-wiki-copilot/.claude-plugin/plugin.json +1 -1
- package/plugins/lisa-wiki-cursor/.claude-plugin/plugin.json +1 -1
- package/plugins/src/base/.claude-plugin/plugin.json +16 -2
- package/plugins/src/base/hooks/block-no-verify.agy.sh +14 -8
- package/plugins/src/base/hooks/block-no-verify.sh +26 -12
- package/plugins/src/base/hooks/enforce-verification-gate.sh +222 -0
- package/plugins/src/base/skills/implement/SKILL.md +28 -5
|
@@ -96,7 +96,14 @@ IF it is a Fix (bug), execute the Reproduce sub-flow FIRST:
|
|
|
96
96
|
1. Write a simple API client and call the offending API
|
|
97
97
|
2. Start the server on localhost and use the Playwright CLI or Chrome DevTools
|
|
98
98
|
|
|
99
|
-
Using the general-purpose agent in Team Lead session, determine how you will know that the task is fully complete
|
|
99
|
+
Using the general-purpose agent in Team Lead session, determine how you will know that the task is fully complete. Write this as an **effective completion condition** — one an independent verifier could confirm from observed output alone, not from your assertion that it works. A strong condition has:
|
|
100
|
+
|
|
101
|
+
- **One measurable end state** — a status code, an exit code, a row count, an observable UI state, an empty queue. Not "it looks right" or "the code is correct".
|
|
102
|
+
- **A stated proof command that surfaces the evidence** — exactly how the running system is exercised so the result is observable (e.g. `curl … returns 200 with {…}`, "the Playwright run reaches the dashboard", "`SELECT …` returns the new row"). Quality gates (test/typecheck/lint) do NOT count — they are prerequisites.
|
|
103
|
+
- **Constraints that must hold** — anything that must not change on the way there (e.g. "no other endpoint's response changes", "no migration is dropped").
|
|
104
|
+
|
|
105
|
+
This condition is the contract the Verify flow proves and records in the verification verdict (below); it is what the completion gate checks before the flow may stop.
|
|
106
|
+
|
|
100
107
|
1. Examples
|
|
101
108
|
1. Deploy the changes directly to dev, then write a simple API client and call the offending API
|
|
102
109
|
2. Start the server on localhost, then use the Playwright CLI or Chrome DevTools
|
|
@@ -116,8 +123,8 @@ Every task MUST include this JSON metadata block. Do NOT omit `skills` (use `[]`
|
|
|
116
123
|
"learnings": ["..."],
|
|
117
124
|
"verification": {
|
|
118
125
|
"type": "ui-recording|api-test|cli-test|database-check|manual-check|documentation",
|
|
119
|
-
"command": "the proof command — must run the actual system (NOT test/typecheck/lint, those are quality gates)",
|
|
120
|
-
"expected": "
|
|
126
|
+
"command": "the proof command — must run the actual system and surface its result in the transcript (NOT test/typecheck/lint, those are quality gates). Phrase it so an independent verifier sees the evidence, e.g. `curl -s localhost:3000/health` not `check that health works`",
|
|
127
|
+
"expected": "the single measurable end state that proves success — observable system behavior (status code, response body, row count, UI state), not a subjective judgement"
|
|
121
128
|
}
|
|
122
129
|
}
|
|
123
130
|
```
|
|
@@ -131,6 +138,22 @@ Before shutting down the team, execute the Verify flow:
|
|
|
131
138
|
|
|
132
139
|
1. Run quality gates: lint, typecheck, tests — all must pass. These are prerequisites, NOT verification.
|
|
133
140
|
2. `verification-specialist`: verify locally by running the actual system and observing results (empirical proof that the change works). This is the real verification step.
|
|
141
|
+
2a. **Record the verification verdict** — the independent, machine-readable proof that gates completion. The `verification-specialist` writes `${CLAUDE_PROJECT_DIR:-.}/.lisa/verification-status.json` with one entry per acceptance criterion, each carrying the proof command's observed evidence:
|
|
142
|
+
|
|
143
|
+
```json
|
|
144
|
+
{
|
|
145
|
+
"plan": "<plan-name>",
|
|
146
|
+
"status": "pass | fail | blocked | in_progress",
|
|
147
|
+
"criteria": [
|
|
148
|
+
{ "task": "<task id or title>", "criterion": "<the completion condition>", "status": "pass | fail", "evidence": "<the proof command run and the observed result>" }
|
|
149
|
+
],
|
|
150
|
+
"updated_at": "<ISO8601 UTC>"
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Set `status: "pass"` only when every criterion is `pass` with real evidence (output from running the system, not a claim). The verdict must be judged by an agent that did NOT implement the change (the `verification-specialist`), never self-certified by the implementer. This is runtime scratch — it is gitignored and MUST NOT be committed (treat it like the secrets exclusion in the commit step).
|
|
155
|
+
|
|
156
|
+
On Claude, the `enforce-verification-gate.sh` Stop hook reads this file and **will not let the flow stop** until it shows a terminal, all-`pass` verdict — carrying over the non-bypassable completion gate of the `/goal` primitive, but checked deterministically against real evidence rather than by a transcript-only evaluator model. If you must stop before completion (a readiness gate failed, a blocker was found, a dependency is unresolved), write the verdict with `status: "blocked"` and the reason: that records the outcome and releases the gate instead of leaving it to spin. Other harnesses fall back to this prose obligation.
|
|
134
157
|
3. Write e2e test encoding the verification
|
|
135
158
|
4. Record Implement usage on the originating work artifact via `lisa:usage-accounting` so the work item (or other implementation-owned artifact) gains a direct `implement` usage entry in the canonical `## Lisa Usage` section. If the parent / child graph is already known, prefer `record_and_rollup` so ancestor totals refresh in the same write; otherwise still write the direct entry, and if runtime usage is unavailable, use `source: unavailable` with nullable token/cost fields instead of skipping the row.
|
|
136
159
|
5. Commit ALL outstanding changes in logical batches on the branch (minus sensitive data/information) — not just changes made by the agent team. This includes pre-existing uncommitted changes that were on the branch before the plan started. Do NOT filter commits to only "task-related" files. If it shows up in git status, it gets committed (unless it contains secrets).
|
|
@@ -140,6 +163,6 @@ Before shutting down the team, execute the Verify flow:
|
|
|
140
163
|
9. Merge the PR
|
|
141
164
|
10. Monitor the deploy action that triggers automatically from the successful merge
|
|
142
165
|
11. If deploy fails, create a task for the agent team to fix the failure, open a new PR and then go back to step 7
|
|
143
|
-
12. Remote verification: `verification-specialist` verifies in target environment (same checks as local verification, but on remote)
|
|
166
|
+
12. Remote verification: `verification-specialist` verifies in target environment (same checks as local verification, but on remote), and refreshes the verdict (step 2a) to reflect the remote result.
|
|
144
167
|
13. `ops-specialist`: post-deploy health check, monitor for errors in first minutes
|
|
145
|
-
14. If remote verification fails, create a task for the agent team to find out why it failed, fix it and return to step 5 (
|
|
168
|
+
14. If remote verification fails, create a task for the agent team to find out why it failed, fix it and return to step 5. **Bound this loop**: after a small number of full fix→deploy→reverify cycles without reaching a passing remote verdict (treat ~3 as the ceiling unless the work item states otherwise), stop retrying — file a build-ready fix ticket, write the verdict with `status: "blocked"` and the diagnosis, and move the work item to blocked rather than looping indefinitely. The completion gate releases on a `blocked` verdict, so the flow ends with a recorded outcome instead of a silent spin or a self-declared success.
|
|
@@ -1,11 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "lisa",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.132.0",
|
|
4
4
|
"description": "Universal governance — agents, skills, commands, hooks, and rules for all projects",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Cody Swann"
|
|
7
7
|
},
|
|
8
8
|
"hooks": {
|
|
9
|
+
"userPromptSubmitted": [
|
|
10
|
+
{
|
|
11
|
+
"matcher": "",
|
|
12
|
+
"hooks": [
|
|
13
|
+
{
|
|
14
|
+
"type": "command",
|
|
15
|
+
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/enforce-verification-gate.sh"
|
|
16
|
+
}
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
],
|
|
9
20
|
"preToolUse": [
|
|
10
21
|
{
|
|
11
22
|
"matcher": "Bash",
|
|
@@ -19,6 +30,26 @@
|
|
|
19
30
|
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/parity-safety-net.sh"
|
|
20
31
|
}
|
|
21
32
|
]
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"matcher": "",
|
|
36
|
+
"hooks": [
|
|
37
|
+
{
|
|
38
|
+
"type": "command",
|
|
39
|
+
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/enforce-verification-gate.sh"
|
|
40
|
+
}
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
],
|
|
44
|
+
"agentStop": [
|
|
45
|
+
{
|
|
46
|
+
"matcher": "",
|
|
47
|
+
"hooks": [
|
|
48
|
+
{
|
|
49
|
+
"type": "command",
|
|
50
|
+
"command": "${CLAUDE_PLUGIN_ROOT}/hooks/enforce-verification-gate.sh"
|
|
51
|
+
}
|
|
52
|
+
]
|
|
22
53
|
}
|
|
23
54
|
],
|
|
24
55
|
"sessionStart": [
|
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
|
-
# PreToolUse hook for Bash: blocks
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
#
|
|
2
|
+
# PreToolUse hook for Bash: blocks commands that bypass git's verification hooks.
|
|
3
|
+
# Bypassing pre-commit/pre-push hooks (which exist for a reason) is blocked in
|
|
4
|
+
# all of its forms; the fix is to address the underlying issue, not silence the
|
|
5
|
+
# check. See feedback_never_no_verify in user memory.
|
|
6
6
|
#
|
|
7
|
-
#
|
|
8
|
-
# --no-verify
|
|
7
|
+
# Blocked bypass vectors:
|
|
8
|
+
# 1. the --no-verify long flag (any subcommand, any position, incl. subshells);
|
|
9
|
+
# 2. HUSKY=0 / HUSKY_SKIP_HOOKS=... — disables husky-managed git hooks;
|
|
10
|
+
# 3. core.hooksPath pointed at /dev/null or set empty — disables ALL git hooks.
|
|
11
|
+
#
|
|
12
|
+
# Word-boundary matching avoids false positives on longer flags (--no-verify-ssl,
|
|
13
|
+
# --no-verify-host) and on a legit custom hooks path (core.hooksPath=.husky).
|
|
14
|
+
#
|
|
15
|
+
# The short `-n` form is intentionally NOT matched (see block-no-verify.agy.sh):
|
|
16
|
+
# grep cannot distinguish a real -n option from -n in commit-message prose or an
|
|
17
|
+
# unrelated piped command, and -n is far more common than --no-verify.
|
|
9
18
|
set -euo pipefail
|
|
10
19
|
|
|
11
20
|
input="$(cat)"
|
|
@@ -20,13 +29,18 @@ if [ -z "$command_str" ]; then
|
|
|
20
29
|
exit 0
|
|
21
30
|
fi
|
|
22
31
|
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
# while
|
|
26
|
-
if printf '%s' "$command_str" | grep -Eq '(^|[^[:alnum:]_-])--no-verify($|[^[:alnum:]_-])'
|
|
32
|
+
# Each pattern is bounded by non-token characters so longer flags
|
|
33
|
+
# (--no-verify-ssl) and legit values (core.hooksPath=.husky, HUSKY=1) don't match,
|
|
34
|
+
# while every syntactic position is caught (incl. subshells, e.g. `(git commit --no-verify)`).
|
|
35
|
+
if printf '%s' "$command_str" | grep -Eq '(^|[^[:alnum:]_-])--no-verify($|[^[:alnum:]_-])' \
|
|
36
|
+
|| printf '%s' "$command_str" | grep -Eq '(^|[^[:alnum:]_-])HUSKY=0($|[^[:alnum:]])' \
|
|
37
|
+
|| printf '%s' "$command_str" | grep -Eq '(^|[^[:alnum:]_-])HUSKY_SKIP_HOOKS=' \
|
|
38
|
+
|| printf '%s' "$command_str" | grep -Eq 'core\.hooksPath([[:space:]]*=)?[[:space:]]*/dev/null' \
|
|
39
|
+
|| printf '%s' "$command_str" | grep -Eq 'core\.hooksPath[[:space:]]*=[[:space:]]*($|[[:space:];&|"'\''])'; then
|
|
27
40
|
cat >&2 <<'EOF'
|
|
28
|
-
Blocked:
|
|
29
|
-
issue (lint error, failing
|
|
41
|
+
Blocked: this command bypasses pre-commit/pre-push hooks (--no-verify, HUSKY=0,
|
|
42
|
+
or core.hooksPath disabling). Fix the underlying issue (lint error, failing
|
|
43
|
+
test, formatting) or ask the user before bypassing.
|
|
30
44
|
|
|
31
45
|
If the user has explicitly authorized the bypass for this specific command,
|
|
32
46
|
re-run after they confirm.
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Enforces the /lisa:implement completion gate: the lead session may not stop
|
|
3
|
+
# while an implement flow is active until an independent, machine-readable
|
|
4
|
+
# verification verdict proves every acceptance criterion passed.
|
|
5
|
+
#
|
|
6
|
+
# This carries over the most valuable property of the native `/goal` primitive
|
|
7
|
+
# (Claude Code v2.1.139, Codex 0.128.0): a NON-BYPASSABLE completion gate where
|
|
8
|
+
# the agent doing the work cannot self-certify "done". Unlike `/goal`'s small
|
|
9
|
+
# evaluator model — which only reads the transcript and cannot run tools — this
|
|
10
|
+
# gate is deterministic and judges a structured artifact the
|
|
11
|
+
# verification-specialist writes from REAL tool output, so it is both stronger
|
|
12
|
+
# and not foolable by a prose claim of success.
|
|
13
|
+
#
|
|
14
|
+
# Intentionally Claude-specific (like enforce-team-first.sh). Other harnesses
|
|
15
|
+
# may not fire a Stop hook; they fall back to the prose completion gate in
|
|
16
|
+
# skills/implement/SKILL.md.
|
|
17
|
+
#
|
|
18
|
+
# Triggered on four hook events:
|
|
19
|
+
# - UserPromptSubmit : arm enforcement when the prompt starts with
|
|
20
|
+
# /lisa:implement (or /implement)
|
|
21
|
+
# - PreToolUse : arm enforcement when the Skill tool loads lisa:implement
|
|
22
|
+
# (covers nested/programmatic invocation, e.g. intake)
|
|
23
|
+
# - SubagentStart : mark teammate sessions exempt — teammates inherit the
|
|
24
|
+
# lead's flow and must not be gated on their own stop
|
|
25
|
+
# - Stop : block the lead stop unless a FRESH terminal verdict
|
|
26
|
+
# (status pass|blocked, all criteria pass) exists, bounded
|
|
27
|
+
# by a per-session block counter so a genuinely-stuck flow
|
|
28
|
+
# ESCALATES instead of looping forever.
|
|
29
|
+
#
|
|
30
|
+
# The verdict artifact lives at "$CLAUDE_PROJECT_DIR/.lisa/verification-status.json":
|
|
31
|
+
# {
|
|
32
|
+
# "plan": "<plan-name>",
|
|
33
|
+
# "status": "pass" | "fail" | "blocked" | "in_progress",
|
|
34
|
+
# "criteria": [
|
|
35
|
+
# { "task": "...", "criterion": "...", "status": "pass" | "fail", "evidence": "..." }
|
|
36
|
+
# ],
|
|
37
|
+
# "updated_at": "<ISO8601 UTC>"
|
|
38
|
+
# }
|
|
39
|
+
# status "pass" (all criteria pass) or "blocked" (flow recorded a blocker and is
|
|
40
|
+
# stopping deliberately) are terminal and release the gate. "fail"/"in_progress"
|
|
41
|
+
# or a missing/stale file keep it closed.
|
|
42
|
+
#
|
|
43
|
+
# Per-session state lives under "$STATE_DIR" as flag files keyed by session_id.
|
|
44
|
+
# Stale state (>24h) is cleaned on each invocation.
|
|
45
|
+
#
|
|
46
|
+
# Fail-open: any unexpected jq parse failure or missing field in the hook input exits 0 rather
|
|
47
|
+
# than blocking. A broken gate must never brick a session.
|
|
48
|
+
|
|
49
|
+
# Unset variables and failed pipeline stages are errors; -e is not enabled, so
# every command whose failure matters is guarded explicitly below.
set -uo pipefail

# Print the result of applying the jq filter "$1" to the hook payload. Prints
# nothing when jq fails or the filter produces no output.
_gate_input_field() {
  printf '%s' "$INPUT" | jq -r "$1" 2>/dev/null || true
}

# The hook payload is read from stdin; with no payload there is nothing to do.
INPUT=$(cat 2>/dev/null || true)
[ -n "$INPUT" ] || exit 0

HOOK_EVENT=$(_gate_input_field '.hook_event_name // empty')
SESSION_ID=$(_gate_input_field '.session_id // empty')

# All state files are keyed by session id, so without one the hook exits quietly.
[ -n "$SESSION_ID" ] || exit 0

# Number of consecutive Stop blocks tolerated before the gate releases (with an
# escalation message) instead of blocking again.
MAX_BLOCKS=8

# Per-session flag files live in one directory under the temp dir. If that
# directory cannot be created the hook exits 0 rather than blocking.
STATE_DIR="${TMPDIR:-/tmp}/lisa-verification-gate"
if ! mkdir -p "$STATE_DIR" 2>/dev/null; then
  exit 0
fi

ARM_FLAG="${STATE_DIR}/${SESSION_ID}.armed"
SUBAGENT_FLAG="${STATE_DIR}/${SESSION_ID}.subagent"
COUNT_FILE="${STATE_DIR}/${SESSION_ID}.blocks"

# Drop state files not modified in the last 24 hours (1440 minutes). This is
# best-effort: any find error is ignored.
find "$STATE_DIR" -maxdepth 1 -type f -mmin +1440 -delete 2>/dev/null || true
|
|
76
|
+
|
|
77
|
+
# Create the session's arm flag if it does not exist yet.
# Globals:  ARM_FLAG (read) - path of the flag file
# Returns:  always 0; a failed touch is ignored
#
# An existing flag is left untouched on purpose: its mtime is the baseline the
# verdict's freshness is compared against, so refreshing it on a later prompt
# would make an already-written verdict look stale.
arm_once() {
  if [ ! -f "$ARM_FLAG" ]; then
    touch "$ARM_FLAG" 2>/dev/null || true
  fi
}
|
|
83
|
+
|
|
84
|
+
# Route on the hook event name. Every event except Stop finishes inside this
# chain with exit 0; only Stop continues past it to the enforcement path.
if [ "$HOOK_EVENT" = "SubagentStart" ]; then
  # Flag this session as a subagent so its own Stop is never gated.
  touch "$SUBAGENT_FLAG" 2>/dev/null || true
  exit 0
elif [ "$HOOK_EVENT" = "UserPromptSubmit" ]; then
  PROMPT=$(printf '%s' "$INPUT" | jq -r '.prompt // empty' 2>/dev/null || true)
  if [ -n "$PROMPT" ]; then
    # Only the first line of the prompt is inspected, with leading whitespace
    # removed; the gate is armed when it begins with the implement command.
    LEADING=$(printf '%s' "$PROMPT" | sed -n '1p' | sed -E 's/^[[:space:]]*//')
    if [[ "$LEADING" == /lisa:implement* || "$LEADING" == /implement* ]]; then
      arm_once
    fi
  fi
  exit 0
elif [ "$HOOK_EVENT" = "PreToolUse" ]; then
  # Arm when the Skill tool is about to load the implement skill.
  TOOL_NAME=$(printf '%s' "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null || true)
  if [ "$TOOL_NAME" = "Skill" ]; then
    SKILL_NAME=$(printf '%s' "$INPUT" | jq -r '.tool_input.skill // empty' 2>/dev/null || true)
    if [ "$SKILL_NAME" = "lisa:implement" ] || [ "$SKILL_NAME" = "implement" ]; then
      arm_once
    fi
  fi
  exit 0
elif [ "$HOOK_EVENT" != "Stop" ]; then
  # Unknown or empty event name: nothing to do.
  exit 0
fi
|
|
124
|
+
|
|
125
|
+
# --- Stop enforcement path ---
|
|
126
|
+
|
|
127
|
+
# A Stop is only gated for a session that is armed and not flagged as a
# subagent:
#   - a session carrying the subagent flag is never gated on its own stop;
#   - a session without the arm flag has no implement flow to enforce.
if [ -f "$SUBAGENT_FLAG" ] || [ ! -f "$ARM_FLAG" ]; then
  exit 0
fi

# The verdict file sits under the project directory, which falls back to the
# current directory when CLAUDE_PROJECT_DIR is unset or empty.
PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}"
VERDICT_FILE="${PROJECT_DIR}/.lisa/verification-status.json"
|
|
139
|
+
|
|
140
|
+
#######################################
# Decide whether the verdict file releases the Stop gate.
#
# The gate is released by a verdict written no earlier than the arm flag whose
# top-level status is either:
#   - "blocked": a deliberate, recorded stop. The criteria are NOT inspected,
#     because a blocked flow normally still carries the failing criterion that
#     caused it, and the hook's own block message tells the agent to set
#     status "blocked" in exactly that situation.
#   - "pass": accepted only when every listed criterion has status "pass".
#     An entry with status "fail", any other value, or no status at all keeps
#     the gate closed, matching the "outstanding" list printed on a block.
#
# Globals:  VERDICT_FILE (read) - path of the verdict JSON
#           ARM_FLAG     (read) - arm flag whose mtime is the freshness baseline
# Returns:  0 when the verdict releases the gate; 1 when the file is missing,
#           stale, unparseable, non-terminal, or "pass" with unproven criteria
#######################################
verdict_is_terminal() {
  [ -f "$VERDICT_FILE" ] || return 1

  # A verdict older than the arm flag is left over from an earlier run and
  # must not satisfy this one.
  if [ "$VERDICT_FILE" -ot "$ARM_FLAG" ]; then
    return 1
  fi

  local status unproven
  # Empty output (missing field, malformed JSON, jq failure) falls into the
  # default arm below and keeps the gate closed.
  status=$(jq -r '.status // empty' "$VERDICT_FILE" 2>/dev/null || true)
  case "$status" in
    blocked) return 0 ;;
    pass) ;;
    *) return 1 ;;
  esac

  # Count criteria that are not proven. If jq fails, report 1 so the verdict is
  # treated as unproven rather than silently accepted.
  unproven=$(jq -r '[.criteria[]? | select((.status // "") != "pass")] | length' "$VERDICT_FILE" 2>/dev/null || echo 1)
  [ "$unproven" = "0" ]
}
|
|
162
|
+
|
|
163
|
+
# A releasing verdict satisfies the gate: remove the arm flag and the block
# counter so a later stop in this session is not gated against the same
# verdict, then allow the stop.
if verdict_is_terminal; then
  rm -f "$ARM_FLAG" "$COUNT_FILE" 2>/dev/null || true
  exit 0
fi

# No releasing verdict. Record one more block for this session; a missing or
# non-numeric counter restarts from zero.
COUNT=$(cat "$COUNT_FILE" 2>/dev/null || echo 0)
case "$COUNT" in
  '' | *[!0-9]*) COUNT=0 ;;
esac
COUNT=$((COUNT + 1))
echo "$COUNT" > "$COUNT_FILE" 2>/dev/null || true

# Past the limit the gate gives up: it disarms, warns on stderr that completion
# was NOT proven, and lets the stop through so the session cannot loop forever.
if ((COUNT > MAX_BLOCKS)); then
  rm -f "$ARM_FLAG" "$COUNT_FILE" 2>/dev/null || true
  printf '%s\n' \
    "Verification gate: still no passing verdict after ${MAX_BLOCKS} attempts." \
    "Releasing the stop gate to avoid an infinite loop. The /lisa:implement Verify" \
    "flow did NOT prove completion. Do NOT claim this work is verified — escalate to" \
    "a human, or file a build-ready fix ticket and move the work item to blocked." >&2
  exit 0
fi

# Explain the block on stderr, then exit 2.
REASON_DETAIL=""
{
  printf '%s\n\n' "Blocked: /lisa:implement may not stop until verification is proven."
  if [ -f "$VERDICT_FILE" ]; then
    # One line per criterion whose status is anything other than "pass".
    REASON_DETAIL=$(jq -r '[.criteria[]? | select((.status // "") != "pass") | " - \(.task // "?"): \(.criterion // "?") [\(.status // "missing")]"] | .[]' "$VERDICT_FILE" 2>/dev/null || true)
    printf '%s\n' "The verification verdict is not terminal-and-passing. Outstanding:"
    if [ -z "$REASON_DETAIL" ]; then
      printf '%s\n' " (status is not pass/blocked, or the verdict is older than this run)"
    else
      printf '%s\n' "$REASON_DETAIL"
    fi
    cat <<'EOF'

Fix the failing criteria and re-verify, or — if genuinely blocked —
set status "blocked" with the reason.
EOF
  else
    cat <<'EOF'
No verification verdict found at .lisa/verification-status.json.
The Verify flow must run the verification-specialist (run the actual
system, observe results) and write a machine-readable verdict — schema
in skills/implement/SKILL.md — with status "pass" and every
acceptance criterion proven.

If you are stopping deliberately because of a blocker (readiness gate
failed, base branch missing, unresolved dependency), write the verdict
with status "blocked" and the reason instead. That records the
outcome and releases this gate.
EOF
  fi
  printf '\n%s\n' "(Attempt ${COUNT}/${MAX_BLOCKS} — the gate releases after ${MAX_BLOCKS} to avoid a loop.)"
} >&2
exit 2
|
|
@@ -96,7 +96,14 @@ IF it is a Fix (bug), execute the Reproduce sub-flow FIRST:
|
|
|
96
96
|
1. Write a simple API client and call the offending API
|
|
97
97
|
2. Start the server on localhost and use the Playwright CLI or Chrome DevTools
|
|
98
98
|
|
|
99
|
-
Using the general-purpose agent in Team Lead session, determine how you will know that the task is fully complete
|
|
99
|
+
Using the general-purpose agent in Team Lead session, determine how you will know that the task is fully complete. Write this as an **effective completion condition** — one an independent verifier could confirm from observed output alone, not from your assertion that it works. A strong condition has:
|
|
100
|
+
|
|
101
|
+
- **One measurable end state** — a status code, an exit code, a row count, an observable UI state, an empty queue. Not "it looks right" or "the code is correct".
|
|
102
|
+
- **A stated proof command that surfaces the evidence** — exactly how the running system is exercised so the result is observable (e.g. `curl … returns 200 with {…}`, "the Playwright run reaches the dashboard", "`SELECT …` returns the new row"). Quality gates (test/typecheck/lint) do NOT count — they are prerequisites.
|
|
103
|
+
- **Constraints that must hold** — anything that must not change on the way there (e.g. "no other endpoint's response changes", "no migration is dropped").
|
|
104
|
+
|
|
105
|
+
This condition is the contract the Verify flow proves and records in the verification verdict (below); it is what the completion gate checks before the flow may stop.
|
|
106
|
+
|
|
100
107
|
1. Examples
|
|
101
108
|
1. Deploy the changes directly to dev, then write a simple API client and call the offending API
|
|
102
109
|
2. Start the server on localhost, then use the Playwright CLI or Chrome DevTools
|
|
@@ -116,8 +123,8 @@ Every task MUST include this JSON metadata block. Do NOT omit `skills` (use `[]`
|
|
|
116
123
|
"learnings": ["..."],
|
|
117
124
|
"verification": {
|
|
118
125
|
"type": "ui-recording|api-test|cli-test|database-check|manual-check|documentation",
|
|
119
|
-
"command": "the proof command — must run the actual system (NOT test/typecheck/lint, those are quality gates)",
|
|
120
|
-
"expected": "
|
|
126
|
+
"command": "the proof command — must run the actual system and surface its result in the transcript (NOT test/typecheck/lint, those are quality gates). Phrase it so an independent verifier sees the evidence, e.g. `curl -s localhost:3000/health` not `check that health works`",
|
|
127
|
+
"expected": "the single measurable end state that proves success — observable system behavior (status code, response body, row count, UI state), not a subjective judgement"
|
|
121
128
|
}
|
|
122
129
|
}
|
|
123
130
|
```
|
|
@@ -131,6 +138,22 @@ Before shutting down the team, execute the Verify flow:
|
|
|
131
138
|
|
|
132
139
|
1. Run quality gates: lint, typecheck, tests — all must pass. These are prerequisites, NOT verification.
|
|
133
140
|
2. `verification-specialist`: verify locally by running the actual system and observing results (empirical proof that the change works). This is the real verification step.
|
|
141
|
+
2a. **Record the verification verdict** — the independent, machine-readable proof that gates completion. The `verification-specialist` writes `${CLAUDE_PROJECT_DIR:-.}/.lisa/verification-status.json` with one entry per acceptance criterion, each carrying the proof command's observed evidence:
|
|
142
|
+
|
|
143
|
+
```json
|
|
144
|
+
{
|
|
145
|
+
"plan": "<plan-name>",
|
|
146
|
+
"status": "pass | fail | blocked | in_progress",
|
|
147
|
+
"criteria": [
|
|
148
|
+
{ "task": "<task id or title>", "criterion": "<the completion condition>", "status": "pass | fail", "evidence": "<the proof command run and the observed result>" }
|
|
149
|
+
],
|
|
150
|
+
"updated_at": "<ISO8601 UTC>"
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Set `status: "pass"` only when every criterion is `pass` with real evidence (output from running the system, not a claim). The verdict must be judged by an agent that did NOT implement the change (the `verification-specialist`), never self-certified by the implementer. This is runtime scratch — it is gitignored and MUST NOT be committed (treat it like the secrets exclusion in the commit step).
|
|
155
|
+
|
|
156
|
+
On Claude, the `enforce-verification-gate.sh` Stop hook reads this file and **will not let the flow stop** until it shows a terminal, all-`pass` verdict — carrying over the non-bypassable completion gate of the `/goal` primitive, but checked deterministically against real evidence rather than by a transcript-only evaluator model. If you must stop before completion (a readiness gate failed, a blocker was found, a dependency is unresolved), write the verdict with `status: "blocked"` and the reason: that records the outcome and releases the gate instead of leaving it to spin. Other harnesses fall back to this prose obligation.
|
|
134
157
|
3. Write e2e test encoding the verification
|
|
135
158
|
4. Record Implement usage on the originating work artifact via `lisa:usage-accounting` so the work item (or other implementation-owned artifact) gains a direct `implement` usage entry in the canonical `## Lisa Usage` section. If the parent / child graph is already known, prefer `record_and_rollup` so ancestor totals refresh in the same write; otherwise still write the direct entry, and if runtime usage is unavailable, use `source: unavailable` with nullable token/cost fields instead of skipping the row.
|
|
136
159
|
5. Commit ALL outstanding changes in logical batches on the branch (minus sensitive data/information) — not just changes made by the agent team. This includes pre-existing uncommitted changes that were on the branch before the plan started. Do NOT filter commits to only "task-related" files. If it shows up in git status, it gets committed (unless it contains secrets).
|
|
@@ -140,6 +163,6 @@ Before shutting down the team, execute the Verify flow:
|
|
|
140
163
|
9. Merge the PR
|
|
141
164
|
10. Monitor the deploy action that triggers automatically from the successful merge
|
|
142
165
|
11. If deploy fails, create a task for the agent team to fix the failure, open a new PR and then go back to step 7
|
|
143
|
-
12. Remote verification: `verification-specialist` verifies in target environment (same checks as local verification, but on remote)
|
|
166
|
+
12. Remote verification: `verification-specialist` verifies in target environment (same checks as local verification, but on remote), and refreshes the verdict (step 2a) to reflect the remote result.
|
|
144
167
|
13. `ops-specialist`: post-deploy health check, monitor for errors in first minutes
|
|
145
|
-
14. If remote verification fails, create a task for the agent team to find out why it failed, fix it and return to step 5 (
|
|
168
|
+
14. If remote verification fails, create a task for the agent team to find out why it failed, fix it and return to step 5. **Bound this loop**: after a small number of full fix→deploy→reverify cycles without reaching a passing remote verdict (treat ~3 as the ceiling unless the work item states otherwise), stop retrying — file a build-ready fix ticket, write the verdict with `status: "blocked"` and the diagnosis, and move the work item to blocked rather than looping indefinitely. The completion gate releases on a `blocked` verdict, so the flow ends with a recorded outcome instead of a silent spin or a self-declared success.
|
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
|
-
# PreToolUse hook for Bash: blocks
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
#
|
|
2
|
+
# PreToolUse hook for Bash: blocks commands that bypass git's verification hooks.
|
|
3
|
+
# Bypassing pre-commit/pre-push hooks (which exist for a reason) is blocked in
|
|
4
|
+
# all of its forms; the fix is to address the underlying issue, not silence the
|
|
5
|
+
# check. See feedback_never_no_verify in user memory.
|
|
6
6
|
#
|
|
7
|
-
#
|
|
8
|
-
# --no-verify
|
|
7
|
+
# Blocked bypass vectors:
|
|
8
|
+
# 1. the --no-verify long flag (any subcommand, any position, incl. subshells);
|
|
9
|
+
# 2. HUSKY=0 / HUSKY_SKIP_HOOKS=... — disables husky-managed git hooks;
|
|
10
|
+
# 3. core.hooksPath pointed at /dev/null or set empty — disables ALL git hooks.
|
|
11
|
+
#
|
|
12
|
+
# Word-boundary matching avoids false positives on longer flags (--no-verify-ssl,
|
|
13
|
+
# --no-verify-host) and on a legit custom hooks path (core.hooksPath=.husky).
|
|
14
|
+
#
|
|
15
|
+
# The short `-n` form is intentionally NOT matched (see block-no-verify.agy.sh):
|
|
16
|
+
# grep cannot distinguish a real -n option from -n in commit-message prose or an
|
|
17
|
+
# unrelated piped command, and -n is far more common than --no-verify.
|
|
9
18
|
set -euo pipefail
|
|
10
19
|
|
|
11
20
|
input="$(cat)"
|
|
@@ -20,13 +29,18 @@ if [ -z "$command_str" ]; then
|
|
|
20
29
|
exit 0
|
|
21
30
|
fi
|
|
22
31
|
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
# while
|
|
26
|
-
if printf '%s' "$command_str" | grep -Eq '(^|[^[:alnum:]_-])--no-verify($|[^[:alnum:]_-])'
|
|
32
|
+
# Each pattern is bounded by non-token characters so longer flags
|
|
33
|
+
# (--no-verify-ssl) and legit values (core.hooksPath=.husky, HUSKY=1) don't match,
|
|
34
|
+
# while every syntactic position is caught (incl. subshells, e.g. `(git commit --no-verify)`).
|
|
35
|
+
if printf '%s' "$command_str" | grep -Eq '(^|[^[:alnum:]_-])--no-verify($|[^[:alnum:]_-])' \
|
|
36
|
+
|| printf '%s' "$command_str" | grep -Eq '(^|[^[:alnum:]_-])HUSKY=0($|[^[:alnum:]])' \
|
|
37
|
+
|| printf '%s' "$command_str" | grep -Eq '(^|[^[:alnum:]_-])HUSKY_SKIP_HOOKS=' \
|
|
38
|
+
|| printf '%s' "$command_str" | grep -Eq 'core\.hooksPath([[:space:]]*=)?[[:space:]]*/dev/null' \
|
|
39
|
+
|| printf '%s' "$command_str" | grep -Eq 'core\.hooksPath[[:space:]]*=[[:space:]]*($|[[:space:];&|"'\''])'; then
|
|
27
40
|
cat >&2 <<'EOF'
|
|
28
|
-
Blocked:
|
|
29
|
-
issue (lint error, failing
|
|
41
|
+
Blocked: this command bypasses pre-commit/pre-push hooks (--no-verify, HUSKY=0,
|
|
42
|
+
or core.hooksPath disabling). Fix the underlying issue (lint error, failing
|
|
43
|
+
test, formatting) or ask the user before bypassing.
|
|
30
44
|
|
|
31
45
|
If the user has explicitly authorized the bypass for this specific command,
|
|
32
46
|
re-run after they confirm.
|