@friedbotstudio/create-baseline 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +7 -3
  2. package/obj/template/.claude/commands/grant-push.md +19 -0
  3. package/obj/template/.claude/commands/init-project.md +26 -4
  4. package/obj/template/.claude/hooks/consent_gate_grant.mjs +107 -0
  5. package/obj/template/.claude/hooks/git_commit_guard.mjs +224 -0
  6. package/obj/template/.claude/hooks/harness_continuation.sh +101 -34
  7. package/obj/template/.claude/hooks/lib/common.mjs +283 -0
  8. package/obj/template/.claude/hooks/lib/common.sh +1 -1
  9. package/obj/template/.claude/hooks/memory_session_start.sh +20 -6
  10. package/obj/template/.claude/hooks/memory_stop.sh +161 -2
  11. package/obj/template/.claude/hooks/spec_approval_guard.sh +1 -1
  12. package/obj/template/.claude/hooks/swarm_approval_guard.sh +1 -1
  13. package/obj/template/.claude/hooks/tests/fixtures/ac008_byte_equal_reference.txt +7 -7
  14. package/obj/template/.claude/hooks/tests/fixtures/memory_stop_landmark_baseline.txt +21 -0
  15. package/obj/template/.claude/hooks/tests/fixtures/regenerate-ac008.sh +47 -0
  16. package/obj/template/.claude/hooks/tests/memory_session_start_test.sh +7 -3
  17. package/obj/template/.claude/hooks/tests/memory_stop_intent_test.sh +329 -0
  18. package/obj/template/.claude/hooks/tests/regenerate_ac008_test.sh +99 -0
  19. package/obj/template/.claude/memory/README.md +8 -3
  20. package/obj/template/.claude/memory/backlog.md +12 -0
  21. package/obj/template/.claude/project.json +6 -1
  22. package/obj/template/.claude/settings.json +3 -4
  23. package/obj/template/.claude/skills/audit-baseline/audit.sh +28 -16
  24. package/obj/template/.claude/skills/audit-baseline/tests/fixtures/_pending_opener_only.md +3 -0
  25. package/obj/template/.claude/skills/audit-baseline/tests/fixtures/preamble_full_empty_body.md +4 -0
  26. package/obj/template/.claude/skills/audit-baseline/tests/fixtures/preamble_full_with_entries.md +9 -0
  27. package/obj/template/.claude/skills/audit-baseline/tests/fixtures/preamble_no_opener.md +3 -0
  28. package/obj/template/.claude/skills/audit-baseline/tests/fixtures/preamble_opener_only.md +3 -0
  29. package/obj/template/.claude/skills/audit-baseline/tests/preamble_check_test.sh +147 -0
  30. package/obj/template/.claude/skills/chore/SKILL.md +5 -3
  31. package/obj/template/.claude/skills/commit/SKILL.md +5 -4
  32. package/obj/template/.claude/skills/copywriting/LICENSE +21 -0
  33. package/obj/template/.claude/skills/copywriting/NOTICE +23 -0
  34. package/obj/template/.claude/skills/copywriting/SKILL.md +1 -1
  35. package/obj/template/.claude/skills/design-ui/SKILL.md +23 -5
  36. package/obj/template/.claude/skills/design-ui/references/design-vs-development.md +26 -5
  37. package/obj/template/.claude/skills/design-ui/references/orchestration.md +1 -0
  38. package/obj/template/.claude/skills/design-ui/references/state-machine.md +5 -3
  39. package/obj/template/.claude/skills/documentation/LICENSE +202 -0
  40. package/obj/template/.claude/skills/documentation/NOTICE +22 -0
  41. package/obj/template/.claude/skills/harness/SKILL.md +3 -1
  42. package/obj/template/.claude/skills/humanizer/LICENSE +21 -0
  43. package/obj/template/.claude/skills/humanizer/NOTICE +21 -0
  44. package/obj/template/.claude/skills/impeccable/LICENSE +202 -0
  45. package/obj/template/.claude/skills/impeccable/NOTICE +24 -0
  46. package/obj/template/.claude/skills/memory-flush/SKILL.md +20 -4
  47. package/obj/template/.claude/skills/memory-flush/sweep.py +74 -6
  48. package/obj/template/.claude/skills/memory-flush/tests/run.sh +300 -1
  49. package/obj/template/.claude/skills/tdd/SKILL.md +2 -1
  50. package/obj/template/.claude/skills/tdd/drift_check.py +180 -0
  51. package/obj/template/.claude/skills/tdd/tests/drift_check_test.sh +190 -0
  52. package/obj/template/.claude/skills/tdd/tests/run.sh +21 -0
  53. package/obj/template/.claude/skills/technical-tutorials/LICENSE +21 -0
  54. package/obj/template/.claude/skills/technical-tutorials/NOTICE +23 -0
  55. package/obj/template/.claude/skills/technical-tutorials/SKILL.md +1 -1
  56. package/obj/template/.claude/skills/triage/SKILL.md +8 -3
  57. package/obj/template/CLAUDE.md +34 -23
  58. package/obj/template/docs/init/seed.md +36 -21
  59. package/obj/template/manifest.json +59 -33
  60. package/package.json +1 -1
  61. package/src/CLAUDE.template.md +34 -23
  62. package/src/memory/backlog.template.md +12 -0
  63. package/src/project.template.json +6 -1
  64. package/src/seed.template.md +36 -21
  65. package/src/settings.template.json +3 -4
  66. package/obj/template/.claude/hooks/consent_gate_grant.sh +0 -89
  67. package/obj/template/.claude/hooks/git_commit_guard.sh +0 -93
@@ -223,11 +223,15 @@ test_when_audit_runs_against_changed_repo_then_exit_0() {
223
223
 
224
224
  test_when_hook_runs_unchanged_tree_then_header_and_table_byte_equal() {
225
225
  # Run hook against the real repo memory tree and compare header+table to
226
- # the captured pre-spec reference.
226
+ # the captured pre-spec reference. The fixture's HEAD field is the literal
227
+ # sentinel `n/a` (see fixtures/regenerate-ac008.sh); the test normalizes
228
+ # any captured `HEAD: \`<short-sha>\`` to `HEAD: \`n/a\`` before comparing
229
+ # so the test is byte-stable across commits.
227
230
  local out; out="$(run_hook "$REPO_ROOT")"
228
231
  local actual_block
229
232
  actual_block="$(printf '%s\n' "$out" | python3 -c '
230
- import sys
233
+ import re, sys
234
+ HEAD_RE = re.compile(r"^(HEAD:\s*`)[^`]+(`)")
231
235
  lines = sys.stdin.read().split("\n")
232
236
  started = False
233
237
  out = []
@@ -236,7 +240,7 @@ for ln in lines:
236
240
  started = True
237
241
  if not started:
238
242
  continue
239
- out.append(ln)
243
+ out.append(HEAD_RE.sub(r"\1n/a\2", ln))
240
244
  if ln.startswith("| `pending-questions.md`"):
241
245
  break
242
246
  sys.stdout.write("\n".join(out) + "\n")
@@ -0,0 +1,329 @@
1
+ #!/usr/bin/env bash
2
+ # Fixture-based integration tests for memory_stop.sh intent-extraction surface.
3
+ # Covers AC-001..AC-004, AC-010, AC-012 from docs/specs/backlog-memory-bucket.md
4
+ #
5
+ # Each test builds a synthetic transcript JSONL under a tempdir + project root,
6
+ # invokes the real hook with CLAUDE_PROJECT_DIR redirected at the tempdir, and
7
+ # asserts on the resulting _pending.md body. The hook is a passive collector —
8
+ # tests assert on what it APPENDED, not on side effects elsewhere.
9
+ #
10
+ # All tests in this file start RED until the implement worker:
11
+ # (a) captures the no-intent baseline fixture at
12
+ # .claude/hooks/tests/fixtures/memory_stop_landmark_baseline.txt
13
+ # by running the CURRENT (pre-extension) hook against the AC-004 input;
14
+ # (b) extends memory_stop.sh with the intent-extraction surface;
15
+ # (c) re-runs this file — AC-001/2/3/10 should now PASS (extension added)
16
+ # and AC-004/12 should stay PASS (baseline still matches because the
17
+ # no-intent path is byte-stable).
18
+
19
+ set -uo pipefail
20
+
21
+ HERE="$(cd "$(dirname "$0")" && pwd)"
22
+ REPO_ROOT="$(cd "$HERE/../../.." && pwd)"
23
+ HOOK="$REPO_ROOT/.claude/hooks/memory_stop.sh"
24
+ FIXTURES="$HERE/fixtures"
25
+ LANDMARK_BASELINE="$FIXTURES/memory_stop_landmark_baseline.txt"
26
+
27
+ PASS=0; FAIL=0; FAILED=()
28
+
29
+ # --- assertion helpers (Foundation) ------------------------------------------
30
+
31
+ fail() { echo " FAIL: $*"; return 1; }
32
+
33
+ assert_file_contains() {
34
+ local path="$1" needle="$2" msg="$3"
35
+ if grep -qF -- "$needle" "$path" 2>/dev/null; then return 0; fi
36
+ fail "$msg :: $path missing literal: $needle"
37
+ }
38
+
39
+ assert_file_not_contains() {
40
+ local path="$1" needle="$2" msg="$3"
41
+ if grep -qF -- "$needle" "$path" 2>/dev/null; then
42
+ fail "$msg :: $path should NOT contain literal: $needle"
43
+ return 1
44
+ fi
45
+ }
46
+
47
+ assert_grep_count() {
48
+ local path="$1" pattern="$2" expected="$3" msg="$4"
49
+ local got
50
+ got="$(grep -cE "$pattern" "$path" 2>/dev/null || true)"
51
+ [ -z "$got" ] && got=0
52
+ if [ "$got" -eq "$expected" ]; then return 0; fi
53
+ fail "$msg :: pattern /$pattern/ count expected=$expected got=$got"
54
+ }
55
+
56
+ # --- transcript builders (Foundation) ----------------------------------------
57
+
58
+ # Each builder appends one JSONL event. Event shape matches what Claude Code
59
+ # writes to a real transcript: {"message": {"role": ..., "content": [...]}}.
60
+
61
+ append_text_event() {
62
+ local path="$1" role="$2" text="$3"
63
+ python3 - "$path" "$role" "$text" <<'PY'
64
+ import json, sys
65
+ path, role, text = sys.argv[1], sys.argv[2], sys.argv[3]
66
+ event = {"message": {"role": role, "content": [{"type": "text", "text": text}]}}
67
+ with open(path, "a", encoding="utf-8") as f:
68
+ f.write(json.dumps(event) + "\n")
69
+ PY
70
+ }
71
+
72
+ append_tool_use_event() {
73
+ local path="$1" tool="$2" file_path="$3"
74
+ python3 - "$path" "$tool" "$file_path" <<'PY'
75
+ import json, sys
76
+ path, tool, fp = sys.argv[1], sys.argv[2], sys.argv[3]
77
+ event = {"message": {"role": "assistant",
78
+ "content": [{"type": "tool_use", "name": tool, "input": {"file_path": fp}}]}}
79
+ with open(path, "a", encoding="utf-8") as f:
80
+ f.write(json.dumps(event) + "\n")
81
+ PY
82
+ }
83
+
84
+ # --- project root setup (Foundation) -----------------------------------------
85
+
86
+ # Seed a tempdir with the layout memory_stop.sh expects:
87
+ # <root>/.claude/hooks/lib/ symlink to the real lib (for common.sh)
88
+ # <root>/.claude/memory/_pending.md skeleton body
89
+ # <root>/.claude/state/logs/ empty (log_line writes here)
90
+ # Prints the root path on stdout.
91
+ seed_project() {
92
+ local root; root="$(mktemp -d)"
93
+ mkdir -p "$root/.claude/memory" "$root/.claude/state/logs" "$root/.claude/hooks"
94
+ ln -s "$REPO_ROOT/.claude/hooks/lib" "$root/.claude/hooks/lib"
95
+ cat > "$root/.claude/memory/_pending.md" <<'EOF'
96
+ ---
97
+ owners: [memory_stop.sh writes; /memory-flush clears]
98
+ category: auto-extracted candidates awaiting curation
99
+ verifies-against: none
100
+ ---
101
+
102
+ # Pending memory candidates
103
+
104
+ ---
105
+ EOF
106
+ printf '%s' "$root"
107
+ }
108
+
109
+ # Invoke the hook against $1 = project root, $2 = transcript path.
110
+ # The hook is invoked exactly as Claude Code invokes it — JSON payload on
111
+ # stdin, project dir via env. Output to stdout/stderr is suppressed; tests
112
+ # inspect _pending.md afterward.
113
+ run_hook() {
114
+ local root="$1" transcript="$2"
115
+ printf '%s' "{\"transcript_path\":\"$transcript\"}" \
116
+ | CLAUDE_PROJECT_DIR="$root" bash "$HOOK" >/dev/null 2>&1 || true
117
+ }
118
+
119
+ # Helper: print the path of the pending file (_pending.md) for a given project root.
120
+ pending_path() {
121
+ printf '%s' "$1/.claude/memory/_pending.md"
122
+ }
123
+
124
+ # Strip lines that legitimately vary between runs (session timestamps + edit
125
+ # timestamps), so byte-parity comparisons are stable across invocations.
126
+ canonicalize_pending() {
127
+ grep -vE '^<!-- session [0-9TZ:-]+ -->$|^- Source: file written/edited at ' "$1"
128
+ }
129
+
130
+ run() {
131
+ local name="$1"
132
+ echo "RUN $name"
133
+ if "$name"; then
134
+ PASS=$((PASS+1)); echo "PASS $name"
135
+ else
136
+ FAIL=$((FAIL+1)); FAILED+=("$name"); echo "FAIL $name"
137
+ fi
138
+ }
139
+
140
+ # --- AC-001: user-prompt anchored TODO emits backlog candidate ---------------
141
+
142
+ test_when_user_prompt_has_anchored_todo_then_backlog_candidate_emitted() {
143
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
144
+ local tx="$root/transcript.jsonl"
145
+ append_text_event "$tx" "user" "TODO: add retry to webhook worker"
146
+ run_hook "$root" "$tx"
147
+ local pf; pf="$(pending_path "$root")"
148
+
149
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 1 "AC-001 expected exactly 1 backlog CANDIDATE" || return 1
150
+ assert_file_contains "$pf" "- Role: user" "AC-001 role=user missing" || return 1
151
+ assert_file_contains "$pf" "- Source: user-instruction" "AC-001 source=user-instruction missing" || return 1
152
+ assert_file_contains "$pf" "TODO: add retry to webhook worker" "AC-001 verbatim missing" || return 1
153
+
154
+ # Key shape: <slug>-<4-char-hash>
155
+ local key
156
+ key="$(grep -oE '^## CANDIDATE: backlog → \S+' "$pf" | head -1 | sed 's|^## CANDIDATE: backlog → ||')"
157
+ echo "$key" | grep -qE -- '-[0-9a-f]{4}$' || { fail "AC-001 key '$key' missing 4-char hash suffix"; return 1; }
158
+ }
159
+
160
+ # --- AC-002: assistant-text anchored intent emits candidate with distinct source ---
161
+
162
+ test_when_assistant_text_has_anchored_lets_also_then_backlog_candidate_with_assistant_deferral_source() {
163
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
164
+ local tx="$root/transcript.jsonl"
165
+ append_text_event "$tx" "assistant" "Let's also test the empty-state flow"
166
+ run_hook "$root" "$tx"
167
+ local pf; pf="$(pending_path "$root")"
168
+
169
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 1 "AC-002 expected exactly 1 backlog CANDIDATE" || return 1
170
+ assert_file_contains "$pf" "- Role: assistant" "AC-002 role=assistant missing" || return 1
171
+ assert_file_contains "$pf" "- Source: assistant-deferral" "AC-002 source=assistant-deferral missing" || return 1
172
+ assert_file_contains "$pf" "Let's also test the empty-state flow" "AC-002 verbatim missing" || return 1
173
+ }
174
+
175
+ # --- AC-003: mid-sentence trigger MUST NOT emit ------------------------------
176
+
177
+ test_when_intent_mid_sentence_then_no_backlog_candidate() {
178
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
179
+ local tx="$root/transcript.jsonl"
180
+ append_text_event "$tx" "user" "we discussed the next section of the document is here and TODO appears mid-line"
181
+ run_hook "$root" "$tx"
182
+ local pf; pf="$(pending_path "$root")"
183
+
184
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 0 "AC-003 mid-sentence MUST NOT emit candidate" || return 1
185
+ }
186
+
187
+ # --- AC-003 noise filter: system-reminder block suppressed -------------------
188
+
189
+ test_when_intent_in_system_reminder_block_then_no_backlog_candidate() {
190
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
191
+ local tx="$root/transcript.jsonl"
192
+ append_text_event "$tx" "user" $'<system-reminder>\nTODO: should be filtered\n</system-reminder>'
193
+ run_hook "$root" "$tx"
194
+ local pf; pf="$(pending_path "$root")"
195
+
196
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 0 "AC-003 system-reminder block MUST be filtered" || return 1
197
+ }
198
+
199
+ # --- AC-001 bullet anchor: indented bullet emits -----------------------------
200
+
201
+ test_when_intent_at_indented_bullet_then_candidate_emitted() {
202
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
203
+ local tx="$root/transcript.jsonl"
204
+ append_text_event "$tx" "user" $'Some intro line.\n - TODO: handle the empty-state case'
205
+ run_hook "$root" "$tx"
206
+ local pf; pf="$(pending_path "$root")"
207
+
208
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 1 "AC-001 bullet-anchored intent should emit 1" || return 1
209
+ assert_file_contains "$pf" "TODO: handle the empty-state case" "AC-001 bullet verbatim missing" || return 1
210
+ }
211
+
212
+ # --- AC-001 zero-content guard: empty intent after trigger strip -------------
213
+
214
+ test_when_intent_text_empty_after_trigger_strip_then_no_candidate() {
215
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
216
+ local tx="$root/transcript.jsonl"
217
+ append_text_event "$tx" "user" "TODO:"
218
+ run_hook "$root" "$tx"
219
+ local pf; pf="$(pending_path "$root")"
220
+
221
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 0 "AC-001 zero-content intent MUST NOT emit" || return 1
222
+ }
223
+
224
+ # --- AC-010: same 8-word prefix → distinct keys via hash suffix --------------
225
+
226
+ test_when_two_intents_same_8word_prefix_then_distinct_keys() {
227
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
228
+ local tx="$root/transcript.jsonl"
229
+ append_text_event "$tx" "user" "next we should add retry logic"
230
+ append_text_event "$tx" "user" "next we should add retry tests"
231
+ run_hook "$root" "$tx"
232
+ local pf; pf="$(pending_path "$root")"
233
+
234
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 2 "AC-010 expected 2 distinct backlog candidates" || return 1
235
+
236
+ # Both keys present and distinct
237
+ local keys
238
+ keys="$(grep -oE '^## CANDIDATE: backlog → \S+' "$pf" | sort -u)"
239
+ local count; count="$(printf '%s\n' "$keys" | wc -l | tr -d ' ')"
240
+ [ "$count" -eq 2 ] || { fail "AC-010 expected 2 distinct keys; got $count :: $keys"; return 1; }
241
+ }
242
+
243
+ # --- AC-001/AC-002 dedup: same intent repeated in turn -----------------------
244
+
245
+ test_when_same_intent_repeated_in_turn_then_within_session_dedup_holds() {
246
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
247
+ local tx="$root/transcript.jsonl"
248
+ # Same intent: 2 user events + 1 assistant event = 2 distinct candidates
249
+ # (one per role-source combination).
250
+ append_text_event "$tx" "user" "TODO: dedup me please"
251
+ append_text_event "$tx" "user" "TODO: dedup me please"
252
+ append_text_event "$tx" "assistant" "TODO: dedup me please"
253
+ run_hook "$root" "$tx"
254
+ local pf; pf="$(pending_path "$root")"
255
+
256
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 2 "expected 2 (one per role); got drift" || return 1
257
+ assert_grep_count "$pf" '^- Source: user-instruction' 1 "expected 1 user-instruction candidate" || return 1
258
+ assert_grep_count "$pf" '^- Source: assistant-deferral' 1 "expected 1 assistant-deferral candidate" || return 1
259
+ }
260
+
261
+ # --- AC-004 byte-parity: file-touch path unchanged ---------------------------
262
+
263
+ test_when_turn_edits_files_no_intent_then_landmark_candidates_byte_identical() {
264
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
265
+ local tx="$root/transcript.jsonl"
266
+ # Three file-touch events, zero intent text. The shape that produces the
267
+ # captured baseline fixture.
268
+ append_tool_use_event "$tx" "Edit" "src/foo.py"
269
+ append_tool_use_event "$tx" "Edit" "src/bar.py"
270
+ append_tool_use_event "$tx" "Edit" "src/foo.py"
271
+ run_hook "$root" "$tx"
272
+ local pf; pf="$(pending_path "$root")"
273
+
274
+ if [ ! -f "$LANDMARK_BASELINE" ]; then
275
+ fail "AC-004 baseline fixture missing at $LANDMARK_BASELINE — implement worker must capture pre-extension output before adding intent extraction"
276
+ return 1
277
+ fi
278
+
279
+ # Backlog section MUST be empty for this input (no intent text)
280
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 0 "AC-004 no-intent input must produce zero backlog candidates" || return 1
281
+
282
+ # Canonicalized (timestamp-stripped) output MUST byte-match the baseline.
283
+ local actual_canon expected_canon
284
+ actual_canon="$(canonicalize_pending "$pf")"
285
+ expected_canon="$(canonicalize_pending "$LANDMARK_BASELINE")"
286
+ if [ "$actual_canon" = "$expected_canon" ]; then return 0; fi
287
+ fail "AC-004 landmark output diverged from baseline (canonicalized diff below)"
288
+ diff <(printf '%s' "$expected_canon") <(printf '%s' "$actual_canon") | head -30
289
+ return 1
290
+ }
291
+
292
+ # --- AC-012 regression trap: no-intent leaves backlog section unchanged ------
293
+
294
+ test_when_no_intent_text_in_turn_then_backlog_section_byte_identical_to_pre_change() {
295
+ local root; root="$(seed_project)"; trap "rm -rf $root" RETURN
296
+ local tx="$root/transcript.jsonl"
297
+ # Pure text content, no intent triggers. Hook should produce ZERO backlog
298
+ # candidates and leave the rest of _pending.md byte-stable.
299
+ append_text_event "$tx" "user" "general question about how the system works"
300
+ append_text_event "$tx" "assistant" "Here is a long explanation with no future-intent triggers in it."
301
+ run_hook "$root" "$tx"
302
+ local pf; pf="$(pending_path "$root")"
303
+
304
+ assert_grep_count "$pf" '^## CANDIDATE: backlog → ' 0 "AC-012 no-intent input must produce zero backlog candidates" || return 1
305
+ # Body below the front-matter separator should remain the skeleton form
306
+ # (no CANDIDATE blocks of any kind, since there are no file edits either).
307
+ assert_grep_count "$pf" '^## CANDIDATE:' 0 "AC-012 expected zero CANDIDATE blocks for pure text/no-edits turn" || return 1
308
+ }
309
+
310
+ # --- runner ------------------------------------------------------------------
311
+
312
+ run test_when_user_prompt_has_anchored_todo_then_backlog_candidate_emitted
313
+ run test_when_assistant_text_has_anchored_lets_also_then_backlog_candidate_with_assistant_deferral_source
314
+ run test_when_intent_mid_sentence_then_no_backlog_candidate
315
+ run test_when_intent_in_system_reminder_block_then_no_backlog_candidate
316
+ run test_when_intent_at_indented_bullet_then_candidate_emitted
317
+ run test_when_intent_text_empty_after_trigger_strip_then_no_candidate
318
+ run test_when_two_intents_same_8word_prefix_then_distinct_keys
319
+ run test_when_same_intent_repeated_in_turn_then_within_session_dedup_holds
320
+ run test_when_turn_edits_files_no_intent_then_landmark_candidates_byte_identical
321
+ run test_when_no_intent_text_in_turn_then_backlog_section_byte_identical_to_pre_change
322
+
323
+ echo "----"
324
+ echo "Passed: $PASS Failed: $FAIL"
325
+ if [ "$FAIL" -gt 0 ]; then
326
+ echo "Failed tests:"
327
+ for t in "${FAILED[@]}"; do echo " - $t"; done
328
+ fi
329
+ exit $((FAIL > 0))
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env bash
2
+ # Integration test for the regenerate-ac008.sh helper.
3
+ # Covers AC-001 from docs/specs/workflow-loop-closing-hygiene.md.
4
+ #
5
+ # Contract: bash .claude/hooks/tests/fixtures/regenerate-ac008.sh overwrites
6
+ # .claude/hooks/tests/fixtures/ac008_byte_equal_reference.txt with the live
7
+ # memory_session_start.sh output (header + table block through pending-questions.md),
8
+ # with HEAD normalized to the `n/a` sentinel. After regen, the AC-008 byte-equality
9
+ # test inside memory_session_start_test.sh exits PASS.
10
+
11
+ set -uo pipefail
12
+
13
+ HERE="$(cd "$(dirname "$0")" && pwd)"
14
+ REPO_ROOT="$(cd "$HERE/../../.." && pwd)"
15
+ HELPER="$REPO_ROOT/.claude/hooks/tests/fixtures/regenerate-ac008.sh"
16
+ FIXTURE="$REPO_ROOT/.claude/hooks/tests/fixtures/ac008_byte_equal_reference.txt"
17
+ TEST_RUNNER="$REPO_ROOT/.claude/hooks/tests/memory_session_start_test.sh"
18
+
19
+ PASS=0; FAIL=0; FAILED=()
20
+
21
+ fail() { echo " FAIL: $*"; return 1; }
22
+
23
+ run() {
24
+ local name="$1"
25
+ echo "RUN $name"
26
+ if "$name"; then
27
+ PASS=$((PASS+1)); echo "PASS $name"
28
+ else
29
+ FAIL=$((FAIL+1)); FAILED+=("$name"); echo "FAIL $name"
30
+ fi
31
+ }
32
+
33
+ # --- tests --------------------------------------------------------------------
34
+
35
+ test_when_regenerate_ac008_runs_then_existing_ac008_test_passes() {
36
+ if [ ! -f "$HELPER" ]; then
37
+ fail "AC-001 helper not found at $HELPER"
38
+ return 1
39
+ fi
40
+ if [ ! -x "$HELPER" ] && ! head -1 "$HELPER" | grep -q '^#!'; then
41
+ fail "AC-001 helper $HELPER is not executable and has no shebang"
42
+ return 1
43
+ fi
44
+ # Stash the existing fixture so we can restore it if the test fails mid-flow.
45
+ local stash; stash="$(mktemp)"
46
+ cp "$FIXTURE" "$stash" 2>/dev/null || true
47
+ # Run the regenerator; the fixture is overwritten with normalized capture.
48
+ ( cd "$REPO_ROOT" && bash "$HELPER" ) >/dev/null 2>&1 \
49
+ || { fail "AC-001 regenerate helper exited non-zero"; cp "$stash" "$FIXTURE" 2>/dev/null; rm -f "$stash"; return 1; }
50
+ # The AC-008 byte-equal test must now pass against the freshly regenerated fixture.
51
+ ( cd "$REPO_ROOT" && bash "$TEST_RUNNER" >/dev/null 2>&1 ) \
52
+ || { fail "AC-001 memory_session_start_test.sh exited non-zero after regen"; cp "$stash" "$FIXTURE" 2>/dev/null; rm -f "$stash"; return 1; }
53
+ rm -f "$stash"
54
+ return 0
55
+ }
56
+
57
+ test_when_regenerate_ac008_runs_twice_then_fixture_is_byte_identical() {
58
+ if [ ! -f "$HELPER" ]; then
59
+ fail "AC-001 helper not found at $HELPER"
60
+ return 1
61
+ fi
62
+ local first second
63
+ ( cd "$REPO_ROOT" && bash "$HELPER" ) >/dev/null 2>&1 \
64
+ || { fail "AC-001 first regen exited non-zero"; return 1; }
65
+ first="$(sha256sum "$FIXTURE" 2>/dev/null | awk '{print $1}')"
66
+ ( cd "$REPO_ROOT" && bash "$HELPER" ) >/dev/null 2>&1 \
67
+ || { fail "AC-001 second regen exited non-zero"; return 1; }
68
+ second="$(sha256sum "$FIXTURE" 2>/dev/null | awk '{print $1}')"
69
+ if [ "$first" = "$second" ]; then return 0; fi
70
+ fail "AC-001 fixture not byte-identical across two regens ($first vs $second)"
71
+ return 1
72
+ }
73
+
74
+ test_when_regenerate_ac008_runs_then_fixture_head_line_is_n_a_sentinel() {
75
+ if [ ! -f "$HELPER" ]; then
76
+ fail "AC-001 helper not found at $HELPER"
77
+ return 1
78
+ fi
79
+ ( cd "$REPO_ROOT" && bash "$HELPER" ) >/dev/null 2>&1 \
80
+ || { fail "AC-001 regen exited non-zero"; return 1; }
81
+ if grep -qE '^HEAD: `n/a`' "$FIXTURE"; then return 0; fi
82
+ fail "AC-001 fixture HEAD line is not the n/a sentinel"
83
+ grep -E '^HEAD:' "$FIXTURE" || true
84
+ return 1
85
+ }
86
+
87
+ # --- runner -------------------------------------------------------------------
88
+
89
+ run test_when_regenerate_ac008_runs_then_existing_ac008_test_passes
90
+ run test_when_regenerate_ac008_runs_twice_then_fixture_is_byte_identical
91
+ run test_when_regenerate_ac008_runs_then_fixture_head_line_is_n_a_sentinel
92
+
93
+ echo "----"
94
+ echo "Passed: $PASS Failed: $FAIL"
95
+ if [ "$FAIL" -gt 0 ]; then
96
+ echo "Failed tests:"
97
+ for t in "${FAILED[@]}"; do echo " - $t"; done
98
+ fi
99
+ exit $((FAIL > 0))
@@ -12,6 +12,7 @@ Persistent project knowledge that travels with the repo. Loaded into Claude's co
12
12
  | `landmines.md` | `security`, `integrate`, `scout` | Gotchas: "do not edit X without also editing Y" |
13
13
  | `conventions.md` | `scenario`, `implement` | Repo-specific test/code idioms (fixture patterns, naming, layout) |
14
14
  | `pending-questions.md` | any phase | Open questions the current session couldn't resolve |
15
+ | `backlog.md` | `/memory-flush` | Future-work intent captured automatically by `memory_stop.sh` (intent-line extraction from user prompts and assistant text). Stale-exempt. |
15
16
  | `_pending.md` | `memory_stop.sh` (writes), `/memory-flush` (clears) | Auto-extracted candidates awaiting curation. **Content gitignored**; the file structure is committed. |
16
17
  | `_resume.md` | `memory_pre_compact.sh` + `memory_stop.sh` (write), `memory_session_start.sh` (reads), `harness` (reads) | **Continuity** snapshot — last completed phase, next phase due, in-flight files, recent user prompts. Refreshed every turn-end and again before compaction. Re-injected at every session start (compact / clear / resume / startup). **Gitignored** — pure session state, not project knowledge. |
17
18
 
@@ -23,6 +24,7 @@ Every entry MUST carry a `source:` field declaring how the rule was learned. All
23
24
  |---|---|---|
24
25
  | `user-instruction` | The user stated a rule or directive in conversation | **Required** |
25
26
  | `user-feedback` | The user corrected behavior or affirmed a non-obvious approach | **Required** |
27
+ | `assistant-deferral` | Claude verbalized a deferred follow-up during conversation (captured by `memory_stop.sh` intent extraction into `backlog.md`) | **Required** (Claude's own sentence as verbatim) |
26
28
  | `incident` | Recovered from an actual failure or near-miss in this session | Recommended (incident-report quote) |
27
29
  | `inferred-from-code` | Derived by reading the codebase | Not applicable |
28
30
  | `library-pinned` | Came from a `context7` lookup | Not applicable (cited URL/version is the source) |
@@ -32,7 +34,7 @@ For `source: user-instruction` and `source: user-feedback`, the entry MUST inclu
32
34
 
33
35
  The verbatim is not a summary, not a paraphrase, and not in Claude's voice. It is the user's words. If the original turn is no longer available, the entry's source is `unrecorded` and the curator MUST flag it for the user to confirm or restate at the next opportunity.
34
36
 
35
- `/memory-flush` SHALL reject any candidate promotion to a canonical file when `source` is `user-instruction` or `user-feedback` and `verbatim:` is missing or empty.
37
+ `/memory-flush` SHALL reject any candidate promotion to a canonical file when `source` is `user-instruction`, `user-feedback`, or `assistant-deferral` and `verbatim:` is missing or empty.
36
38
 
37
39
  ## Per-entry shape (canonical files)
38
40
 
@@ -61,6 +63,7 @@ Multiple verbatim blocks are allowed (and encouraged) when the user clarifies or
61
63
  | `landmines.md` | `path:line` or short description slug |
62
64
  | `conventions.md` | short slug |
63
65
  | `pending-questions.md` | auto-numbered `Q-NNN` |
66
+ | `backlog.md` | `<8-word-kebab-slug>-<4-char-sha256>` (derived by `memory_stop.sh` from the intent verbatim) |
64
67
 
65
68
  ## Self-healing rules
66
69
 
@@ -78,7 +81,7 @@ Two optional, register-specific closure fields cause `/memory-flush` Step 0 to d
78
81
  | File | Field | Semantics |
79
82
  |---|---|---|
80
83
  | `pending-questions.md` | `resolved-at: <ISO date>` | The question has been answered; entry is closed. |
81
- | `landmarks.md`, `libraries.md`, `decisions.md`, `landmines.md`, `conventions.md` | `superseded-at: <ISO date>` | The fact is no longer true; entry is closed. |
84
+ | `landmarks.md`, `libraries.md`, `decisions.md`, `landmines.md`, `conventions.md`, `backlog.md` | `superseded-at: <ISO date>` | The fact (or, for `backlog.md`, the open intent) is no longer current; entry is closed. On `backlog.md` the body `status:` field (`picked-up` / `dropped`) disambiguates which transition triggered the close. |
82
85
 
83
86
  **Per-file invariant**: on `pending-questions.md`, `superseded-at:` MUST NOT appear; on the other six canonical files, `resolved-at:` MUST NOT appear. Mutually exclusive at the file level. Not enforced by audit — documented invariant only. The `/memory-flush` Step 0a sweep flags violations in its report rather than deleting.
84
87
 
@@ -92,6 +95,8 @@ A match without a corresponding structured closure field causes `/memory-flush`
92
95
 
93
96
  **Closure short-circuits decay (AC-005).** `memory_session_start.sh` excludes any entry carrying a closure field from the stale count. `stale` ≠ `closed`: a stale entry is *unverified*; closure is a separate, deliberate signal that the entry is no longer load-bearing.
94
97
 
98
+ **Automated closure-stamp on backlog pickup.** When `/triage` records a workflow that picks up a backlog entry (the `workflow.json → source_backlog_keys` array carries the entry's stable key), `/commit` Step 6 invokes `python3 .claude/skills/memory-flush/sweep.py --mode stamp-closure --memory-dir .claude/memory --backlog-keys <csv>` after `git commit` succeeds. The mode writes `status: picked-up` + `superseded-at: <today>` to each named entry; the next `/memory-flush` Step 0a auto-deletes them. `/commit` is the only caller of this mode; `sweep.py` is the only writer to `backlog.md` during closure-stamping — the curator-not-writer pattern is preserved through the actuator boundary.
99
+
95
100
  ## How memory gets updated
96
101
 
97
102
  Two paths:
@@ -105,4 +110,4 @@ Two paths:
105
110
 
106
111
  ## Continuity vs knowledge
107
112
 
108
- Six canonical files plus `_pending.md` hold **project knowledge** — facts about the codebase that survive multiple sessions and get re-verified on every cite. `_resume.md` is different: it's a **continuity snapshot** describing the *current session* — what we just touched, what the user just asked, what phase we're on. It's overwritten each turn and gitignored. The split keeps long-term knowledge clean of session-state noise.
113
+ Seven canonical files plus `_pending.md` hold **project knowledge** — facts about the codebase that survive multiple sessions and get re-verified on every cite. `_resume.md` is different: it's a **continuity snapshot** describing the *current session* — what we just touched, what the user just asked, what phase we're on. It's overwritten each turn and gitignored. The split keeps long-term knowledge clean of session-state noise.
@@ -0,0 +1,12 @@
1
+ ---
2
+ owners: [/memory-flush]
3
+ category: future-work intent
4
+ size-cap: 500
5
+ key: <slug>-<4char-hash>
6
+ verifies-against: none
7
+ stale-exempt: true
8
+ ---
9
+
10
+ # Backlog
11
+
12
+ (populated by /memory-flush from auto-extracted candidates)
@@ -185,7 +185,12 @@
185
185
  },
186
186
  "consent": {
187
187
  "commit_ttl_seconds": 300,
188
- "gate_marker_ttl_seconds": 120
188
+ "gate_marker_ttl_seconds": 120,
189
+ "push_ttl_seconds": 300
190
+ },
191
+ "git": {
192
+ "protected_branches": null,
193
+ "branch_pattern": null
189
194
  },
190
195
  "swarm": {
191
196
  "max_parallel": 4,
@@ -7,7 +7,7 @@
7
7
  "matcher": "Bash",
8
8
  "hooks": [
9
9
  { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/destructive_cmd_guard.sh" },
10
- { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/git_commit_guard.sh" },
10
+ { "type": "command", "command": "node $CLAUDE_PROJECT_DIR/.claude/hooks/git_commit_guard.mjs" },
11
11
  { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/process_lifecycle_guard.sh" }
12
12
  ]
13
13
  },
@@ -18,7 +18,7 @@
18
18
  { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/env_guard.sh" },
19
19
  { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/spec_approval_guard.sh" },
20
20
  { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/swarm_approval_guard.sh" },
21
- { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/git_commit_guard.sh" },
21
+ { "type": "command", "command": "node $CLAUDE_PROJECT_DIR/.claude/hooks/git_commit_guard.mjs" },
22
22
  { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/verify_pass_guard.sh" },
23
23
  { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/track_guard.sh" },
24
24
  { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/artifact_template_guard.sh" },
@@ -38,7 +38,7 @@
38
38
  "UserPromptSubmit": [
39
39
  {
40
40
  "hooks": [
41
- { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/consent_gate_grant.sh" }
41
+ { "type": "command", "command": "node $CLAUDE_PROJECT_DIR/.claude/hooks/consent_gate_grant.mjs" }
42
42
  ]
43
43
  }
44
44
  ],
@@ -90,7 +90,6 @@
90
90
  "Bash(git blame:*)"
91
91
  ],
92
92
  "deny": [
93
- "Bash(git push:*)",
94
93
  "Bash(git commit --amend:*)",
95
94
  "Bash(git reset --hard:*)",
96
95
  "Bash(git clean -f:*)",