@su-record/vibe 2.9.13 → 2.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.ko.md +2 -29
  2. package/README.md +2 -29
  3. package/dist/cli/commands/info.d.ts.map +1 -1
  4. package/dist/cli/commands/info.js +3 -3
  5. package/dist/cli/commands/info.js.map +1 -1
  6. package/dist/cli/postinstall/constants.d.ts.map +1 -1
  7. package/dist/cli/postinstall/constants.js +2 -0
  8. package/dist/cli/postinstall/constants.js.map +1 -1
  9. package/dist/cli/postinstall/main.d.ts.map +1 -1
  10. package/dist/cli/postinstall/main.js +62 -52
  11. package/dist/cli/postinstall/main.js.map +1 -1
  12. package/dist/cli/setup/ProjectSetup.d.ts.map +1 -1
  13. package/dist/cli/setup/ProjectSetup.js +15 -0
  14. package/dist/cli/setup/ProjectSetup.js.map +1 -1
  15. package/dist/cli/utils/cli-detector.d.ts +14 -1
  16. package/dist/cli/utils/cli-detector.d.ts.map +1 -1
  17. package/dist/cli/utils/cli-detector.js +36 -1
  18. package/dist/cli/utils/cli-detector.js.map +1 -1
  19. package/dist/infra/lib/llm-availability.d.ts +5 -2
  20. package/dist/infra/lib/llm-availability.d.ts.map +1 -1
  21. package/dist/infra/lib/llm-availability.js +11 -4
  22. package/dist/infra/lib/llm-availability.js.map +1 -1
  23. package/dist/infra/orchestrator/LLMCluster.d.ts +11 -2
  24. package/dist/infra/orchestrator/LLMCluster.d.ts.map +1 -1
  25. package/dist/infra/orchestrator/LLMCluster.js +25 -5
  26. package/dist/infra/orchestrator/LLMCluster.js.map +1 -1
  27. package/hooks/hooks.json +10 -58
  28. package/hooks/scripts/__tests__/pre-tool-guard.test.js +4 -3
  29. package/hooks/scripts/__tests__/sentinel-guard.test.js +6 -8
  30. package/hooks/scripts/figma-guard.js +2 -3
  31. package/hooks/scripts/lib/dispatcher.js +83 -0
  32. package/hooks/scripts/llm-orchestrate.js +84 -11
  33. package/hooks/scripts/post-edit-dispatcher.js +24 -0
  34. package/hooks/scripts/pre-tool-dispatcher.js +31 -0
  35. package/hooks/scripts/pre-tool-guard.js +2 -3
  36. package/hooks/scripts/prompt-dispatcher.js +5 -0
  37. package/hooks/scripts/sentinel-guard.js +2 -3
  38. package/hooks/scripts/stop-dispatcher.js +27 -0
  39. package/package.json +1 -1
  40. package/skills/rob-pike/SKILL.md +64 -0
  41. package/skills/systematic-debugging/SKILL.md +140 -0
@@ -1,11 +1,11 @@
1
1
  /**
2
- * UserPromptSubmit Hook - LLM 오케스트레이션 (GPT/Gemini)
2
+ * UserPromptSubmit Hook - LLM 오케스트레이션 (GPT/Gemini/Claude)
3
3
  *
4
4
  * Usage:
5
5
  * node llm-orchestrate.js <provider> <mode> "prompt"
6
6
  * node llm-orchestrate.js <provider> <mode> "systemPrompt" "prompt"
7
7
  *
8
- * provider: gpt | gemini
8
+ * provider: gpt | gemini | claude
9
9
  * mode: orchestrate | orchestrate-json | image | analyze-image
10
10
  *
11
11
  * Image Mode:
@@ -13,7 +13,7 @@
13
13
  * node llm-orchestrate.js gemini image "prompt" --output "./image.png" --size "1920x1080"
14
14
  *
15
15
  * Features:
16
- * - CLI-based: GPT → codex exec, Gemini → gemini -p
16
+ * - CLI-based: GPT → codex exec, Gemini → gemini -p, Claude → claude --print
17
17
  * - Exponential backoff retry (3 attempts)
18
18
  * - Auto fallback: gpt ↔ gemini
19
19
  * - Overload/rate-limit detection
@@ -103,9 +103,31 @@ function resolveModel(providerName, config) {
103
103
  if (providerName === 'gpt-codex') return config.models?.gptCodex || 'gpt-5.3-codex';
104
104
  if (providerName === 'gpt') return config.models?.gpt || 'gpt-5.4';
105
105
  if (providerName === 'gemini') return config.models?.gemini || 'gemini-3.1-pro-preview';
106
+ if (providerName === 'claude') return 'claude';
106
107
  return providerName;
107
108
  }
108
109
 
110
+ /**
111
+ * 주관 LLM 자동 감지 — Claude가 보조로 사용되어야 하는 환경인지 판별
112
+ *
113
+ * true인 경우:
114
+ * - vibe-codex: ANTHROPIC_BASE_URL이 localhost (프록시 모드)
115
+ * - coco: ~/.coco/ 존재 또는 COCO_HOME 설정
116
+ * - 명시적: VIBE_SECONDARY_LLM=claude
117
+ */
118
+ function useClaudeAsSecondary() {
119
+ // 1. 명시적 환경변수
120
+ if (process.env.VIBE_SECONDARY_LLM === 'claude') return true;
121
+ // 2. vibe-codex 프록시 모드
122
+ const baseUrl = process.env.ANTHROPIC_BASE_URL || '';
123
+ if (baseUrl.includes('localhost') || baseUrl.includes('127.0.0.1')) return true;
124
+ // 3. coco 환경
125
+ if (process.env.COCO_HOME) return true;
126
+ const cocoDir = path.join(os.homedir(), '.coco');
127
+ if (fs.existsSync(cocoDir)) return true;
128
+ return false;
129
+ }
130
+
109
131
  // Errors that should skip retry and go to fallback immediately
110
132
  const SKIP_RETRY_PATTERNS = [
111
133
  /rate.?limit/i,
@@ -283,6 +305,43 @@ function callGeminiCli(prompt, sysPrompt, jsonMode, model, timeoutMs) {
283
305
  });
284
306
  }
285
307
 
308
+ function callClaudeCli(prompt, sysPrompt, jsonMode, timeoutMs) {
309
+ const fullPrompt = buildCliPrompt(prompt, sysPrompt, jsonMode);
310
+ const args = ['--print', '--dangerously-skip-permissions'];
311
+ const effectiveTimeout = timeoutMs || CLI_TIMEOUT_MS;
312
+
313
+ // 재귀 가드 — 자식 Claude 세션의 UserPromptSubmit hook이 또 claude CLI를
314
+ // spawn하는 포크 폭탄을 차단 (prompt-dispatcher.js가 이 env를 보고 즉시 종료).
315
+ const currentDepth = parseInt(process.env.VIBE_HOOK_DEPTH || '0', 10);
316
+ const childEnv = { ...process.env, VIBE_HOOK_DEPTH: String(currentDepth + 1) };
317
+
318
+ return new Promise((resolve, reject) => {
319
+ const proc = spawnCli('claude', args, {
320
+ stdio: ['pipe', 'pipe', 'pipe'],
321
+ timeout: effectiveTimeout,
322
+ env: childEnv,
323
+ });
324
+ proc.stdin.end(fullPrompt);
325
+
326
+ let stdout = '';
327
+ let stderr = '';
328
+ proc.stdout.on('data', (d) => { stdout += d.toString(); });
329
+ proc.stderr.on('data', (d) => { stderr += d.toString(); });
330
+
331
+ proc.on('close', (code) => {
332
+ if (code === 0 && stdout.trim()) {
333
+ resolve(stdout.trim());
334
+ } else {
335
+ reject(new Error(`claude cli failed (code ${code}): ${(stderr || stdout).slice(0, 500)}`));
336
+ }
337
+ });
338
+
339
+ proc.on('error', (err) => {
340
+ reject(new Error(`claude cli spawn error: ${err.message}`));
341
+ });
342
+ });
343
+ }
344
+
286
345
  async function callProvider(providerName, prompt, sysPrompt, jsonMode, timeoutMs) {
287
346
  const vibeConfig = readVibeConfig();
288
347
 
@@ -303,6 +362,10 @@ async function callProvider(providerName, prompt, sysPrompt, jsonMode, timeoutMs
303
362
  return await callGeminiCli(prompt, sysPrompt, jsonMode, model, timeoutMs);
304
363
  }
305
364
 
365
+ if (providerName === 'claude') {
366
+ return await callClaudeCli(prompt, sysPrompt, jsonMode, timeoutMs);
367
+ }
368
+
306
369
  throw new Error(`Unknown provider: ${providerName}`);
307
370
  }
308
371
 
@@ -526,14 +589,24 @@ async function main() {
526
589
  }
527
590
 
528
591
  // Provider chain: primary → cross fallback
529
- // WHY GPT → Gemini (not reverse): GPT is the primary code/reasoning model;
530
- // Gemini serves as cross-vendor fallback so a single vendor outage never
531
- // blocks the user. When Gemini is primary (e.g. web-search), GPT is fallback.
532
- const providerLabels = { gpt: 'GPT', 'gpt-codex': 'GPT Codex', gemini: 'Gemini' };
533
- const isGpt = provider === 'gpt' || provider === 'gpt-codex';
534
- const providerChain = isGpt
535
- ? [provider, 'gemini']
536
- : ['gemini', 'gpt'];
592
+ // 프록시 모드 (주관=GPT): 보조로 Claude CLI 사용
593
+ // 직접 모드 (주관=Claude): 보조로 GPT/Gemini 사용
594
+ const providerLabels = { gpt: 'GPT', 'gpt-codex': 'GPT Codex', gemini: 'Gemini', claude: 'Claude' };
595
+ const isGpt = provider === 'gpt' || provider === 'gpt-codex' || provider === 'gpt-spark';
596
+ const isClaude = provider === 'claude';
597
+ const claudeSecondary = useClaudeAsSecondary();
598
+
599
+ let providerChain;
600
+ if (isClaude) {
601
+ // 명시적 claude 호출
602
+ providerChain = ['claude', 'gemini'];
603
+ } else if (isGpt) {
604
+ // GPT 주관 → claude fallback (vibe-codex/coco), gemini fallback (직접 모드)
605
+ providerChain = claudeSecondary ? [provider, 'claude'] : [provider, 'gemini'];
606
+ } else {
607
+ // gemini 주관 → claude fallback (vibe-codex/coco), gpt fallback (직접 모드)
608
+ providerChain = claudeSecondary ? ['gemini', 'claude'] : ['gemini', 'gpt'];
609
+ }
537
610
 
538
611
  const vibeConfig = readVibeConfig();
539
612
 
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * PostToolUse dispatcher — Write/Edit 이후 순차 실행.
4
+ *
5
+ * 기존: PostToolUse.Write|Edit 배열에 3개 스크립트가 병렬 spawn (프로세스 피크 3배)
6
+ * + PostToolUse.Edit 추가로 post-edit.js 1개 더
7
+ * 현재: 단일 디스패처에서 순차 실행. config.hooks.{name}.enabled로 개별 토글.
8
+ *
9
+ * 실행 순서:
10
+ * 1. auto-format — 코드 스타일 정규화
11
+ * 2. code-check — 린트/품질 검사
12
+ * 3. auto-test — 관련 테스트 실행
13
+ * 4. post-edit — Edit 전용 후처리 (Write에서는 스크립트 내부에서 스킵)
14
+ *
15
+ * 실패 격리: 한 스크립트 실패해도 다음은 계속 진행.
16
+ */
17
+ import { dispatch } from './lib/dispatcher.js';
18
+
19
+ await dispatch([
20
+ { name: 'auto-format', script: 'auto-format.js' },
21
+ { name: 'code-check', script: 'code-check.js' },
22
+ { name: 'auto-test', script: 'auto-test.js' },
23
+ { name: 'post-edit', script: 'post-edit.js' },
24
+ ]);
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * PreToolUse dispatcher — Bash/Edit/Write 공용.
4
+ *
5
+ * 기존: matcher별로 2~3개 스크립트가 병렬 spawn.
6
+ * - Bash: sentinel-guard + pre-tool-guard + command-log
7
+ * - Edit: sentinel-guard + pre-tool-guard
8
+ * - Write: sentinel-guard + pre-tool-guard
9
+ * 현재: 단일 디스패처가 tool name을 인자로 받아 순차 실행.
10
+ *
11
+ * Deny 시맨틱 보존:
12
+ * sentinel-guard / pre-tool-guard가 exit 2(deny)를 반환하면 dispatcher도
13
+ * 즉시 exit 2로 상위에 전파 → Claude Code가 도구 실행을 차단.
14
+ *
15
+ * 사용법: node pre-tool-dispatcher.js <Bash|Edit|Write>
16
+ */
17
+ import { dispatch } from './lib/dispatcher.js';
18
+
19
+ const toolName = process.argv[2] || '';
20
+
21
+ const steps = [
22
+ { name: 'sentinel-guard', script: 'sentinel-guard.js', args: [toolName], denyOnExit2: true },
23
+ { name: 'pre-tool-guard', script: 'pre-tool-guard.js', args: [toolName], denyOnExit2: true },
24
+ ];
25
+
26
+ // command-log은 Bash 전용
27
+ if (toolName === 'Bash') {
28
+ steps.push({ name: 'command-log', script: 'command-log.js' });
29
+ }
30
+
31
+ await dispatch(steps);
@@ -160,10 +160,9 @@ function formatOutput(toolName, validation) {
160
160
  function readStdinSync() {
161
161
  try {
162
162
  if (process.stdin.isTTY) return null;
163
- const fd = fs.openSync('/dev/stdin', 'r');
163
+ // fd 0을 직접 사용 (Windows는 '/dev/stdin' 없음)
164
164
  const buf = Buffer.alloc(65536);
165
- const bytesRead = fs.readSync(fd, buf, 0, buf.length, null);
166
- fs.closeSync(fd);
165
+ const bytesRead = fs.readSync(0, buf, 0, buf.length, null);
167
166
  if (bytesRead > 0) {
168
167
  return JSON.parse(buf.toString('utf-8', 0, bytesRead));
169
168
  }
@@ -16,6 +16,11 @@ import path from 'path';
16
16
 
17
17
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
18
18
 
19
+ // 재귀 가드 — 자식 Claude 세션에서 이 hook이 다시 실행되는 것 차단.
20
+ // llm-orchestrate.js의 callClaudeCli가 VIBE_HOOK_DEPTH=1을 주입하므로,
21
+ // 값이 있으면 즉시 종료해 프로세스 폭탄을 막는다.
22
+ if (process.env.VIBE_HOOK_DEPTH) process.exit(0);
23
+
19
24
  // stdin에서 prompt 읽기
20
25
  let inputData = '';
21
26
  for await (const chunk of process.stdin) {
@@ -98,10 +98,9 @@ import fs from 'fs';
98
98
  function readStdinSync() {
99
99
  try {
100
100
  if (process.stdin.isTTY) return null;
101
- const fd = fs.openSync('/dev/stdin', 'r');
101
+ // fd 0을 직접 사용 (Windows는 '/dev/stdin' 없음)
102
102
  const buf = Buffer.alloc(65536);
103
- const bytesRead = fs.readSync(fd, buf, 0, buf.length, null);
104
- fs.closeSync(fd);
103
+ const bytesRead = fs.readSync(0, buf, 0, buf.length, null);
105
104
  if (bytesRead > 0) {
106
105
  return JSON.parse(buf.toString('utf-8', 0, bytesRead));
107
106
  }
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Stop dispatcher — Claude 응답 종료 시 4개 스크립트 순차 실행.
4
+ *
5
+ * 기존: Stop 배열에 4개 병렬 spawn (codex-review-gate + stop-notify + auto-commit + devlog-gen)
6
+ * → auto-commit의 git cascade와 겹쳐 프로세스 폭주 유발 가능.
7
+ * 현재: 단일 디스패처에서 순차 실행.
8
+ *
9
+ * 실행 순서:
10
+ * 1. codex-review-gate — 리뷰 필요 여부 판단 (stdout → Claude 지시 주입)
11
+ * 2. stop-notify — 완료 알림
12
+ * 3. auto-commit — 변경 자동 커밋 (git hook cascade 주의)
13
+ * 4. devlog-gen — 개발 로그 기록
14
+ *
15
+ * 재귀 가드 상속: callClaudeCli가 VIBE_HOOK_DEPTH=1 env를 자식에 주입했다면
16
+ * 이 Stop dispatcher도 건너뛴다 (자식 세션에서 auto-commit 등이 돌 이유 없음).
17
+ */
18
+ import { dispatch } from './lib/dispatcher.js';
19
+
20
+ if (process.env.VIBE_HOOK_DEPTH) process.exit(0);
21
+
22
+ await dispatch([
23
+ { name: 'codex-review-gate', script: 'codex-review-gate.js' },
24
+ { name: 'stop-notify', script: 'stop-notify.js' },
25
+ { name: 'auto-commit', script: 'auto-commit.js' },
26
+ { name: 'devlog-gen', script: 'devlog-gen.js' },
27
+ ]);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@su-record/vibe",
3
- "version": "2.9.13",
3
+ "version": "2.9.15",
4
4
  "description": "AI Coding Framework for Claude Code — 56 agents, 45 skills, multi-LLM orchestration",
5
5
  "type": "module",
6
6
  "main": "dist/cli/index.js",
@@ -0,0 +1,64 @@
1
+ ---
2
+ name: rob-pike
3
+ tier: core
4
+ description: "Rob Pike's 5 Rules — block premature optimization. Auto-activates on optimize, slow, performance, cache, parallelize keywords."
5
+ triggers: [optimize, slow, performance, cache, parallelize, bottleneck, speed up, faster, latency, benchmark]
6
+ priority: 90
7
+ ---
8
+
9
+ # Rob Pike's 5 Rules of Programming
10
+
11
+ ## The Rules
12
+
13
+ 1. **You can't tell where a program is going to spend its time.** Bottlenecks occur in surprising places. Don't guess — prove it.
14
+ 2. **Measure.** Don't tune for speed until you've measured. Even then, don't unless one part of the code overwhelms the rest.
15
+ 3. **Fancy algorithms are slow when n is small, and n is usually small.** Big-O doesn't matter when constants dominate. Use Rule 2 first.
16
+ 4. **Fancy algorithms are buggier than simple ones.** Use simple algorithms and simple data structures.
17
+ 5. **Data dominates.** Choose the right data structures and the algorithms become self-evident. "Write stupid code that uses smart objects."
18
+
19
+ ## Before Any Optimization
20
+
21
+ ### Step 0: Check for Existing Instrumentation
22
+
23
+ Before asking "have you measured?", determine whether measurement is even **possible** right now.
24
+
25
+ **Scan the codebase** for signs of existing instrumentation:
26
+ - Logging: logger imports, log calls, structured logging libraries
27
+ - Profiling: profiler imports, benchmark files, tracing setup
28
+ - Timing: duration measurements, stopwatch patterns, timing decorators
29
+ - APM/Observability: metrics exports, spans, trace contexts
30
+
31
+ **Then ask the user:**
32
+
33
+ 1. If instrumentation **exists**: "I found logging/profiling in [locations]. Are there specific areas you suspect are slow, or should we look at what the existing measurements tell us?"
34
+ 2. If instrumentation is **missing**: "There's no measurement in place. Before optimizing anything — where do you suspect the bottleneck is? Let's add measurement there first, then let the data decide."
35
+
36
+ ### Step 1: Ask the Measurement Questions
37
+
38
+ Stop and ask these questions in order:
39
+
40
+ 1. **"Have I measured?"** — If no, measure first.
41
+ 2. **"Does one part overwhelm the rest?"** — If no single area dominates, there is nothing worth optimizing.
42
+ 3. **"What's n?"** — If n is small (and it usually is), the simple O(n^2) approach likely beats the clever O(n log n) one.
43
+ 4. **"Is this a data structure problem?"** — Before changing the algorithm, consider whether a different data structure makes the problem trivial.
44
+ 5. **"Is the added complexity worth it?"** — Simple code that is 10% slower is almost always preferable to clever code that is fragile.
45
+
46
+ ## Anti-Patterns to Block
47
+
48
+ | Impulse | Rule violated | Response |
49
+ |---|---|---|
50
+ | "This loop looks slow, let me optimize it" | Rule 1 | Have you profiled? The bottleneck may be elsewhere entirely. |
51
+ | "Let me add a cache here" | Rule 2 | Measure first. Does this path actually dominate runtime? |
52
+ | "Let me use a B-tree / trie / skip list" | Rule 3 | What's n? If small, a sorted slice + binary search wins. |
53
+ | "Let me implement a custom allocator" | Rule 4 | Start simple. Measure. Only get fancy if data forces you. |
54
+ | "The algorithm is O(n^2), needs fixing" | Rule 3 | What's n? O(n^2) with n=100 is 10us. Measure first. |
55
+ | "Let me parallelize this" | Rule 2 | Is this actually CPU-bound? Measure. Often it's I/O. |
56
+
57
+ ## When Optimization IS Justified
58
+
59
+ Proceed only when ALL of these are true:
60
+
61
+ - You have measurement data showing a specific bottleneck
62
+ - That bottleneck dominates overall runtime (not just 5-10% of it)
63
+ - The proposed fix is the simplest change that addresses the measured problem
64
+ - You will re-measure after the change to confirm improvement
@@ -0,0 +1,140 @@
1
+ ---
2
+ name: systematic-debugging
3
+ tier: core
4
+ description: "Enforce reproduce-first, root-cause-first, failing-test-first debugging. Auto-activates on bug, error, fail, broken, crash, flaky keywords."
5
+ triggers: [bug, error, fail, broken, crash, flaky, not working, regression, unexpected, stack trace, exception, debug]
6
+ priority: 90
7
+ ---
8
+
9
+ # Systematic Debugging
10
+
11
+ ## Hard Gates
12
+
13
+ These rules have NO exceptions:
14
+
15
+ 1. **Never fix before reproducing or observing the failure.**
16
+ 2. **State a root-cause hypothesis before changing code.**
17
+ 3. **Write a failing test (or equivalent) before fixing.**
18
+ 4. **Test one hypothesis at a time.**
19
+ 5. **No "while I'm here" refactoring during a fix.**
20
+ 6. **3 failed fixes → suspect structural issue, stop patching.**
21
+
22
+ These excuses are NOT allowed:
23
+ - "It looks simple, I'll just fix it"
24
+ - "No time, let me patch and move on"
25
+ - "This seems like the issue, let me just change it"
26
+
27
+ ## Workflow
28
+
29
+ Follow this order strictly.
30
+
31
+ ### Phase 1. Define The Problem
32
+
33
+ ```text
34
+ Problem: <expected> but got <actual> under <condition>
35
+ ```
36
+
37
+ Good: "Product detail API returns 500 when brand is null."
38
+ Bad: "Serializer is broken because brand mapping seems wrong."
39
+
40
+ ### Phase 2. Reproduce Or Instrument
41
+
42
+ Priority:
43
+ 1. Reproduce with existing test
44
+ 2. Minimal integration test
45
+ 3. Unit test
46
+ 4. Reproduction script/command
47
+ 5. Add logging/instrumentation to observe
48
+
49
+ Rules:
50
+ - Make reproduction path as small as possible.
51
+ - If the bug shows up only in the UI, prefer reproducing it at a lower layer.
52
+ - If flaky, add logging for inputs, timing, concurrency conditions.
53
+ - If you can't reproduce the failure, do NOT proceed to a fix — increase observability first.
54
+
55
+ ### Phase 3. Gather Evidence
56
+
57
+ Collect observable facts only:
58
+ - Full error message and stack trace
59
+ - Failing input values
60
+ - Recent changed files/commits
61
+ - Environment/config differences
62
+ - Call path and data flow
63
+
64
+ At each boundary (controller → service → repository), check:
65
+ - What came in?
66
+ - What went out?
67
+ - What was transformed?
68
+ - Under what condition does it break?
69
+
70
+ Do NOT fix before locating the problem.
71
+
72
+ ### Phase 4. Isolate Root Cause
73
+
74
+ State exactly one candidate:
75
+
76
+ ```text
77
+ Hypothesis: <root cause> because <evidence>
78
+ ```
79
+
80
+ Good hypothesis conditions:
81
+ - Points to a single cause
82
+ - Connected to observed evidence
83
+ - Falsifiable with a small experiment
84
+
85
+ Bad: "Something async seems wrong" / "The whole serializer area is unstable"
86
+
87
+ ### Phase 5. Lock The Failure
88
+
89
+ Before fixing, lock the failure:
90
+ 1. Automated failing test (preferred)
91
+ 2. Add regression case to existing test
92
+ 3. Minimal reproduction script
93
+ 4. Temporary log/assertion guard
94
+
95
+ The test MUST fail before fix, pass after fix.
96
+
97
+ ### Phase 6. Single Fix
98
+
99
+ Allowed:
100
+ - Minimal code change addressing the root cause
101
+ - Minimal supporting changes for verification
102
+
103
+ Forbidden:
104
+ - Bundling multiple "related" fixes
105
+ - Refactoring alongside the fix
106
+ - Formatting/renaming/cleanup
107
+ - Adding null-guards without evidence
108
+ - Swallowing exceptions
109
+
110
+ If the fix fails → go back to Phase 3; the previous hypothesis was wrong.
111
+
112
+ ### Phase 7. Verify And Close
113
+
114
+ ALL must be true:
115
+ 1. Original reproduction path no longer fails
116
+ 2. The failing guard created in Phase 5 now passes
117
+ 3. Related tests don't break
118
+ 4. Fix addresses cause, not symptom
119
+
120
+ For flaky bugs: single pass is not enough. Repeat or vary conditions.
121
+
122
+ ## Red Flags — Stop Immediately
123
+
124
+ If you catch yourself thinking any of these, STOP and go back:
125
+
126
+ - "Let me just change this one line"
127
+ - "I'll check logs later, let me fix first"
128
+ - "I'll add the test later"
129
+ - "Let me fix this and that together"
130
+ - "The error is gone so it doesn't matter what caused it"
131
+
132
+ ## Completion Checklist
133
+
134
+ - [ ] Problem defined in one sentence
135
+ - [ ] Failure reproduced or made observable
136
+ - [ ] Evidence collected
137
+ - [ ] Single root-cause hypothesis stated
138
+ - [ ] Failing guard created before fix
139
+ - [ ] Single fix applied
140
+ - [ ] Verified via same reproduction path