@ai-content-space/loopx 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +343 -56
  2. package/README.zh-CN.md +392 -0
  3. package/package.json +4 -1
  4. package/plugins/loopx/.codex-plugin/plugin.json +1 -1
  5. package/plugins/loopx/scripts/plugin-install.test.mjs +1 -0
  6. package/plugins/loopx/skills/archive/SKILL.md +39 -0
  7. package/plugins/loopx/skills/build/SKILL.md +111 -9
  8. package/plugins/loopx/skills/clarify/SKILL.md +121 -1
  9. package/plugins/loopx/skills/debug/SKILL.md +296 -0
  10. package/plugins/loopx/skills/debug/condition-based-waiting.md +115 -0
  11. package/plugins/loopx/skills/debug/defense-in-depth.md +122 -0
  12. package/plugins/loopx/skills/debug/find-polluter.sh +63 -0
  13. package/plugins/loopx/skills/debug/root-cause-tracing.md +169 -0
  14. package/plugins/loopx/skills/go-style/SKILL.md +71 -0
  15. package/plugins/loopx/skills/kratos/SKILL.md +74 -0
  16. package/plugins/loopx/skills/kratos/references/advanced-features.md +314 -0
  17. package/plugins/loopx/skills/kratos/references/architecture.md +488 -0
  18. package/plugins/loopx/skills/kratos/references/configuration.md +399 -0
  19. package/plugins/loopx/skills/kratos/references/http-customization.md +512 -0
  20. package/plugins/loopx/skills/kratos/references/middleware-logging.md +400 -0
  21. package/plugins/loopx/skills/kratos/references/proto-api-design.md +432 -0
  22. package/plugins/loopx/skills/kratos/references/security-auth.md +411 -0
  23. package/plugins/loopx/skills/kratos/references/troubleshooting.md +385 -0
  24. package/plugins/loopx/skills/plan/SKILL.md +22 -2
  25. package/plugins/loopx/skills/review/SKILL.md +98 -1
  26. package/plugins/loopx/skills/tdd/SKILL.md +371 -0
  27. package/plugins/loopx/skills/tdd/testing-anti-patterns.md +299 -0
  28. package/plugins/loopx/skills/verify/SKILL.md +139 -0
  29. package/scripts/codex-stop-hook.mjs +71 -0
  30. package/scripts/codex-workflow-hook.mjs +153 -0
  31. package/skills/archive/SKILL.md +39 -0
  32. package/skills/build/SKILL.md +111 -9
  33. package/skills/clarify/SKILL.md +121 -1
  34. package/skills/debug/SKILL.md +296 -0
  35. package/skills/debug/condition-based-waiting.md +115 -0
  36. package/skills/debug/defense-in-depth.md +122 -0
  37. package/skills/debug/find-polluter.sh +63 -0
  38. package/skills/debug/root-cause-tracing.md +169 -0
  39. package/skills/go-style/SKILL.md +71 -0
  40. package/skills/kratos/SKILL.md +74 -0
  41. package/skills/kratos/references/advanced-features.md +314 -0
  42. package/skills/kratos/references/architecture.md +488 -0
  43. package/skills/kratos/references/configuration.md +399 -0
  44. package/skills/kratos/references/http-customization.md +512 -0
  45. package/skills/kratos/references/middleware-logging.md +400 -0
  46. package/skills/kratos/references/proto-api-design.md +432 -0
  47. package/skills/kratos/references/security-auth.md +411 -0
  48. package/skills/kratos/references/troubleshooting.md +385 -0
  49. package/skills/plan/SKILL.md +22 -2
  50. package/skills/review/SKILL.md +98 -1
  51. package/skills/tdd/SKILL.md +371 -0
  52. package/skills/tdd/testing-anti-patterns.md +299 -0
  53. package/skills/verify/SKILL.md +139 -0
  54. package/src/build-runtime.mjs +303 -26
  55. package/src/build-stop-gate.mjs +94 -0
  56. package/src/cli.mjs +51 -8
  57. package/src/codex-exec-runtime.mjs +105 -5
  58. package/src/context-manifest.mjs +172 -0
  59. package/src/install-discovery.mjs +352 -5
  60. package/src/next-skill.mjs +85 -0
  61. package/src/plan-runtime.mjs +100 -122
  62. package/src/review-runtime.mjs +378 -0
  63. package/src/runtime-maintenance.mjs +428 -14
  64. package/src/template-governance.mjs +223 -0
  65. package/src/workflow.mjs +1947 -118
  66. package/src/workspace-context.mjs +166 -0
  67. package/src/workspace-memory.mjs +69 -0
  68. package/templates/plan.md +6 -0
@@ -38,10 +38,12 @@ Most implementation drift happens before coding begins. Teams often think they n
38
38
 
39
39
  <Core_Principles>
40
40
  - Ask one question at a time.
41
+ - Prefer bounded multiple-choice questions when the option space is known; use open-ended questions only when the option space is genuinely unknown.
41
42
  - Prefer the highest-leverage unresolved question, not broad coverage.
42
43
  - Keep digging on the same thread until one assumption, one boundary, or one tradeoff becomes clearer.
43
44
  - Treat every answer as a claim to pressure-test, not a final truth to copy down.
44
45
  - Make `Non-goals` and `Decision Boundaries` mandatory gates.
46
+ - Default to YAGNI: shrink speculative scope unless the user gives a concrete reason it belongs in the first pass.
45
47
  - Do not stop at “requirements”; shape the solution enough that the next stage has a coherent starting design.
46
48
  </Core_Principles>
47
49
 
@@ -80,10 +82,20 @@ Do not mark a clarify spec handoff-ready by prose alone. Update the frontmatter
80
82
  </Runtime_State_Machine>
81
83
 
82
84
  <Execution_Policy>
85
+ - Always run a preflight context intake before the first interview question.
86
+ - If supplied context is too large for safe prompt use, first request or create a concise prompt-safe summary that preserves goals, constraints, success criteria, non-goals, decision boundaries, and source references.
83
87
  - Explore repo context before asking the user about internals.
84
88
  - Prefer evidence-backed clarification in brownfield work:
85
89
  - “I found X in Y. Should this clarify spec preserve that pattern?”
90
+ - Route facts before judgment:
91
+ - discoverable codebase facts should be inspected directly
92
+ - evidence-backed inferences should be confirmed with the user
93
+ - product intent, tradeoffs, scope, non-goals, and decision boundaries must be treated as human decisions
86
94
  - Ask about intent and boundaries before implementation detail.
95
+ - Respect stage priority:
96
+ 1. intent, outcome, scope, non-goals, decision boundaries
97
+ 2. constraints and success criteria
98
+ 3. brownfield grounding and integration details
87
99
  - Stay on the same thread when the answer is still weak instead of rotating dimensions just for coverage.
88
100
  - Revisit at least one earlier answer with an explicit assumption, evidence, or tradeoff follow-up before crystallizing.
89
101
  - If the task is too large for one coherent spec, decompose it before pretending it is ready.
@@ -111,6 +123,45 @@ When an answer is still weak, prefer one of these next:
111
123
  4. If the answer is still describing symptoms, reframe toward root cause.
112
124
  </Pressure_Patterns>
113
125
 
126
+ <Socratic_Questioning>
127
+ `clarify` should be Socratic without being vague.
128
+
129
+ - Ask one focused round at a time.
130
+ - Prefer bounded choices when they reduce user effort:
131
+ - use single-choice when one answer should drive the next branch
132
+ - use multi-choice when several constraints, non-goals, or success checks may all apply
133
+ - include `Other` only when the known options are likely incomplete
134
+ - Lead with the recommended option when repo evidence or prior answers support it, but make the tradeoff visible.
135
+ - Do not hide a branching interview tree inside one overloaded question. If an option would require a follow-up, ask that follow-up next.
136
+ - After each answer, decide whether the next highest-value move is:
137
+ - deeper pressure on the same thread
138
+ - zooming out to another unresolved breadth track
139
+ - crystallizing the spec
140
+ </Socratic_Questioning>
141
+
142
+ <Breadth_Ledger>
143
+ Maintain a lightweight breadth ledger across independent ambiguity tracks:
144
+
145
+ - scope
146
+ - constraints
147
+ - outputs / deliverables
148
+ - verification and success criteria
149
+ - brownfield integration
150
+ - user-mentioned workstreams or stakeholder requirements
151
+
152
+ The ledger is a guard, not a rotation rule. Stay deep on the current thread until it has been pressure-tested, then zoom out only when another material track remains unresolved and would change downstream execution.
153
+ </Breadth_Ledger>
154
+
155
+ <Challenge_Modes>
156
+ Use these assumption stress tests when applicable:
157
+
158
+ - **Contrarian**: challenge a core assumption when an answer rests on an untested belief.
159
+ - **Simplifier**: probe the smallest viable first pass when scope expands faster than outcome clarity.
160
+ - **Ontologist**: reframe toward essence/root cause when the user keeps describing symptoms or when ambiguity stalls.
161
+
162
+ Track which challenge modes have been used in the ambiguity register. Do not repeat a mode mechanically.
163
+ </Challenge_Modes>
164
+
114
165
  <Design_Shaping>
115
166
  `clarify` should not stop at “what do you want?” Once the intent is understandable, it should also shape the task enough that the downstream plan is not starting from zero.
116
167
 
@@ -120,10 +171,51 @@ When there is a real design choice:
120
171
  - lead with the recommended one
121
172
  - explain tradeoffs briefly
122
173
  - right-size the design to the task
174
+ - identify likely component boundaries, data flow, or user-facing flow when that would materially affect planning
175
+ - reject speculative features unless they are necessary for the stated outcome
123
176
 
124
177
  The goal is not to produce a full architecture doc here. The goal is to make the clarify spec design-ready.
125
178
  </Design_Shaping>
126
179
 
180
+ <Incremental_Validation>
181
+ For non-trivial designs, validate the design in small sections before writing the final clarify spec.
182
+
183
+ Present a compact design summary and ask whether it matches the user's intent. When relevant, validate these sections separately:
184
+
185
+ - user-facing behavior or workflow
186
+ - component boundaries / ownership
187
+ - data flow or API contract
188
+ - error handling and edge cases
189
+ - test and verification shape
190
+ - explicitly deferred work
191
+
192
+ If the user rejects a section, continue the interview loop instead of writing a handoff-ready spec.
193
+ </Incremental_Validation>
194
+
195
+ <Practical_Closure_Audit>
196
+ Treat a low ambiguity score as permission to audit closure, not as automatic permission to stop.
197
+
198
+ Before crystallizing, ask:
199
+
200
+ - Would one more question materially change implementation, test strategy, or scope?
201
+ - Are non-goals and decision boundaries explicit enough for downstream agents?
202
+ - Has at least one assumption or tradeoff been pressure-tested?
203
+ - Is remaining uncertainty residual risk rather than actionable ambiguity?
204
+
205
+ If remaining uncertainty would not change execution, crystallize the spec and preserve it as residual risk instead of opening a low-value branch.
206
+ </Practical_Closure_Audit>
207
+
208
+ <Spec_Self_Review>
209
+ Before marking a clarify spec handoff-ready, perform a self-review pass:
210
+
211
+ - remove placeholders such as `TBD`, `TODO`, `REPLACE_ME`, or vague “etc.”
212
+ - check for internal contradictions
213
+ - check whether the scope is still too broad for one coherent execution package
214
+ - check whether any requirement can be interpreted two materially different ways
215
+ - verify that non-goals, decision boundaries, acceptance criteria, and residual risks are explicit
216
+ - verify that brownfield evidence is labeled separately from inference
217
+ </Spec_Self_Review>
218
+
127
219
  <Process>
128
220
 
129
221
  ## 1. Explore Context
@@ -131,12 +223,22 @@ The goal is not to produce a full architecture doc here. The goal is to make the
131
223
  - Read relevant files, docs, and current patterns first.
132
224
  - Classify the work as brownfield or greenfield.
133
225
  - For brownfield work, collect concrete evidence before questioning.
226
+ - Create or update a compact context snapshot for the task when the conversation, source docs, or repo evidence would otherwise be too large to carry safely.
134
227
 
135
228
  ## 2. Interview
136
229
 
137
230
  - Ask one question per round.
231
+ - Prefer bounded choices for known option spaces; use open-ended questions only when the valid answers cannot be enumerated.
232
+ - Before asking each question, show a compact status line with:
233
+ - `round`: current round and max rounds
234
+ - `ambiguity_score`: current score
235
+ - `target`: selected profile threshold
236
+ - `open_items`: unresolved ambiguity count
237
+ - Also state the current focus dimension and whether the round is fact confirmation or human judgment.
238
+ - After the user answers and the round is updated, show the revised `ambiguity_score`, whether it moved up/down/unchanged, and the main reason for that change before asking the next question.
138
239
  - After each answered round, update `current_round`, the ambiguity register, and `ambiguity_score`.
139
- - Target the weakest unresolved dimension.
240
+ - Target the weakest unresolved dimension within the stage-priority order.
241
+ - Maintain the breadth ledger; do not rotate dimensions just for coverage.
140
242
  - Keep `Non-goals` and `Decision Boundaries` explicit from early in the process.
141
243
  - Respect the selected profile:
142
244
  - `standard`: stop only when the clarify spec is handoff-ready or `15` rounds are exhausted
@@ -154,6 +256,13 @@ The goal is not to produce a full architecture doc here. The goal is to make the
154
256
  - Where needed, propose a small set of options.
155
257
  - Recommend one approach.
156
258
  - Clarify what that approach implies for scope and downstream execution.
259
+ - Apply incremental validation for non-trivial designs before finalizing the spec.
260
+
261
+ ## 4.5. Closure and Self-Review
262
+
263
+ - Run the practical closure audit.
264
+ - Run the spec self-review checklist before marking handoff-ready.
265
+ - If the round cap is reached or the user chooses to proceed despite ambiguity, preserve an explicit residual-risk warning in the spec and handoff recommendation.
157
266
 
158
267
  ## 5. Write the Clarify Spec
159
268
 
@@ -179,8 +288,11 @@ The clarify spec should include:
179
288
  - constraints
180
289
  - success criteria
181
290
  - assumptions exposed and resolved
291
+ - pressure-pass findings
182
292
  - brownfield evidence vs inference
293
+ - breadth ledger / unresolved tracks, if any
183
294
  - design direction / preferred approach
295
+ - residual risks
184
296
  - explicit next handoff recommendation
185
297
 
186
298
  ## 6. Handoff
@@ -191,6 +303,12 @@ After the clarify spec is ready:
191
303
  - hand off to `build` only if the user explicitly wants direct execution and the task is already concrete enough
192
304
  - hand off to `autopilot` only when the scope is sufficiently tight for a bounded end-to-end run
193
305
 
306
+ Preferred explicit handoff contract:
307
+
308
+ - Recommended invocation: `$plan <slug>`
309
+ - Artifact-pinned invocation when needed: `$plan --direct .loopx/specs/clarify-<slug>-<timestamp>.md`
310
+ - Consumer behavior: treat the clarify spec as the source of truth for intent, non-goals, decision boundaries, constraints, and design direction; do not reopen clarification by default
311
+
194
312
  `clarify` itself does not implement the feature.
195
313
 
196
314
  </Process>
@@ -199,6 +317,8 @@ After the clarify spec is ready:
199
317
  - `Non-goals` are explicit
200
318
  - `Decision Boundaries` are explicit
201
319
  - At least one pressure-pass follow-up has revisited an earlier answer
320
+ - The practical closure audit has passed
321
+ - The spec self-review pass has removed placeholders, contradictions, and material ambiguity
202
322
  - A written clarify spec exists
203
323
  - The task is small enough and clear enough for real downstream handoff
204
324
  - The selected profile threshold is met:
@@ -0,0 +1,296 @@
1
+ ---
2
+ name: debug
3
+ description: Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes
4
+ ---
5
+
6
+ # Systematic Debugging
7
+
8
+ ## Overview
9
+
10
+ Random fixes waste time and create new bugs. Quick patches mask underlying issues.
11
+
12
+ **Core principle:** ALWAYS find root cause before attempting fixes. Symptom fixes are failure.
13
+
14
+ **Violating the letter of this process is violating the spirit of debugging.**
15
+
16
+ ## The Iron Law
17
+
18
+ ```
19
+ NO FIXES WITHOUT ROOT CAUSE INVESTIGATION FIRST
20
+ ```
21
+
22
+ If you haven't completed Phase 1, you cannot propose fixes.
23
+
24
+ ## When to Use
25
+
26
+ Use for ANY technical issue:
27
+ - Test failures
28
+ - Bugs in production
29
+ - Unexpected behavior
30
+ - Performance problems
31
+ - Build failures
32
+ - Integration issues
33
+
34
+ **Use this ESPECIALLY when:**
35
+ - Under time pressure (emergencies make guessing tempting)
36
+ - "Just one quick fix" seems obvious
37
+ - You've already tried multiple fixes
38
+ - Previous fix didn't work
39
+ - You don't fully understand the issue
40
+
41
+ **Don't skip when:**
42
+ - Issue seems simple (simple bugs have root causes too)
43
+ - You're in a hurry (rushing guarantees rework)
44
+ - Manager wants it fixed NOW (systematic is faster than thrashing)
45
+
46
+ ## The Four Phases
47
+
48
+ You MUST complete each phase before proceeding to the next.
49
+
50
+ ### Phase 1: Root Cause Investigation
51
+
52
+ **BEFORE attempting ANY fix:**
53
+
54
+ 1. **Read Error Messages Carefully**
55
+ - Don't skip past errors or warnings
56
+ - They often contain the exact solution
57
+ - Read stack traces completely
58
+ - Note line numbers, file paths, error codes
59
+
60
+ 2. **Reproduce Consistently**
61
+ - Can you trigger it reliably?
62
+ - What are the exact steps?
63
+ - Does it happen every time?
64
+ - If not reproducible → gather more data, don't guess
65
+
66
+ 3. **Check Recent Changes**
67
+ - What changed that could cause this?
68
+ - Git diff, recent commits
69
+ - New dependencies, config changes
70
+ - Environmental differences
71
+
72
+ 4. **Gather Evidence in Multi-Component Systems**
73
+
74
+ **WHEN system has multiple components (CI → build → signing, API → service → database):**
75
+
76
+ **BEFORE proposing fixes, add diagnostic instrumentation:**
77
+ ```
78
+ For EACH component boundary:
79
+ - Log what data enters component
80
+ - Log what data exits component
81
+ - Verify environment/config propagation
82
+ - Check state at each layer
83
+
84
+ Run once to gather evidence showing WHERE it breaks
85
+ THEN analyze evidence to identify failing component
86
+ THEN investigate that specific component
87
+ ```
88
+
89
+ **Example (multi-layer system):**
90
+ ```bash
91
+ # Layer 1: Workflow
92
+ echo "=== Secrets available in workflow: ==="
93
+ [ -n "${IDENTITY:-}" ] && echo "IDENTITY: SET" || echo "IDENTITY: UNSET"
94
+
95
+ # Layer 2: Build script
96
+ echo "=== Env vars in build script: ==="
97
+ env | grep IDENTITY || echo "IDENTITY not in environment"
98
+
99
+ # Layer 3: Signing script
100
+ echo "=== Keychain state: ==="
101
+ security list-keychains
102
+ security find-identity -v
103
+
104
+ # Layer 4: Actual signing
105
+ codesign --sign "$IDENTITY" --verbose=4 "$APP"
106
+ ```
107
+
108
+ **This reveals:** Which layer fails (secrets → workflow ✓, workflow → build ✗)
109
+
110
+ 5. **Trace Data Flow**
111
+
112
+ **WHEN error is deep in call stack:**
113
+
114
+ See `root-cause-tracing.md` in this directory for the complete backward tracing technique.
115
+
116
+ **Quick version:**
117
+ - Where does bad value originate?
118
+ - What called this with bad value?
119
+ - Keep tracing up until you find the source
120
+ - Fix at source, not at symptom
121
+
122
+ ### Phase 2: Pattern Analysis
123
+
124
+ **Find the pattern before fixing:**
125
+
126
+ 1. **Find Working Examples**
127
+ - Locate similar working code in same codebase
128
+ - What works that's similar to what's broken?
129
+
130
+ 2. **Compare Against References**
131
+ - If implementing pattern, read reference implementation COMPLETELY
132
+ - Don't skim - read every line
133
+ - Understand the pattern fully before applying
134
+
135
+ 3. **Identify Differences**
136
+ - What's different between working and broken?
137
+ - List every difference, however small
138
+ - Don't assume "that can't matter"
139
+
140
+ 4. **Understand Dependencies**
141
+ - What other components does this need?
142
+ - What settings, config, environment?
143
+ - What assumptions does it make?
144
+
145
+ ### Phase 3: Hypothesis and Testing
146
+
147
+ **Scientific method:**
148
+
149
+ 1. **Form Single Hypothesis**
150
+ - State clearly: "I think X is the root cause because Y"
151
+ - Write it down
152
+ - Be specific, not vague
153
+
154
+ 2. **Test Minimally**
155
+ - Make the SMALLEST possible change to test hypothesis
156
+ - One variable at a time
157
+ - Don't fix multiple things at once
158
+
159
+ 3. **Verify Before Continuing**
160
+ - Did it work? Yes → Phase 4
161
+ - Didn't work? Form NEW hypothesis
162
+ - DON'T add more fixes on top
163
+
164
+ 4. **When You Don't Know**
165
+ - Say "I don't understand X"
166
+ - Don't pretend to know
167
+ - Ask for help
168
+ - Research more
169
+
170
+ ### Phase 4: Implementation
171
+
172
+ **Fix the root cause, not the symptom:**
173
+
174
+ 1. **Create Failing Test Case**
175
+ - Simplest possible reproduction
176
+ - Automated test if possible
177
+ - One-off test script if no framework
178
+ - MUST have before fixing
179
+ - Use the `tdd` skill for writing proper failing tests
180
+
181
+ 2. **Implement Single Fix**
182
+ - Address the root cause identified
183
+ - ONE change at a time
184
+ - No "while I'm here" improvements
185
+ - No bundled refactoring
186
+
187
+ 3. **Verify Fix**
188
+ - Test passes now?
189
+ - No other tests broken?
190
+ - Issue actually resolved?
191
+
192
+ 4. **If Fix Doesn't Work**
193
+ - STOP
194
+ - Count: How many fixes have you tried?
195
+ - If < 3: Return to Phase 1, re-analyze with new information
196
+ - **If ≥ 3: STOP and question the architecture (step 5 below)**
197
+ - DON'T attempt Fix #4 without architectural discussion
198
+
199
+ 5. **If 3+ Fixes Failed: Question Architecture**
200
+
201
+ **Pattern indicating architectural problem:**
202
+ - Each fix reveals new shared state/coupling/problem in different place
203
+ - Fixes require "massive refactoring" to implement
204
+ - Each fix creates new symptoms elsewhere
205
+
206
+ **STOP and question fundamentals:**
207
+ - Is this pattern fundamentally sound?
208
+ - Are we "sticking with it through sheer inertia"?
209
+ - Should we refactor architecture vs. continue fixing symptoms?
210
+
211
+ **Discuss with your human partner before attempting more fixes**
212
+
213
+ This is NOT a failed hypothesis - this is a wrong architecture.
214
+
215
+ ## Red Flags - STOP and Follow Process
216
+
217
+ If you catch yourself thinking:
218
+ - "Quick fix for now, investigate later"
219
+ - "Just try changing X and see if it works"
220
+ - "Add multiple changes, run tests"
221
+ - "Skip the test, I'll manually verify"
222
+ - "It's probably X, let me fix that"
223
+ - "I don't fully understand but this might work"
224
+ - "Pattern says X but I'll adapt it differently"
225
+ - "Here are the main problems: [lists fixes without investigation]"
226
+ - Proposing solutions before tracing data flow
227
+ - **"One more fix attempt" (when already tried 2+)**
228
+ - **Each fix reveals new problem in different place**
229
+
230
+ **ALL of these mean: STOP. Return to Phase 1.**
231
+
232
+ **If 3+ fixes failed:** Question the architecture (see Phase 4, Step 5)
233
+
234
+ ## Your Human Partner's Signals You're Doing It Wrong
235
+
236
+ **Watch for these redirections:**
237
+ - "Is that not happening?" - You assumed without verifying
238
+ - "Will it show us...?" - You should have added evidence gathering
239
+ - "Stop guessing" - You're proposing fixes without understanding
240
+ - "Ultrathink this" - Question fundamentals, not just symptoms
241
+ - "We're stuck?" (frustrated) - Your approach isn't working
242
+
243
+ **When you see these:** STOP. Return to Phase 1.
244
+
245
+ ## Common Rationalizations
246
+
247
+ | Excuse | Reality |
248
+ |--------|---------|
249
+ | "Issue is simple, don't need process" | Simple issues have root causes too. Process is fast for simple bugs. |
250
+ | "Emergency, no time for process" | Systematic debugging is FASTER than guess-and-check thrashing. |
251
+ | "Just try this first, then investigate" | First fix sets the pattern. Do it right from the start. |
252
+ | "I'll write test after confirming fix works" | Untested fixes don't stick. Test first proves it. |
253
+ | "Multiple fixes at once saves time" | Can't isolate what worked. Causes new bugs. |
254
+ | "Reference too long, I'll adapt the pattern" | Partial understanding guarantees bugs. Read it completely. |
255
+ | "I see the problem, let me fix it" | Seeing symptoms ≠ understanding root cause. |
256
+ | "One more fix attempt" (after 2+ failures) | 3+ failures = architectural problem. Question pattern, don't fix again. |
257
+
258
+ ## Quick Reference
259
+
260
+ | Phase | Key Activities | Success Criteria |
261
+ |-------|---------------|------------------|
262
+ | **1. Root Cause** | Read errors, reproduce, check changes, gather evidence | Understand WHAT and WHY |
263
+ | **2. Pattern** | Find working examples, compare | Identify differences |
264
+ | **3. Hypothesis** | Form theory, test minimally | Confirmed or new hypothesis |
265
+ | **4. Implementation** | Create test, fix, verify | Bug resolved, tests pass |
266
+
267
+ ## When Process Reveals "No Root Cause"
268
+
269
+ If systematic investigation reveals issue is truly environmental, timing-dependent, or external:
270
+
271
+ 1. You've completed the process
272
+ 2. Document what you investigated
273
+ 3. Implement appropriate handling (retry, timeout, error message)
274
+ 4. Add monitoring/logging for future investigation
275
+
276
+ **But:** 95% of "no root cause" cases are incomplete investigation.
277
+
278
+ ## Supporting Techniques
279
+
280
+ These techniques are part of systematic debugging and available in this directory:
281
+
282
+ - **`root-cause-tracing.md`** - Trace bugs backward through call stack to find original trigger
283
+ - **`defense-in-depth.md`** - Add validation at multiple layers after finding root cause
284
+ - **`condition-based-waiting.md`** - Replace arbitrary timeouts with condition polling
285
+
286
+ **Related skills:**
287
+ - **tdd** - For creating failing test case (Phase 4, Step 1)
288
+ - **verify** - Verify fix worked before claiming success
289
+
290
+ ## Real-World Impact
291
+
292
+ From debugging sessions:
293
+ - Systematic approach: 15-30 minutes to fix
294
+ - Random fixes approach: 2-3 hours of thrashing
295
+ - First-time fix rate: 95% vs 40%
296
+ - New bugs introduced: Near zero vs common
@@ -0,0 +1,115 @@
1
+ # Condition-Based Waiting
2
+
3
+ ## Overview
4
+
5
+ Flaky tests often guess at timing with arbitrary delays. This creates race conditions where tests pass on fast machines but fail under load or in CI.
6
+
7
+ **Core principle:** Wait for the actual condition you care about, not a guess about how long it takes.
8
+
9
+ ## When to Use
10
+
11
+ ```dot
12
+ digraph when_to_use {
13
+ "Test uses setTimeout/sleep?" [shape=diamond];
14
+ "Testing timing behavior?" [shape=diamond];
15
+ "Document WHY timeout needed" [shape=box];
16
+ "Use condition-based waiting" [shape=box];
17
+
18
+ "Test uses setTimeout/sleep?" -> "Testing timing behavior?" [label="yes"];
19
+ "Testing timing behavior?" -> "Document WHY timeout needed" [label="yes"];
20
+ "Testing timing behavior?" -> "Use condition-based waiting" [label="no"];
21
+ }
22
+ ```
23
+
24
+ **Use when:**
25
+ - Tests have arbitrary delays (`setTimeout`, `sleep`, `time.sleep()`)
26
+ - Tests are flaky (pass sometimes, fail under load)
27
+ - Tests time out when run in parallel
28
+ - Waiting for async operations to complete
29
+
30
+ **Don't use when:**
31
+ - Testing actual timing behavior (debounce, throttle intervals)
32
+ - (If you do keep an arbitrary timeout for such a test, always document WHY it is needed)
33
+
34
+ ## Core Pattern
35
+
36
+ ```typescript
37
+ // ❌ BEFORE: Guessing at timing
38
+ await new Promise(r => setTimeout(r, 50));
39
+ const result = getResult();
40
+ expect(result).toBeDefined();
41
+
42
+ // ✅ AFTER: Waiting for condition
43
+ await waitFor(() => getResult() !== undefined);
44
+ const result = getResult();
45
+ expect(result).toBeDefined();
46
+ ```
47
+
48
+ ## Quick Patterns
49
+
50
+ | Scenario | Pattern |
51
+ |----------|---------|
52
+ | Wait for event | `waitFor(() => events.find(e => e.type === 'DONE'))` |
53
+ | Wait for state | `waitFor(() => machine.state === 'ready')` |
54
+ | Wait for count | `waitFor(() => items.length >= 5)` |
55
+ | Wait for file | `waitFor(() => fs.existsSync(path))` |
56
+ | Complex condition | `waitFor(() => obj.ready && obj.value > 10)` |
57
+
58
+ ## Implementation
59
+
60
+ Generic polling function:
61
+ ```typescript
62
+ async function waitFor<T>(
63
+ condition: () => T | undefined | null | false,
64
+ description = 'condition',
65
+ timeoutMs = 5000
66
+ ): Promise<T> {
67
+ const startTime = Date.now();
68
+
69
+ while (true) {
70
+ const result = condition();
71
+ if (result) return result;
72
+
73
+ if (Date.now() - startTime > timeoutMs) {
74
+ throw new Error(`Timeout waiting for ${description} after ${timeoutMs}ms`);
75
+ }
76
+
77
+ await new Promise(r => setTimeout(r, 10)); // Poll every 10ms
78
+ }
79
+ }
80
+ ```
81
+
82
+ Domain-specific helpers (`waitForEvent`, `waitForEventCount`, `waitForEventMatch`) from an actual debugging session can be built on top of this function. Note: the original `condition-based-waiting-example.ts` is referenced here but is not shipped in this directory.
83
+
84
+ ## Common Mistakes
85
+
86
+ **❌ Polling too fast:** `setTimeout(check, 1)` - wastes CPU
87
+ **✅ Fix:** Poll every 10ms
88
+
89
+ **❌ No timeout:** Loop forever if condition never met
90
+ **✅ Fix:** Always include timeout with clear error
91
+
92
+ **❌ Stale data:** Cache state before loop
93
+ **✅ Fix:** Call getter inside loop for fresh data
94
+
95
+ ## When Arbitrary Timeout IS Correct
96
+
97
+ ```typescript
98
+ // Tool ticks every 100ms - need 2 ticks to verify partial output
99
+ await waitForEvent(manager, 'TOOL_STARTED'); // First: wait for condition
100
+ await new Promise(r => setTimeout(r, 200)); // Then: wait for timed behavior
101
+ // 200ms = 2 ticks at 100ms intervals - documented and justified
102
+ ```
103
+
104
+ **Requirements:**
105
+ 1. First wait for triggering condition
106
+ 2. Based on known timing (not guessing)
107
+ 3. Comment explaining WHY
108
+
109
+ ## Real-World Impact
110
+
111
+ From debugging session (2025-10-03):
112
+ - Fixed 15 flaky tests across 3 files
113
+ - Pass rate: 60% → 100%
114
+ - Execution time: 40% faster
115
+ - No more race conditions