@simplysm/sd-claude 13.0.78 → 13.0.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/claude/rules/sd-claude-rules.md +4 -63
- package/claude/rules/sd-simplysm-usage.md +7 -0
- package/claude/sd-session-start.sh +10 -0
- package/claude/skills/sd-api-review/SKILL.md +89 -0
- package/claude/skills/sd-check/SKILL.md +55 -57
- package/claude/skills/sd-commit/SKILL.md +37 -42
- package/claude/skills/sd-debug/SKILL.md +75 -265
- package/claude/skills/sd-document/SKILL.md +63 -53
- package/claude/skills/sd-document/_common.py +94 -0
- package/claude/skills/sd-document/extract_docx.py +19 -48
- package/claude/skills/sd-document/extract_pdf.py +22 -50
- package/claude/skills/sd-document/extract_pptx.py +17 -40
- package/claude/skills/sd-document/extract_xlsx.py +19 -40
- package/claude/skills/sd-email-analyze/SKILL.md +23 -31
- package/claude/skills/sd-email-analyze/email-analyzer.py +79 -65
- package/claude/skills/sd-init/SKILL.md +133 -0
- package/claude/skills/sd-plan/SKILL.md +69 -120
- package/claude/skills/sd-readme/SKILL.md +106 -131
- package/claude/skills/sd-review/SKILL.md +38 -155
- package/claude/skills/sd-simplify/SKILL.md +59 -0
- package/package.json +3 -2
- package/README.md +0 -297
- package/claude/refs/sd-angular.md +0 -127
- package/claude/refs/sd-code-conventions.md +0 -155
- package/claude/refs/sd-directories.md +0 -7
- package/claude/refs/sd-library-issue.md +0 -7
- package/claude/refs/sd-migration.md +0 -7
- package/claude/refs/sd-orm-v12.md +0 -81
- package/claude/refs/sd-orm.md +0 -23
- package/claude/refs/sd-service.md +0 -5
- package/claude/refs/sd-simplysm-docs.md +0 -52
- package/claude/refs/sd-solid.md +0 -68
- package/claude/refs/sd-workflow.md +0 -25
- package/claude/rules/sd-refs-linker.md +0 -52
- package/claude/sd-statusline.js +0 -296
- package/claude/skills/sd-api-name-review/SKILL.md +0 -154
- package/claude/skills/sd-brainstorm/SKILL.md +0 -215
- package/claude/skills/sd-debug/condition-based-waiting-example.ts +0 -158
- package/claude/skills/sd-debug/condition-based-waiting.md +0 -114
- package/claude/skills/sd-debug/defense-in-depth.md +0 -128
- package/claude/skills/sd-debug/find-polluter.sh +0 -64
- package/claude/skills/sd-debug/root-cause-tracing.md +0 -168
- package/claude/skills/sd-discuss/SKILL.md +0 -91
- package/claude/skills/sd-explore/SKILL.md +0 -118
- package/claude/skills/sd-plan-dev/SKILL.md +0 -294
- package/claude/skills/sd-plan-dev/code-quality-reviewer-prompt.md +0 -49
- package/claude/skills/sd-plan-dev/final-review-prompt.md +0 -50
- package/claude/skills/sd-plan-dev/implementer-prompt.md +0 -60
- package/claude/skills/sd-plan-dev/spec-reviewer-prompt.md +0 -45
- package/claude/skills/sd-review/api-reviewer-prompt.md +0 -75
- package/claude/skills/sd-review/code-reviewer-prompt.md +0 -82
- package/claude/skills/sd-review/convention-checker-prompt.md +0 -61
- package/claude/skills/sd-review/refactoring-analyzer-prompt.md +0 -92
- package/claude/skills/sd-skill/SKILL.md +0 -417
- package/claude/skills/sd-skill/anthropic-best-practices.md +0 -156
- package/claude/skills/sd-skill/cso-guide.md +0 -161
- package/claude/skills/sd-skill/examples/CLAUDE_MD_TESTING.md +0 -200
- package/claude/skills/sd-skill/persuasion-principles.md +0 -220
- package/claude/skills/sd-skill/testing-skills-with-subagents.md +0 -408
- package/claude/skills/sd-skill/writing-guide.md +0 -159
- package/claude/skills/sd-tdd/SKILL.md +0 -385
- package/claude/skills/sd-tdd/testing-anti-patterns.md +0 -317
- package/claude/skills/sd-use/SKILL.md +0 -67
- package/claude/skills/sd-worktree/SKILL.md +0 -78
--- a/package/claude/skills/sd-brainstorm/SKILL.md
+++ /dev/null
@@ -1,215 +0,0 @@
----
-name: sd-brainstorm
-description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements and design before implementation."
----
-
-# Brainstorming Ideas Into Designs
-
-## Overview
-
-Help turn ideas into fully formed designs and specs through natural collaborative dialogue.
-
-Start by understanding the current project context, then ask questions one at a time to refine the idea. Once you understand what you're building, present the design in small sections (200-300 words), checking after each section whether it looks right so far.
-
-## The Process
-
-**Understanding the idea:**
-- Check out the current project state first (files, docs, recent commits).
-- Ask questions one at a time to refine the idea
-- Prefer multiple choice questions when possible, but open-ended is fine too
-- Only one question per message - if a topic needs more exploration, break it into multiple questions
-- Focus on understanding: purpose, constraints, success criteria
-
-**When a main design document is provided as context:**
-
-```mermaid
-flowchart TD
-    A{"Main design with<br>section plan in context?"}
-    A -->|no| B[Normal brainstorm]
-    A -->|yes| C{Section specified?}
-    C -->|no| D["Show section progress<br>Ask which section<br>(suggest next incomplete)"]
-    C -->|yes| E{"Prerequisites<br>complete?"}
-    E -->|yes| F[Proceed with section]
-    E -->|no| G["Warn prerequisites incomplete<br>Ask: proceed anyway<br>or complete first?"]
-    G -->|"user: proceed"| F
-```
-
-When proceeding with a section:
-
-1. **Read the main design** — understand goals, overall structure, and the target section's scope
-2. **Read actual code** — check the current codebase state for what previous sections have built. Reference the **actual code**, NOT previous section design documents. Code may have diverged from earlier designs during implementation.
-3. **Scope the brainstorm** — limit questions, gap review, approaches, and design presentation to the target section only. Do not re-question decisions already established in the main design.
-4. **Conflict detection** — if the main design's direction conflicts with the actual code state, alert the user and ask for direction before proceeding.
-5. After the design is complete, save as `docs/plans/YYYY-MM-DD-<topic>-section-N-design.md`
-6. Update the main design document: mark the section `[ ]` → `[x]` in the section plan
-7. Commit both files, then proceed to the normal **Next Steps Guide** (Path A/B)
-
-**Gap review loop:**
-
-When you think you've asked enough, **STOP and run a gap review before moving on.**
-
-Tell the user you're running a gap review, then check ALL categories. For each ✅, you MUST **cite specific evidence** (which Q&A, code reference, or explicit user requirement). "I already know" is not evidence.
-
-| Category | Check for... |
-|----------|-------------|
-| Scope | What's in? What's explicitly out? |
-| User flows | All inputs, outputs, feedback, navigation |
-| Edge cases | Empty states, errors, limits, concurrency, undo |
-| Data | Shape, validation, persistence, migration, relationships |
-| Integration | How does this connect to existing code/systems? |
-| Non-functional | Performance, accessibility, security, i18n |
-| Assumptions | Anything you assumed but never confirmed |
-
-Output format — cite evidence for each:
-- `✅ Scope — [Q2: user confirmed X / code at file:line / requirement doc says Y]`
-- `❓ Edge cases — gap: [what's missing]`
-
-If evidence is vague ("obvious", "I already know", "common sense") → mark as ❓, not ✅.
-
-- If ANY ❓ exists → ask about it. After the user answers, **run the full checklist again from scratch**.
-- Only when ALL categories show ✅ with cited evidence → proceed to exploring approaches.
-
-**All-✅ on first run is PROHIBITED — not "suspicious", prohibited.**
-If your first gap review shows all ✅:
-1. You are rubber-stamping. Prior investigation ≠ complete design exploration.
-2. Pick the 2 weakest categories (thinnest evidence).
-3. Write one concrete unasked question per category.
-4. Ask those questions, then re-run the full checklist from scratch.
-
-| Excuse | Reality |
-|--------|---------|
-| "Requirements are already clear" | Clear requirements ≠ complete design. Edge cases, error states, integration points still need exploration. |
-| "I already investigated the code" | Code investigation reveals what IS. Design exploration asks what SHOULD BE. Different activities. |
-| "It's just a bug fix" | Bug fixes have edge cases: error states, concurrent access, timing changes, consumer compatibility. |
-| "User is frustrated/in a hurry" | Rushing causes exactly the mistakes brainstorming prevents. Slow down. |
-
-**Rules:**
-- You MUST show the checklist to the user every time you run it. No silent/internal-only checks.
-- Each run must re-examine ALL categories from zero — do not carry over previous results.
-- When in doubt, ask. One extra question costs less than a flawed design.
-
-**Exploring approaches:**
-- Propose 2-3 different approaches with trade-offs
-- Present options conversationally with your recommendation and reasoning
-- Lead with your recommended option and explain why
-
-**Scale assessment:**
-
-After the approach is selected, assess scale (file count, logic complexity, number of distinct subsystems, scope of impact):
-
-```mermaid
-flowchart TD
-    A{"Assess design scale"}
-    A -->|manageable| B["Proceed to<br>After the Design<br>(Path A/B)"]
-    A -->|large| C["Propose to user:<br>proceed as-is OR<br>split into sections"]
-    C --> D{"User choice?"}
-    D -->|"proceed as-is"| B
-    D -->|split| E["Propose 2-3 section<br>division approaches<br>(by feature/layer/dependency)"]
-    E -->|"user selects"| F["Append section plan<br>to design doc<br>Save + commit"]
-    F --> G["Show section guide<br>Brainstorm ENDS"]
-```
-
-**How to present the split proposal:**
-
-When proposing the split to the user, you MUST clearly explain what "section split" means:
-
-- **Section split** = the design document is divided into sections, and each section goes through its own **separate brainstorm → plan → plan-dev → check → commit cycle**.
-- This is NOT about implementation phasing (doing some changes before others). It's about breaking the design work itself into independently deliverable chunks.
-- Explain: "Splitting into sections means each section goes through its own brainstorm → plan → plan-dev cycle. Complete and commit one section before moving to the next."
-- Contrast with: "Proceeding as-is means this single design document goes straight to plan → plan-dev."
-
-**Section plan format** (append to existing design content as-is):
-
-```markdown
----
-
-## Section Plan
-
-- [ ] Section 1: <name> — <scope summary>
-- [ ] Section 2: <name> — <scope summary> (after section 1)
-- [ ] Section 3: <name> — <scope summary> (after section 1, 2)
-```
-
-**Section guide** (shown instead of Path A/B, in user's configured language):
-
-```
-Design has been split into sections.
-
-Main design: docs/plans/YYYY-MM-DD-<topic>-design.md
-
-Section progress:
-- [ ] Section 1: <name>
-- [ ] Section 2: <name> (after section 1)
-- [ ] Section 3: <name> (after section 1, 2)
-
-Run each section in order:
-  sd-brainstorm docs/plans/YYYY-MM-DD-<topic>-design.md section 1
-
-After each section's brainstorm completes, you can choose Path A/B
-to run plan → plan-dev → check → commit.
-```
-
-Do NOT auto-proceed to any section.
-
-## After the Design
-
-**Documentation:**
-- Write the validated design to `docs/plans/YYYY-MM-DD-<topic>-design.md`
-- Commit the design document to git
-
-**Next Steps Guide:**
-
-Present the following two workflow paths so the user can see the full process and choose.
-Display the guide in the **user's configured language** (follow the language settings from CLAUDE.md or system instructions).
-
-Before presenting, check git status for uncommitted changes. If there are any uncommitted changes (staged, unstaged, or untracked files), append the warning line (shown below) at the end of the guide block.
-
-```
-Design complete! Here's how to proceed:
-
---- Path A: With branch isolation (recommended for features/large changes) ---
-
-1. /sd-worktree add <name> — Create a worktree branch
-2. /sd-plan — Break into detailed tasks
-3. /sd-plan-dev — Execute tasks in parallel (includes TDD + review)
-4. /sd-check — Verify (modified + dependents)
-5. /sd-commit — Commit
-6. /sd-worktree merge — Merge back to main
-7. /sd-worktree clean — Remove worktree
-
---- Path B: Direct on current branch (quick fixes/small changes) ---
-
-1. /sd-plan — Break into detailed tasks
-2. /sd-plan-dev — Execute tasks in parallel (includes TDD + review)
-3. /sd-check — Verify (modified + dependents)
-4. /sd-commit — Commit
-
-You can start from any step or skip steps as needed.
-
-💡 "Path A: yolo" or "Path B: yolo" to auto-run all steps
-
-⚠️ You have uncommitted changes. To use Path A, run `/sd-commit all` first.
-```
-
-- The last `⚠️` line is only shown when uncommitted changes exist. Omit it when working tree is clean.
-- If the design does NOT involve code modifications, omit the `/sd-check` step from both paths.
-
-- After presenting both paths, **recommend one** based on the design's scope:
-  - Path A recommended: new features, multi-file changes, architectural changes, anything that benefits from isolation
-  - Path B recommended: small bug fixes, single-file changes, config tweaks, minor adjustments
-  - Briefly explain why (1 sentence)
-- Do NOT auto-proceed to any step. Present the overview with recommendation and wait for the user's choice.
-- **Yolo mode**: If the user responds with "Path A: yolo" or "Path B: yolo" (or similar intent like "A yolo", "B auto"), execute all steps of the chosen path sequentially without stopping between steps.
-- **Yolo sd-check — include dependents**: NEVER check only modified packages. Also check all packages that depend on them:
-  1. Identify modified packages from `git diff --name-only`
-  2. Trace reverse dependencies (packages that import from modified packages) using `package.json` or project dependency graph
-  3. Include integration/e2e tests that cover the modified packages
-  4. Run `/sd-check` with all affected paths, or `/sd-check` without path (whole project) when changes are widespread
-
-## Key Principles
-
-- **One question at a time** - Don't overwhelm with multiple questions
-- **Multiple choice preferred** - Easier to answer than open-ended when possible
-- **YAGNI ruthlessly** - Remove unnecessary features from all designs
-- **Explore alternatives** - Always propose 2-3 approaches before settling
-- **Be flexible** - Go back and clarify when something doesn't make sense
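
The "Yolo sd-check" steps in the removed sd-brainstorm skill above describe a concrete procedure: read modified files from `git diff --name-only`, then walk `package.json` files to find every package that transitively depends on a modified one. A minimal TypeScript sketch of that walk, assuming a hypothetical `packages/<dir>/package.json` monorepo layout; the layout, helper names, and output handling are illustrative, not part of the package:

```typescript
// Illustrative only: reverse-dependency tracing as described in the removed SKILL.md.
// Assumes a packages/<dir>/package.json layout; adapt the path pattern to the real repo.
import { execSync } from "node:child_process";
import { existsSync, readFileSync, readdirSync } from "node:fs";
import { join } from "node:path";

// Step 1: identify modified packages from `git diff --name-only`
function modifiedPackages(): Set<string> {
  const out = execSync("git diff --name-only", { encoding: "utf8" });
  const dirs = new Set<string>();
  for (const file of out.split("\n")) {
    const m = /^packages\/([^/]+)\//.exec(file);
    if (m) dirs.add(m[1]);
  }
  return dirs;
}

// Step 2: fixed-point walk over package.json files to collect packages that
// depend, directly or transitively, on any modified package.
function affectedPackages(modified: Set<string>): Set<string> {
  const nameOf = (dir: string): string =>
    JSON.parse(readFileSync(join("packages", dir, "package.json"), "utf8")).name as string;

  const affected = new Set(modified);
  let grew = true;
  while (grew) {
    grew = false;
    for (const dir of readdirSync("packages")) {
      const pkgJson = join("packages", dir, "package.json");
      if (!existsSync(pkgJson) || affected.has(dir)) continue;
      const pkg = JSON.parse(readFileSync(pkgJson, "utf8"));
      const deps = Object.keys({ ...pkg.dependencies, ...pkg.devDependencies });
      if ([...affected].some((a) => deps.includes(nameOf(a)))) {
        affected.add(dir); // a dependent is itself a target for /sd-check
        grew = true;       // re-scan: its own dependents are now affected too
      }
    }
  }
  return affected;
}

// Steps 3-4 stay manual: add integration/e2e paths, then run /sd-check on the set.
console.log([...affectedPackages(modifiedPackages())]);
```
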
--- a/package/claude/skills/sd-debug/condition-based-waiting-example.ts
+++ /dev/null
@@ -1,158 +0,0 @@
-// Complete implementation of condition-based waiting utilities
-// From: Lace test infrastructure improvements (2025-10-03)
-// Context: Fixed 15 flaky tests by replacing arbitrary timeouts
-
-import type { ThreadManager } from "~/threads/thread-manager";
-import type { LaceEvent, LaceEventType } from "~/threads/types";
-
-/**
- * Wait for a specific event type to appear in thread
- *
- * @param threadManager - The thread manager to query
- * @param threadId - Thread to check for events
- * @param eventType - Type of event to wait for
- * @param timeoutMs - Maximum time to wait (default 5000ms)
- * @returns Promise resolving to the first matching event
- *
- * Example:
- *   await waitForEvent(threadManager, agentThreadId, 'TOOL_RESULT');
- */
-export function waitForEvent(
-  threadManager: ThreadManager,
-  threadId: string,
-  eventType: LaceEventType,
-  timeoutMs = 5000,
-): Promise<LaceEvent> {
-  return new Promise((resolve, reject) => {
-    const startTime = Date.now();
-
-    const check = () => {
-      const events = threadManager.getEvents(threadId);
-      const event = events.find((e) => e.type === eventType);
-
-      if (event) {
-        resolve(event);
-      } else if (Date.now() - startTime > timeoutMs) {
-        reject(new Error(`Timeout waiting for ${eventType} event after ${timeoutMs}ms`));
-      } else {
-        setTimeout(check, 10); // Poll every 10ms for efficiency
-      }
-    };
-
-    check();
-  });
-}
-
-/**
- * Wait for a specific number of events of a given type
- *
- * @param threadManager - The thread manager to query
- * @param threadId - Thread to check for events
- * @param eventType - Type of event to wait for
- * @param count - Number of events to wait for
- * @param timeoutMs - Maximum time to wait (default 5000ms)
- * @returns Promise resolving to all matching events once count is reached
- *
- * Example:
- *   // Wait for 2 AGENT_MESSAGE events (initial response + continuation)
- *   await waitForEventCount(threadManager, agentThreadId, 'AGENT_MESSAGE', 2);
- */
-export function waitForEventCount(
-  threadManager: ThreadManager,
-  threadId: string,
-  eventType: LaceEventType,
-  count: number,
-  timeoutMs = 5000,
-): Promise<LaceEvent[]> {
-  return new Promise((resolve, reject) => {
-    const startTime = Date.now();
-
-    const check = () => {
-      const events = threadManager.getEvents(threadId);
-      const matchingEvents = events.filter((e) => e.type === eventType);
-
-      if (matchingEvents.length >= count) {
-        resolve(matchingEvents);
-      } else if (Date.now() - startTime > timeoutMs) {
-        reject(
-          new Error(
-            `Timeout waiting for ${count} ${eventType} events after ${timeoutMs}ms (got ${matchingEvents.length})`,
-          ),
-        );
-      } else {
-        setTimeout(check, 10);
-      }
-    };
-
-    check();
-  });
-}
-
-/**
- * Wait for an event matching a custom predicate
- * Useful when you need to check event data, not just type
- *
- * @param threadManager - The thread manager to query
- * @param threadId - Thread to check for events
- * @param predicate - Function that returns true when event matches
- * @param description - Human-readable description for error messages
- * @param timeoutMs - Maximum time to wait (default 5000ms)
- * @returns Promise resolving to the first matching event
- *
- * Example:
- *   // Wait for TOOL_RESULT with specific ID
- *   await waitForEventMatch(
- *     threadManager,
- *     agentThreadId,
- *     (e) => e.type === 'TOOL_RESULT' && e.data.id === 'call_123',
- *     'TOOL_RESULT with id=call_123'
- *   );
- */
-export function waitForEventMatch(
-  threadManager: ThreadManager,
-  threadId: string,
-  predicate: (event: LaceEvent) => boolean,
-  description: string,
-  timeoutMs = 5000,
-): Promise<LaceEvent> {
-  return new Promise((resolve, reject) => {
-    const startTime = Date.now();
-
-    const check = () => {
-      const events = threadManager.getEvents(threadId);
-      const event = events.find(predicate);
-
-      if (event) {
-        resolve(event);
-      } else if (Date.now() - startTime > timeoutMs) {
-        reject(new Error(`Timeout waiting for ${description} after ${timeoutMs}ms`));
-      } else {
-        setTimeout(check, 10);
-      }
-    };
-
-    check();
-  });
-}
-
-// Usage example from actual debugging session:
-//
-// BEFORE (flaky):
-// ---------------
-// const messagePromise = agent.sendMessage('Execute tools');
-// await new Promise(r => setTimeout(r, 300)); // Hope tools start in 300ms
-// agent.abort();
-// await messagePromise;
-// await new Promise(r => setTimeout(r, 50)); // Hope results arrive in 50ms
-// expect(toolResults.length).toBe(2); // Fails randomly
-//
-// AFTER (reliable):
-// ----------------
-// const messagePromise = agent.sendMessage('Execute tools');
-// await waitForEventCount(threadManager, threadId, 'TOOL_CALL', 2); // Wait for tools to start
-// agent.abort();
-// await messagePromise;
-// await waitForEventCount(threadManager, threadId, 'TOOL_RESULT', 2); // Wait for results
-// expect(toolResults.length).toBe(2); // Always succeeds
-//
-// Result: 60% pass rate → 100%, 40% faster execution
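
The removed helpers above all share one contract: re-read the event list every 10ms until the condition holds or the timeout fires. A self-contained sketch of that contract against a stubbed manager; the stub and event shape are simplified assumptions, not the real `~/threads` types:

```typescript
// Self-contained sketch: the same polling contract as waitForEvent above,
// exercised against a stub manager. Event and manager shapes are simplified.
type StubEvent = { type: string };
const log: StubEvent[] = [];
const stubManager = { getEvents: (_threadId: string): StubEvent[] => log };

function waitForStubEvent(type: string, timeoutMs = 5000): Promise<StubEvent> {
  return new Promise((resolve, reject) => {
    const startTime = Date.now();
    const check = () => {
      const event = stubManager.getEvents("thread-1").find((e) => e.type === type);
      if (event) resolve(event);
      else if (Date.now() - startTime > timeoutMs)
        reject(new Error(`Timeout waiting for ${type} after ${timeoutMs}ms`));
      else setTimeout(check, 10); // same 10ms poll as the helpers above
    };
    check();
  });
}

// Producer appends at an unpredictable time; the waiter still resolves reliably.
setTimeout(() => log.push({ type: "TOOL_RESULT" }), Math.random() * 100);
await waitForStubEvent("TOOL_RESULT");
console.log("event observed without an arbitrary sleep");
```
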
--- a/package/claude/skills/sd-debug/condition-based-waiting.md
+++ /dev/null
@@ -1,114 +0,0 @@
-# Condition-Based Waiting
-
-## Overview
-
-Flaky tests often guess at timing with arbitrary delays. This creates race conditions where tests pass on fast machines but fail under load or in CI.
-
-**Core principle:** Wait for the actual condition you care about, not a guess about how long it takes.
-
-## When to Use
-
-```mermaid
-flowchart TD
-    A{"Test uses setTimeout/sleep?"} -->|yes| B{"Testing timing behavior?"}
-    B -->|yes| C[Document WHY timeout needed]
-    B -->|no| D[Use condition-based waiting]
-```
-
-**Use when:**
-
-- Tests have arbitrary delays (`setTimeout`, `sleep`, `time.sleep()`)
-- Tests are flaky (pass sometimes, fail under load)
-- Tests timeout when run in parallel
-- Waiting for async operations to complete
-
-**Don't use when:**
-
-- Testing actual timing behavior (debounce, throttle intervals)
-- Always document WHY if using arbitrary timeout
-
-## Core Pattern
-
-```typescript
-// ❌ BEFORE: Guessing at timing
-await new Promise((r) => setTimeout(r, 50));
-const result = getResult();
-expect(result).toBeDefined();
-
-// ✅ AFTER: Waiting for condition
-await waitFor(() => getResult() !== undefined);
-const result = getResult();
-expect(result).toBeDefined();
-```
-
-## Quick Patterns
-
-| Scenario | Pattern |
-| ----------------- | ---------------------------------------------------- |
-| Wait for event | `waitFor(() => events.find(e => e.type === 'DONE'))` |
-| Wait for state | `waitFor(() => machine.state === 'ready')` |
-| Wait for count | `waitFor(() => items.length >= 5)` |
-| Wait for file | `waitFor(() => fs.existsSync(path))` |
-| Complex condition | `waitFor(() => obj.ready && obj.value > 10)` |
-
-## Implementation
-
-Generic polling function:
-
-```typescript
-async function waitFor<T>(
-  condition: () => T | undefined | null | false,
-  description: string,
-  timeoutMs = 5000,
-): Promise<T> {
-  const startTime = Date.now();
-
-  while (true) {
-    const result = condition();
-    if (result) return result;
-
-    if (Date.now() - startTime > timeoutMs) {
-      throw new Error(`Timeout waiting for ${description} after ${timeoutMs}ms`);
-    }
-
-    await new Promise((r) => setTimeout(r, 10)); // Poll every 10ms
-  }
-}
-```
-
-See `condition-based-waiting-example.ts` in this directory for complete implementation with domain-specific helpers (`waitForEvent`, `waitForEventCount`, `waitForEventMatch`) from actual debugging session.
-
-## Common Mistakes
-
-**❌ Polling too fast:** `setTimeout(check, 1)` - wastes CPU
-**✅ Fix:** Poll every 10ms
-
-**❌ No timeout:** Loop forever if condition never met
-**✅ Fix:** Always include timeout with clear error
-
-**❌ Stale data:** Cache state before loop
-**✅ Fix:** Call getter inside loop for fresh data
-
-## When Arbitrary Timeout IS Correct
-
-```typescript
-// Tool ticks every 100ms - need 2 ticks to verify partial output
-await waitForEvent(manager, "TOOL_STARTED"); // First: wait for condition
-await new Promise((r) => setTimeout(r, 200)); // Then: wait for timed behavior
-// 200ms = 2 ticks at 100ms intervals - documented and justified
-```
-
-**Requirements:**
-
-1. First wait for triggering condition
-2. Based on known timing (not guessing)
-3. Comment explaining WHY
-
-## Real-World Impact
-
-From debugging session (2025-10-03):
-
-- Fixed 15 flaky tests across 3 files
-- Pass rate: 60% → 100%
-- Execution time: 40% faster
-- No more race conditions
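
The "stale data" mistake listed in the removed doc is the subtlest of the three: the condition must re-read state on every poll, or the loop can never observe progress. A synthetic sketch that reuses the doc's `waitFor` with a made-up producer (illustrative only, not code from the package):

```typescript
// Synthetic demo of the stale-data mistake from "Common Mistakes" above.
const items: number[] = [];
setTimeout(() => items.push(1, 2, 3, 4, 5), 50); // producer finishes later

async function waitFor<T>(
  condition: () => T | undefined | null | false,
  description: string,
  timeoutMs = 5000,
): Promise<T> {
  const startTime = Date.now();
  while (true) {
    const result = condition(); // re-evaluated on every iteration
    if (result) return result;
    if (Date.now() - startTime > timeoutMs) {
      throw new Error(`Timeout waiting for ${description} after ${timeoutMs}ms`);
    }
    await new Promise((r) => setTimeout(r, 10));
  }
}

// ❌ Stale: length captured once, before the loop, so this would always time out:
// const count = items.length;
// await waitFor(() => count >= 5, "5 items");

// ✅ Fresh: the getter runs inside the loop on every poll.
await waitFor(() => items.length >= 5, "5 items");
console.log("saw all 5 items");
```
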
--- a/package/claude/skills/sd-debug/defense-in-depth.md
+++ /dev/null
@@ -1,128 +0,0 @@
-# Defense-in-Depth Validation
-
-## Overview
-
-When you fix a bug caused by invalid data, adding validation at one place feels sufficient. But that single check can be bypassed by different code paths, refactoring, or mocks.
-
-**Core principle:** Validate at EVERY layer data passes through. Make the bug structurally impossible.
-
-## Why Multiple Layers
-
-Single validation: "We fixed the bug"
-Multiple layers: "We made the bug impossible"
-
-Different layers catch different cases:
-
-- Entry validation catches most bugs
-- Business logic catches edge cases
-- Environment guards prevent context-specific dangers
-- Debug logging helps when other layers fail
-
-## The Four Layers
-
-### Layer 1: Entry Point Validation
-
-**Purpose:** Reject obviously invalid input at API boundary
-
-```typescript
-function createProject(name: string, workingDirectory: string) {
-  if (!workingDirectory || workingDirectory.trim() === "") {
-    throw new Error("workingDirectory cannot be empty");
-  }
-  if (!existsSync(workingDirectory)) {
-    throw new Error(`workingDirectory does not exist: ${workingDirectory}`);
-  }
-  if (!statSync(workingDirectory).isDirectory()) {
-    throw new Error(`workingDirectory is not a directory: ${workingDirectory}`);
-  }
-  // ... proceed
-}
-```
-
-### Layer 2: Business Logic Validation
-
-**Purpose:** Ensure data makes sense for this operation
-
-```typescript
-function initializeWorkspace(projectDir: string, sessionId: string) {
-  if (!projectDir) {
-    throw new Error("projectDir required for workspace initialization");
-  }
-  // ... proceed
-}
-```
-
-### Layer 3: Environment Guards
-
-**Purpose:** Prevent dangerous operations in specific contexts
-
-```typescript
-async function gitInit(directory: string) {
-  // In tests, refuse git init outside temp directories
-  if (process.env.NODE_ENV === "test") {
-    const normalized = normalize(resolve(directory));
-    const tmpDir = normalize(resolve(tmpdir()));
-
-    if (!normalized.startsWith(tmpDir)) {
-      throw new Error(`Refusing git init outside temp dir during tests: ${directory}`);
-    }
-  }
-  // ... proceed
-}
-```
-
-### Layer 4: Debug Instrumentation
-
-**Purpose:** Capture context for forensics
-
-```typescript
-async function gitInit(directory: string) {
-  const stack = new Error().stack;
-  logger.debug("About to git init", {
-    directory,
-    cwd: process.cwd(),
-    stack,
-  });
-  // ... proceed
-}
-```
-
-## Applying the Pattern
-
-When you find a bug:
-
-1. **Trace the data flow** - Where does bad value originate? Where used?
-2. **Map all checkpoints** - List every point data passes through
-3. **Add validation at each layer** - Entry, business, environment, debug
-4. **Test each layer** - Try to bypass layer 1, verify layer 2 catches it
-
-## Example from Session
-
-Bug: Empty `projectDir` caused `git init` in source code
-
-**Data flow:**
-
-1. Test setup → empty string
-2. `Project.create(name, '')`
-3. `WorkspaceManager.createWorkspace('')`
-4. `git init` runs in `process.cwd()`
-
-**Four layers added:**
-
-- Layer 1: `Project.create()` validates not empty/exists/writable
-- Layer 2: `WorkspaceManager` validates projectDir not empty
-- Layer 3: `WorktreeManager` refuses git init outside tmpdir in tests
-- Layer 4: Stack trace logging before git init
-
-**Result:** All 1847 tests passed, bug impossible to reproduce
-
-## Key Insight
-
-All four layers were necessary. During testing, each layer caught bugs the others missed:
-
-- Different code paths bypassed entry validation
-- Mocks bypassed business logic checks
-- Edge cases on different platforms needed environment guards
-- Debug logging identified structural misuse
-
-**Don't stop at one validation point.** Add checks at every layer.
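
Step 4 of the removed doc ("Test each layer") can be made concrete with a small sketch: call an inner layer directly, simulating a code path that bypassed the outer one, and assert the inner guard still fires. A Vitest-style runner is assumed here; the guard body is copied from the doc's Layer 2 example:

```typescript
// Hypothetical layer-bypass test (Vitest assumed). Calling initializeWorkspace
// directly simulates a caller that skipped Layer 1 (createProject) entirely.
import { describe, expect, it } from "vitest";

// Layer 2 guard, as shown in the removed defense-in-depth.md
function initializeWorkspace(projectDir: string, sessionId: string) {
  if (!projectDir) {
    throw new Error("projectDir required for workspace initialization");
  }
  return { projectDir, sessionId };
}

describe("defense in depth", () => {
  it("Layer 2 still rejects an empty projectDir when Layer 1 is bypassed", () => {
    expect(() => initializeWorkspace("", "session-1")).toThrow(
      /projectDir required/,
    );
  });
});
```
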
--- a/package/claude/skills/sd-debug/find-polluter.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-# Bisection script for finding which test creates unwanted files/directories
-# Usage: ./find-polluter.sh <file_or_dir_to_check> <test_pattern>
-# Example: ./find-polluter.sh '.git' 'src/**/*.test.ts'
-
-set -e
-
-if [ "$#" -ne 2 ]; then
-  echo "Usage: $0 <file_or_dir_to_check> <test_pattern>"
-  echo "Example: $0 '.git' 'src/**/*.test.ts'"
-  exit 1
-fi
-
-CHECK_PATH="$1"
-TEST_PATTERN="$2"
-
-# Detect package manager: pnpm -> yarn -> npm (default)
-if [ -f "pnpm-lock.yaml" ]; then PM="pnpm"
-elif [ -f "yarn.lock" ]; then PM="yarn"
-else PM="npm"
-fi
-
-# Find all test files matching pattern
-readarray -t TEST_FILES < <(find . -path "$TEST_PATTERN" -type f)
-
-if [ ${#TEST_FILES[@]} -eq 0 ]; then
-  echo "No test files found matching pattern: $TEST_PATTERN"
-  exit 1
-fi
-
-echo "Found ${#TEST_FILES[@]} test files"
-echo "Checking for pollution: $CHECK_PATH"
-echo ""
-
-for test_file in "${TEST_FILES[@]}"; do
-  # Skip if pollution already exists
-  if [ -e "$CHECK_PATH" ]; then
-    echo "⚠️ Pollution already exists, skipping to avoid false positive"
-    echo " Please remove $CHECK_PATH and re-run"
-    exit 1
-  fi
-
-  echo "Testing: $test_file"
-
-  # Run the test
-  $PM test "$test_file" > /dev/null 2>&1 || true
-
-  # Check if pollution appeared
-  if [ -e "$CHECK_PATH" ]; then
-    echo ""
-    echo "🔴 FOUND POLLUTER: $test_file"
-    echo ""
-    echo "This test created: $CHECK_PATH"
-    ls -la "$CHECK_PATH" 2>/dev/null || echo "(path exists but can't stat)"
-    echo ""
-    echo "Investigate with:"
-    echo "  $PM test '$test_file' -- --reporter=verbose"
-    echo "  git diff"
-    exit 0
-  fi
-done
-
-echo ""
-echo "✅ No polluter found - all ${#TEST_FILES[@]} tests are clean"