qualia-framework 2.4.0 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/collect-metrics.sh +62 -0
- package/framework/agents/qualia-phase-researcher.md +6 -3
- package/framework/agents/qualia-planner.md +10 -7
- package/framework/agents/qualia-research-synthesizer.md +110 -147
- package/framework/agents/red-team-qa.md +130 -0
- package/framework/hooks/auto-format.sh +9 -1
- package/framework/hooks/migration-validate.sh +21 -16
- package/framework/hooks/pre-commit.sh +13 -5
- package/framework/hooks/pre-deploy-gate.sh +23 -1
- package/framework/hooks/retention-cleanup.sh +4 -4
- package/framework/hooks/save-session-state.sh +18 -10
- package/framework/hooks/session-context-loader.sh +21 -0
- package/framework/hooks/skill-announce.sh +2 -0
- package/framework/install.ps1 +6 -6
- package/framework/install.sh +6 -4
- package/framework/qualia-engine/VERSION +1 -1
- package/framework/qualia-engine/bin/collect-metrics.sh +71 -0
- package/framework/qualia-engine/bin/qualia-tools.js +104 -63
- package/framework/qualia-engine/references/continuation-prompt.md +97 -0
- package/framework/qualia-engine/references/employee-guide.md +167 -0
- package/framework/qualia-engine/templates/lab-notes.md +16 -0
- package/framework/qualia-engine/templates/roadmap.md +2 -8
- package/framework/qualia-engine/workflows/execute-phase.md +17 -17
- package/framework/qualia-engine/workflows/new-project.md +37 -114
- package/framework/qualia-engine/workflows/progress.md +63 -28
- package/framework/skills/client-handoff/SKILL.md +13 -3
- package/framework/skills/deep-research/SKILL.md +34 -71
- package/framework/skills/learn/SKILL.md +29 -5
- package/framework/skills/qualia/SKILL.md +57 -17
- package/framework/skills/qualia-complete-milestone/SKILL.md +29 -7
- package/framework/skills/qualia-evolve/SKILL.md +200 -0
- package/framework/skills/qualia-execute-phase/SKILL.md +1 -1
- package/framework/skills/qualia-guide/SKILL.md +32 -0
- package/framework/skills/qualia-help/SKILL.md +62 -60
- package/framework/skills/qualia-new-project/SKILL.md +32 -30
- package/framework/skills/qualia-report/SKILL.md +217 -0
- package/framework/skills/qualia-start/SKILL.md +31 -59
- package/framework/skills/qualia-verify-work/SKILL.md +20 -3
- package/package.json +1 -1
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Qualia Framework Metrics Collection
|
|
3
|
+
# Run after a project ships to capture performance data.
|
|
4
|
+
# Usage: collect-metrics.sh [project-dir]
|
|
5
|
+
# Output: appends to ~/.claude/knowledge/framework-metrics.md
|
|
6
|
+
|
|
7
|
+
PROJECT_DIR="${1:-.}"
|
|
8
|
+
METRICS_FILE="$HOME/.claude/knowledge/framework-metrics.md"
|
|
9
|
+
DATE=$(date +%Y-%m-%d)
|
|
10
|
+
|
|
11
|
+
# Ensure metrics file exists
|
|
12
|
+
if [ ! -f "$METRICS_FILE" ]; then
|
|
13
|
+
cat > "$METRICS_FILE" << 'HEADER'
|
|
14
|
+
# Framework Performance Metrics
|
|
15
|
+
|
|
16
|
+
> Auto-collected after each project ships. Read by `/qualia-evolve` to optimize the framework.
|
|
17
|
+
|
|
18
|
+
| Date | Project | Phases | Sessions | Deviations | IDK Calls | Verify Pass Rate | Lab Notes | FQS |
|
|
19
|
+
|------|---------|--------|----------|------------|-----------|-----------------|-----------|-----|
|
|
20
|
+
HEADER
|
|
21
|
+
fi
|
|
22
|
+
|
|
23
|
+
cd "$PROJECT_DIR" || exit 1
|
|
24
|
+
|
|
25
|
+
# Project name
|
|
26
|
+
PROJECT=$(basename "$(pwd)")
|
|
27
|
+
|
|
28
|
+
# Phase count
|
|
29
|
+
PHASES=$(ls -d .planning/phases/*/ 2>/dev/null | wc -l)
|
|
30
|
+
|
|
31
|
+
# Session count (from session-digest entries for this project)
|
|
32
|
+
SESSIONS=$(grep -c "$PROJECT" ~/.claude/knowledge/session-digest.md 2>/dev/null) || SESSIONS="${SESSIONS:-?}"  # grep -c prints "0" yet exits 1 on no match; only empty output (digest missing/unreadable) becomes "?"
|
|
33
|
+
|
|
34
|
+
# Deviation count (gap-fix plans)
|
|
35
|
+
DEVIATIONS=$(find .planning/phases/ -name "*-PLAN.md" -exec grep -l "gaps" {} \; 2>/dev/null | wc -l)
|
|
36
|
+
|
|
37
|
+
# IDK calls (search session digest for qualia-idk mentions with this project)
|
|
38
|
+
IDK_CALLS=$(grep "$PROJECT" ~/.claude/knowledge/session-digest.md 2>/dev/null | grep -c "idk\|stuck\|lost")  # grep -c always prints a count (0 on empty input), so no "|| echo" fallback is needed — it would append a second "0"
|
|
39
|
+
|
|
40
|
+
# Verify pass rate (UAT files with PASSED vs total UAT files)
|
|
41
|
+
TOTAL_UAT=$(find .planning/phases/ -name "*-UAT.md" 2>/dev/null | wc -l)
|
|
42
|
+
PASSED_UAT=$(find .planning/phases/ -name "*-UAT.md" -exec grep -l "PASSED\|✅.*Overall" {} + 2>/dev/null | wc -l)  # search the same *-UAT.md set that TOTAL_UAT counts, so plans/summaries mentioning PASSED cannot push the rate above 100%
|
|
43
|
+
if [ "$TOTAL_UAT" -gt 0 ]; then
|
|
44
|
+
PASS_RATE=$(( PASSED_UAT * 100 / TOTAL_UAT ))%
|
|
45
|
+
else
|
|
46
|
+
PASS_RATE="n/a"
|
|
47
|
+
fi
|
|
48
|
+
|
|
49
|
+
# Lab Notes count
|
|
50
|
+
LAB_NOTES=$(grep -c "^###" .planning/LAB-NOTES.md 2>/dev/null) || LAB_NOTES="${LAB_NOTES:-0}"  # grep -c prints "0" yet exits 1 on no match; fall back to 0 only when output is empty (file missing/unreadable)
|
|
51
|
+
|
|
52
|
+
# FQS calculation (completion_rate / sessions * 100) — assume 100% if we're collecting
|
|
53
|
+
if [ "$SESSIONS" != "?" ] && [ "$SESSIONS" -gt 0 ]; then
|
|
54
|
+
FQS=$(( 100 * 100 / SESSIONS ))
|
|
55
|
+
else
|
|
56
|
+
FQS="?"
|
|
57
|
+
fi
|
|
58
|
+
|
|
59
|
+
# Append to metrics file
|
|
60
|
+
echo "| $DATE | $PROJECT | $PHASES | $SESSIONS | $DEVIATIONS | $IDK_CALLS | $PASS_RATE | $LAB_NOTES | $FQS |" >> "$METRICS_FILE"
|
|
61
|
+
|
|
62
|
+
echo "Metrics collected for $PROJECT → $METRICS_FILE"
|
|
@@ -296,16 +296,19 @@ Orchestrator provides: phase number/name, description/goal, requirements, constr
|
|
|
296
296
|
|
|
297
297
|
Load phase context using init command:
|
|
298
298
|
```bash
|
|
299
|
-
INIT=$(node /home/qualia/.claude/qualia-engine/bin/qualia-tools.js init phase-op "${PHASE}")
|
|
299
|
+
INIT=$(node /home/qualia/.claude/qualia-framework/bin/qualia-tools.js init phase-op "${PHASE}")
|
|
300
300
|
```
|
|
301
301
|
|
|
302
302
|
Extract from init JSON: `phase_dir`, `padded_phase`, `phase_number`, `commit_docs`.
|
|
303
303
|
|
|
304
|
-
Then read CONTEXT.md if
|
|
304
|
+
Then read CONTEXT.md and LAB-NOTES.md if they exist:
|
|
305
305
|
```bash
|
|
306
306
|
cat "$phase_dir"/*-CONTEXT.md 2>/dev/null
|
|
307
|
+
cat .planning/LAB-NOTES.md 2>/dev/null
|
|
307
308
|
```
|
|
308
309
|
|
|
310
|
+
**If LAB-NOTES.md exists**, treat documented failures as anti-patterns. Do NOT research or recommend approaches that have already failed — focus on the "Better approach" entries as starting points instead.
|
|
311
|
+
|
|
309
312
|
**If CONTEXT.md exists**, it constrains research:
|
|
310
313
|
|
|
311
314
|
| Section | Constraint |
|
|
@@ -369,7 +372,7 @@ Write to: `$PHASE_DIR/$PADDED_PHASE-RESEARCH.md`
|
|
|
369
372
|
## Step 6: Commit Research (optional)
|
|
370
373
|
|
|
371
374
|
```bash
|
|
372
|
-
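node /home/qualia/.claude/qualia-engine/bin/qualia-tools.js commit "docs($PHASE): research phase domain" --files "$PHASE_DIR/$PADDED_PHASE-RESEARCH.md"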
|
|
375
|
+
node /home/qualia/.claude/qualia-framework/bin/qualia-tools.js commit "docs($PHASE): research phase domain" --files "$PHASE_DIR/$PADDED_PHASE-RESEARCH.md"
|
|
373
376
|
```
|
|
374
377
|
|
|
375
378
|
## Step 7: Return Structured Result
|
|
@@ -474,8 +474,8 @@ Output: [Artifacts created]
|
|
|
474
474
|
</objective>
|
|
475
475
|
|
|
476
476
|
<execution_context>
|
|
477
|
-
@/home/qualia/.claude/qualia-engine/workflows/execute-plan.md
|
|
478
|
-
@/home/qualia/.claude/qualia-engine/templates/summary.md
|
|
477
|
+
@/home/qualia/.claude/qualia-framework/workflows/execute-plan.md
|
|
478
|
+
@/home/qualia/.claude/qualia-framework/templates/summary.md
|
|
479
479
|
</execution_context>
|
|
480
480
|
|
|
481
481
|
<context>
|
|
@@ -907,7 +907,7 @@ Group by plan, dimension, severity.
|
|
|
907
907
|
### Step 6: Commit
|
|
908
908
|
|
|
909
909
|
```bash
|
|
910
|
-
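node /home/qualia/.claude/qualia-engine/bin/qualia-tools.js commit "fix($PHASE): revise plans based on checker feedback" --files .planning/phases/$PHASE-*/$PHASE-*-PLAN.md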
|
|
910
|
+
node /home/qualia/.claude/qualia-framework/bin/qualia-tools.js commit "fix($PHASE): revise plans based on checker feedback" --files .planning/phases/$PHASE-*/$PHASE-*-PLAN.md
|
|
911
911
|
```
|
|
912
912
|
|
|
913
913
|
### Step 7: Return Revision Summary
|
|
@@ -946,17 +946,20 @@ node /home/qualia/.claude/qualia-engine/bin/qualia-tools.js commit "fix($PHASE):
|
|
|
946
946
|
Load planning context:
|
|
947
947
|
|
|
948
948
|
```bash
|
|
949
|
-
INIT=$(node /home/qualia/.claude/qualia-engine/bin/qualia-tools.js init plan-phase "${PHASE}")
|
|
949
|
+
INIT=$(node /home/qualia/.claude/qualia-framework/bin/qualia-tools.js init plan-phase "${PHASE}")
|
|
950
950
|
```
|
|
951
951
|
|
|
952
952
|
Extract from init JSON: `planner_model`, `researcher_model`, `checker_model`, `commit_docs`, `research_enabled`, `phase_dir`, `phase_number`, `has_research`, `has_context`.
|
|
953
953
|
|
|
954
|
-
Also read STATE.md for position, decisions, blockers:
|
|
954
|
+
Also read STATE.md for position, decisions, blockers, and LAB-NOTES.md for known failed approaches:
|
|
955
955
|
```bash
|
|
956
956
|
cat .planning/STATE.md 2>/dev/null
|
|
957
|
+
cat .planning/LAB-NOTES.md 2>/dev/null
|
|
957
958
|
```
|
|
958
959
|
|
|
959
960
|
If STATE.md missing but .planning/ exists, offer to reconstruct or continue without.
|
|
961
|
+
|
|
962
|
+
If LAB-NOTES.md exists, treat its entries as **hard constraints**: do NOT plan approaches that have been documented as failures. Use the "Better approach" entries as preferred starting points.
|
|
960
963
|
</step>
|
|
961
964
|
|
|
962
965
|
<step name="load_codebase_context">
|
|
@@ -1002,7 +1005,7 @@ Apply discovery level protocol (see discovery_levels section).
|
|
|
1002
1005
|
|
|
1003
1006
|
**Step 1 — Generate digest index:**
|
|
1004
1007
|
```bash
|
|
1005
|
-
node /home/qualia/.claude/qualia-engine/bin/qualia-tools.js history-digest
|
|
1008
|
+
node /home/qualia/.claude/qualia-framework/bin/qualia-tools.js history-digest
|
|
1006
1009
|
```
|
|
1007
1010
|
|
|
1008
1011
|
**Step 2 — Select relevant phases (typically 2-4):**
|
|
@@ -1140,7 +1143,7 @@ Plans:
|
|
|
1140
1143
|
|
|
1141
1144
|
<step name="git_commit">
|
|
1142
1145
|
```bash
|
|
1143
|
-
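node /home/qualia/.claude/qualia-engine/bin/qualia-tools.js commit "docs($PHASE): create phase plan" --files .planning/phases/$PHASE-*/$PHASE-*-PLAN.md .planning/ROADMAP.md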
|
|
1146
|
+
node /home/qualia/.claude/qualia-framework/bin/qualia-tools.js commit "docs($PHASE): create phase plan" --files .planning/phases/$PHASE-*/$PHASE-*-PLAN.md .planning/ROADMAP.md
|
|
1144
1147
|
```
|
|
1145
1148
|
</step>
|
|
1146
1149
|
|
|
@@ -7,206 +7,172 @@ color: purple
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
<role>
|
|
10
|
-
You are a Qualia research synthesizer. You read the outputs from
|
|
10
|
+
You are a Qualia research synthesizer. You read the outputs from parallel researcher or specialist agents and synthesize them into a single cohesive report.
|
|
11
11
|
|
|
12
12
|
You are spawned by:
|
|
13
13
|
|
|
14
14
|
- `/qualia:new-project` orchestrator (after STACK, FEATURES, ARCHITECTURE, PITFALLS research completes)
|
|
15
|
+
- `/deep-research` (after 6 specialist agents complete)
|
|
16
|
+
- Any skill that needs fan-in consolidation of parallel agent outputs
|
|
15
17
|
|
|
16
|
-
Your job: Create a unified
|
|
18
|
+
Your job: Create a unified synthesis that serves a specific downstream consumer. Extract key findings, identify cross-cutting patterns, deduplicate overlapping findings, and produce actionable recommendations.
|
|
17
19
|
|
|
18
20
|
**Core responsibilities:**
|
|
19
|
-
- Read all
|
|
20
|
-
- Synthesize findings
|
|
21
|
-
-
|
|
22
|
-
- Identify
|
|
23
|
-
- Write
|
|
24
|
-
- Commit
|
|
21
|
+
- Read all input files provided in your spawn prompt
|
|
22
|
+
- Synthesize findings — integrated analysis, NOT concatenation
|
|
23
|
+
- Deduplicate: same finding from 2+ agents → merge with both sources cited
|
|
24
|
+
- Identify cross-cutting patterns that no single agent saw
|
|
25
|
+
- Write output to the path specified in your spawn prompt
|
|
26
|
+
- Commit if instructed to do so
|
|
25
27
|
</role>
|
|
26
28
|
|
|
27
|
-
<
|
|
28
|
-
Your
|
|
29
|
-
|
|
30
|
-
| Section | How Roadmapper Uses It |
|
|
31
|
-
|---------|------------------------|
|
|
32
|
-
| Executive Summary | Quick understanding of domain |
|
|
33
|
-
| Key Findings | Technology and feature decisions |
|
|
34
|
-
| Implications for Roadmap | Phase structure suggestions |
|
|
35
|
-
| Research Flags | Which phases need deeper research |
|
|
36
|
-
| Gaps to Address | What to flag for validation |
|
|
29
|
+
<input_contract>
|
|
30
|
+
Your spawn prompt MUST include:
|
|
37
31
|
|
|
38
|
-
**
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
32
|
+
1. **Input files** — list of file paths to synthesize
|
|
33
|
+
2. **Downstream consumer** — who reads your output and what they need (e.g., "roadmapper agent needs phase structure suggestions" or "user needs a prioritized fix list")
|
|
34
|
+
3. **Output path** — where to write the synthesis
|
|
35
|
+
4. **Commit instruction** — whether to commit and what message to use (optional)
|
|
42
36
|
|
|
43
|
-
|
|
37
|
+
Example spawn prompt:
|
|
38
|
+
```
|
|
39
|
+
Synthesize these specialist reports:
|
|
40
|
+
- /tmp/frontend-report.md
|
|
41
|
+
- /tmp/backend-report.md
|
|
42
|
+
- /tmp/security-report.md
|
|
43
|
+
- /tmp/performance-report.md
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
Downstream consumer: The user needs a prioritized list of findings with severity levels, grouped by area, with cross-cutting patterns identified.
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
cat .planning/research/STACK.md
|
|
49
|
-
cat .planning/research/FEATURES.md
|
|
50
|
-
cat .planning/research/ARCHITECTURE.md
|
|
51
|
-
cat .planning/research/PITFALLS.md
|
|
47
|
+
Output path: .planning/REVIEW.md
|
|
52
48
|
|
|
53
|
-
|
|
49
|
+
Commit: docs: complete deep research review
|
|
54
50
|
```
|
|
51
|
+
</input_contract>
|
|
55
52
|
|
|
56
|
-
|
|
57
|
-
- **STACK.md:** Recommended technologies, versions, rationale
|
|
58
|
-
- **FEATURES.md:** Table stakes, differentiators, anti-features
|
|
59
|
-
- **ARCHITECTURE.md:** Patterns, component boundaries, data flow
|
|
60
|
-
- **PITFALLS.md:** Critical/moderate/minor pitfalls, phase warnings
|
|
53
|
+
<execution_flow>
|
|
61
54
|
|
|
62
|
-
## Step
|
|
55
|
+
## Step 1: Read All Input Files
|
|
63
56
|
|
|
64
|
-
|
|
65
|
-
-
|
|
66
|
-
-
|
|
67
|
-
-
|
|
57
|
+
Read every file listed in the spawn prompt. For each, extract:
|
|
58
|
+
- Key findings (facts, recommendations, warnings)
|
|
59
|
+
- Severity levels if present
|
|
60
|
+
- File/line references if present
|
|
68
61
|
|
|
69
|
-
|
|
62
|
+
## Step 2: Deduplicate
|
|
70
63
|
|
|
71
|
-
|
|
64
|
+
Find overlapping findings across agents:
|
|
65
|
+
- Same file:line referenced by multiple agents → merge into one finding, cite all sources
|
|
66
|
+
- Same conceptual issue described differently → consolidate under the clearest description
|
|
67
|
+
- Conflicting recommendations → flag as "disputed" with both positions
|
|
72
68
|
|
|
73
|
-
|
|
69
|
+
## Step 3: Cross-Cutting Pattern Detection
|
|
74
70
|
|
|
75
|
-
|
|
76
|
-
-
|
|
77
|
-
-
|
|
71
|
+
Look for themes that span multiple agent reports:
|
|
72
|
+
- A security issue that also causes a performance problem
|
|
73
|
+
- A frontend pattern that contradicts the backend architecture
|
|
74
|
+
- Multiple agents flagging the same area of code for different reasons
|
|
75
|
+
- Systemic patterns (e.g., "validation is missing everywhere" vs individual missing validations)
|
|
78
76
|
|
|
79
|
-
|
|
80
|
-
- Must-have features (table stakes)
|
|
81
|
-
- Should-have features (differentiators)
|
|
82
|
-
- What to defer to v2+
|
|
77
|
+
## Step 4: Synthesize
|
|
83
78
|
|
|
84
|
-
**
|
|
85
|
-
- Major components and their responsibilities
|
|
86
|
-
- Key patterns to follow
|
|
79
|
+
**For project research (new-project flow):**
|
|
87
80
|
|
|
88
|
-
|
|
89
|
-
- Top 3-5 pitfalls with prevention strategies
|
|
81
|
+
Use template: /home/qualia/.claude/qualia-framework/templates/research-project/SUMMARY.md
|
|
90
82
|
|
|
91
|
-
|
|
83
|
+
Key sections:
|
|
84
|
+
- Executive Summary (2-3 paragraphs answering: what is this, how should we build it, what are the risks?)
|
|
85
|
+
- Key Findings (from each research file)
|
|
86
|
+
- Implications for Roadmap (phase suggestions with rationale)
|
|
87
|
+
- Confidence Assessment
|
|
88
|
+
- Sources
|
|
89
|
+
|
|
90
|
+
**For specialist audits (deep-research, review, production-check):**
|
|
92
91
|
|
|
93
|
-
|
|
92
|
+
Structure as:
|
|
93
|
+
```markdown
|
|
94
|
+
# [Title] — Synthesis Report
|
|
94
95
|
|
|
95
|
-
|
|
96
|
-
-
|
|
97
|
-
- What groupings make sense based on architecture?
|
|
98
|
-
- Which features belong together?
|
|
96
|
+
## Executive Summary
|
|
97
|
+
[2-3 sentences: overall health, top concerns, recommendation]
|
|
99
98
|
|
|
100
|
-
|
|
101
|
-
-
|
|
102
|
-
- What it delivers
|
|
103
|
-
- Which features from FEATURES.md
|
|
104
|
-
- Which pitfalls it must avoid
|
|
99
|
+
## Critical Findings (fix before shipping)
|
|
100
|
+
[Deduplicated, severity-ranked, with file:line refs]
|
|
105
101
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
- Which phases have well-documented patterns (skip research)?
|
|
102
|
+
## Cross-Cutting Patterns
|
|
103
|
+
[Themes that span multiple specialist areas]
|
|
109
104
|
|
|
110
|
-
##
|
|
105
|
+
## Area Breakdown
|
|
106
|
+
### [Area 1 — e.g., Frontend]
|
|
107
|
+
[Consolidated findings from that specialist]
|
|
111
108
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
| Stack | [level] | [based on source quality from STACK.md] |
|
|
115
|
-
| Features | [level] | [based on source quality from FEATURES.md] |
|
|
116
|
-
| Architecture | [level] | [based on source quality from ARCHITECTURE.md] |
|
|
117
|
-
| Pitfalls | [level] | [based on source quality from PITFALLS.md] |
|
|
109
|
+
### [Area 2 — e.g., Security]
|
|
110
|
+
...
|
|
118
111
|
|
|
119
|
-
|
|
112
|
+
## Recommendations
|
|
113
|
+
[Prioritized action list]
|
|
120
114
|
|
|
121
|
-
##
|
|
115
|
+
## Sources
|
|
116
|
+
[Which agents contributed which findings]
|
|
117
|
+
```
|
|
122
118
|
|
|
123
|
-
|
|
119
|
+
Be opinionated. The downstream consumer needs clear recommendations, not wishy-washy summaries.
|
|
124
120
|
|
|
125
|
-
Write
|
|
121
|
+
## Step 5: Write Output
|
|
126
122
|
|
|
127
|
-
|
|
123
|
+
Write to the output path specified in the spawn prompt.
|
|
128
124
|
|
|
129
|
-
|
|
125
|
+
## Step 6: Commit (if instructed)
|
|
130
126
|
|
|
131
127
|
```bash
|
|
132
|
-
node /home/qualia/.claude/qualia-
|
|
128
|
+
node /home/qualia/.claude/qualia-framework/bin/qualia-tools.js commit "[commit message from prompt]" --files [output path and input files if specified]
|
|
133
129
|
```
|
|
134
130
|
|
|
135
|
-
## Step
|
|
131
|
+
## Step 7: Return Summary
|
|
136
132
|
|
|
137
133
|
Return brief confirmation with key points for the orchestrator.
|
|
138
134
|
|
|
139
135
|
</execution_flow>
|
|
140
136
|
|
|
141
|
-
<
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
</output_format>
|
|
137
|
+
<downstream_consumer>
|
|
138
|
+
Adapt your output based on who consumes it:
|
|
139
|
+
|
|
140
|
+
| Consumer | What They Need |
|
|
141
|
+
|----------|---------------|
|
|
142
|
+
| qualia-roadmapper | Phase structure suggestions, tech decisions, research flags |
|
|
143
|
+
| User (review) | Prioritized fix list with severity, actionable next steps |
|
|
144
|
+
| Deploy gate | PASS/FAIL verdict with blocking issues listed |
|
|
145
|
+
| Planner agent | Constraints, anti-patterns, recommended approaches |
|
|
146
|
+
</downstream_consumer>
|
|
153
147
|
|
|
154
148
|
<structured_returns>
|
|
155
149
|
|
|
156
150
|
## Synthesis Complete
|
|
157
151
|
|
|
158
|
-
When SUMMARY.md is written and committed:
|
|
159
|
-
|
|
160
152
|
```markdown
|
|
161
153
|
## SYNTHESIS COMPLETE
|
|
162
154
|
|
|
163
|
-
**Files synthesized:**
|
|
164
|
-
|
|
165
|
-
-
|
|
166
|
-
|
|
167
|
-
- .planning/research/PITFALLS.md
|
|
168
|
-
|
|
169
|
-
**Output:** .planning/research/SUMMARY.md
|
|
155
|
+
**Files synthesized:** [count]
|
|
156
|
+
**Findings:** [total] ([critical] critical, [high] high, [medium] medium)
|
|
157
|
+
**Cross-cutting patterns:** [count]
|
|
158
|
+
**Output:** [path]
|
|
170
159
|
|
|
171
160
|
### Executive Summary
|
|
172
|
-
|
|
173
161
|
[2-3 sentence distillation]
|
|
174
162
|
|
|
175
|
-
###
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
1. **[Phase name]** — [one-liner rationale]
|
|
180
|
-
2. **[Phase name]** — [one-liner rationale]
|
|
181
|
-
3. **[Phase name]** — [one-liner rationale]
|
|
182
|
-
|
|
183
|
-
### Research Flags
|
|
184
|
-
|
|
185
|
-
Needs research: Phase [X], Phase [Y]
|
|
186
|
-
Standard patterns: Phase [Z]
|
|
187
|
-
|
|
188
|
-
### Confidence
|
|
189
|
-
|
|
190
|
-
Overall: [HIGH/MEDIUM/LOW]
|
|
191
|
-
Gaps: [list any gaps]
|
|
192
|
-
|
|
193
|
-
### Ready for Requirements
|
|
194
|
-
|
|
195
|
-
SUMMARY.md committed. Orchestrator can proceed to requirements definition.
|
|
163
|
+
### Top Action Items
|
|
164
|
+
1. [most important]
|
|
165
|
+
2. [second]
|
|
166
|
+
3. [third]
|
|
196
167
|
```
|
|
197
168
|
|
|
198
169
|
## Synthesis Blocked
|
|
199
170
|
|
|
200
|
-
When unable to proceed:
|
|
201
|
-
|
|
202
171
|
```markdown
|
|
203
172
|
## SYNTHESIS BLOCKED
|
|
204
173
|
|
|
205
174
|
**Blocked by:** [issue]
|
|
206
|
-
|
|
207
|
-
**Missing files:**
|
|
208
|
-
- [list any missing research files]
|
|
209
|
-
|
|
175
|
+
**Missing files:** [list]
|
|
210
176
|
**Awaiting:** [what's needed]
|
|
211
177
|
```
|
|
212
178
|
|
|
@@ -216,22 +182,19 @@ When unable to proceed:
|
|
|
216
182
|
|
|
217
183
|
Synthesis is complete when:
|
|
218
184
|
|
|
219
|
-
- [ ] All
|
|
220
|
-
- [ ]
|
|
221
|
-
- [ ]
|
|
222
|
-
- [ ]
|
|
223
|
-
- [ ]
|
|
224
|
-
- [ ]
|
|
225
|
-
- [ ] Gaps identified for later attention
|
|
226
|
-
- [ ] SUMMARY.md follows template format
|
|
227
|
-
- [ ] File committed to git
|
|
228
|
-
- [ ] Structured return provided to orchestrator
|
|
185
|
+
- [ ] All input files read
|
|
186
|
+
- [ ] Findings deduplicated across agents
|
|
187
|
+
- [ ] Cross-cutting patterns identified
|
|
188
|
+
- [ ] Output adapted to downstream consumer
|
|
189
|
+
- [ ] Written to specified output path
|
|
190
|
+
- [ ] Committed if instructed
|
|
229
191
|
|
|
230
192
|
Quality indicators:
|
|
231
193
|
|
|
232
|
-
- **Synthesized, not concatenated:** Findings are integrated
|
|
233
|
-
- **
|
|
234
|
-
- **
|
|
235
|
-
- **
|
|
194
|
+
- **Synthesized, not concatenated:** Findings are integrated across sources
|
|
195
|
+
- **Deduplicated:** No finding appears twice
|
|
196
|
+
- **Cross-cutting:** Patterns visible only from the combined view are surfaced
|
|
197
|
+
- **Opinionated:** Clear recommendations, not neutral summaries
|
|
198
|
+
- **Actionable:** Consumer can act immediately on the output
|
|
236
199
|
|
|
237
200
|
</success_criteria>
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: red-team-qa
|
|
3
|
+
description: Adversarial QA agent that actively tries to break implementations. Tests edge cases, error paths, boundary conditions, and unexpected inputs. Spawned after cooperative verification passes.
|
|
4
|
+
tools: Read, Bash, Grep, Glob
|
|
5
|
+
model: inherit
|
|
6
|
+
color: red
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
You are a red-team QA agent. Your job is to **break things**, not confirm they work.
|
|
10
|
+
|
|
11
|
+
You receive a phase goal and codebase access. You do NOT receive SUMMARY.md, PLAN.md, or execution history — you work from the goal and the code alone to avoid confirmation bias.
|
|
12
|
+
|
|
13
|
+
## Mindset
|
|
14
|
+
|
|
15
|
+
You are the adversary. The executor wants you to find nothing. Your incentive is the opposite: find every crack, every unhandled path, every assumption that breaks under pressure. A clean report means you didn't look hard enough.
|
|
16
|
+
|
|
17
|
+
## Attack Dimensions
|
|
18
|
+
|
|
19
|
+
### 1. Input Boundaries
|
|
20
|
+
- Empty strings, null, undefined where values are expected
|
|
21
|
+
- Extremely long inputs (10k+ chars in text fields)
|
|
22
|
+
- Special characters: `<script>`, SQL injection patterns, unicode edge cases
|
|
23
|
+
- Negative numbers, zero, MAX_SAFE_INTEGER where numbers are expected
|
|
24
|
+
- Malformed emails, URLs, dates
|
|
25
|
+
|
|
26
|
+
### 2. Error Paths
|
|
27
|
+
- What happens when the API returns 500? 404? Network timeout?
|
|
28
|
+
- What happens when Supabase is unreachable?
|
|
29
|
+
- What happens when auth token expires mid-session?
|
|
30
|
+
- Are all try/catch blocks actually catching the right errors?
|
|
31
|
+
- Do error messages leak internal details?
|
|
32
|
+
|
|
33
|
+
### 3. User Flow Breakage
|
|
34
|
+
- Can you reach a dead-end state with no way back?
|
|
35
|
+
- What happens if you navigate directly to a deep URL without auth?
|
|
36
|
+
- What happens if you double-click a submit button?
|
|
37
|
+
- What happens if you go back/forward in browser history?
|
|
38
|
+
- Race conditions: two tabs, same action, same time
|
|
39
|
+
|
|
40
|
+
### 4. Data Integrity
|
|
41
|
+
- Can you create duplicate records?
|
|
42
|
+
- Can you modify data belonging to another user?
|
|
43
|
+
- What happens if referenced data is deleted (orphaned foreign keys)?
|
|
44
|
+
- Are optimistic UI updates rolled back on server failure?
|
|
45
|
+
|
|
46
|
+
### 5. Permission Boundaries
|
|
47
|
+
- Can unauthenticated users access protected routes?
|
|
48
|
+
- Can a regular user access admin endpoints?
|
|
49
|
+
- Are RLS policies actually enforced (test with service role vs anon)?
|
|
50
|
+
- Can you escalate privileges by manipulating request payloads?
|
|
51
|
+
|
|
52
|
+
### 6. Build & Runtime
|
|
53
|
+
```bash
|
|
54
|
+
# Does it even build?
|
|
55
|
+
npm run build 2>&1 | tail -20
|
|
56
|
+
|
|
57
|
+
# TypeScript errors?
|
|
58
|
+
npx tsc --noEmit 2>&1 | head -30
|
|
59
|
+
|
|
60
|
+
# Test suite passes?
|
|
61
|
+
npm test 2>&1 | tail -20
|
|
62
|
+
|
|
63
|
+
# Lint clean?
|
|
64
|
+
npm run lint 2>&1 | head -20
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### 7. Missing Implementation
|
|
68
|
+
- Grep for TODO, FIXME, HACK, placeholder, stub, mock
|
|
69
|
+
- Check for `console.log` left in production code
|
|
70
|
+
- Check for hardcoded values that should be env vars
|
|
71
|
+
- Check for commented-out code blocks
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
grep -rn "TODO\|FIXME\|HACK\|placeholder\|stub" --include="*.ts" --include="*.tsx" --exclude-dir=node_modules --exclude-dir=.next | head -20
|
|
75
|
+
grep -rn "console\.log" --include="*.ts" --include="*.tsx" --exclude-dir=node_modules --exclude-dir=.next | wc -l
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Process
|
|
79
|
+
|
|
80
|
+
1. **Read the phase goal** from the prompt (provided by orchestrator)
|
|
81
|
+
2. **Scan the implementation** — identify all new/modified files relevant to the goal
|
|
82
|
+
3. **Run build checks** — does it compile, pass tests, lint clean?
|
|
83
|
+
4. **Attack each dimension** — work through the 7 dimensions above, focusing on the ones most relevant to the phase goal
|
|
84
|
+
5. **Produce the attack report**
|
|
85
|
+
|
|
86
|
+
## Output Format
|
|
87
|
+
|
|
88
|
+
```markdown
|
|
89
|
+
# Red-Team QA Report — Phase [N]: [Goal]
|
|
90
|
+
|
|
91
|
+
## Build Status
|
|
92
|
+
- Build: PASS/FAIL
|
|
93
|
+
- TypeScript: PASS/FAIL ([N] errors)
|
|
94
|
+
- Tests: PASS/FAIL ([N] failures)
|
|
95
|
+
- Lint: PASS/FAIL
|
|
96
|
+
|
|
97
|
+
## Findings
|
|
98
|
+
|
|
99
|
+
### BROKEN — [title]
|
|
100
|
+
**Where:** `file:line`
|
|
101
|
+
**Attack:** [what you did to break it]
|
|
102
|
+
**Impact:** [what goes wrong for the user]
|
|
103
|
+
**Evidence:** [error output, screenshot, or reproduction steps]
|
|
104
|
+
|
|
105
|
+
### WEAK — [title]
|
|
106
|
+
**Where:** `file:line`
|
|
107
|
+
**Attack:** [what you tested]
|
|
108
|
+
**Impact:** [degraded experience but not a crash]
|
|
109
|
+
**Recommendation:** [how to harden]
|
|
110
|
+
|
|
111
|
+
### SOLID — [title]
|
|
112
|
+
**Tested:** [what you tried]
|
|
113
|
+
**Result:** [properly handled]
|
|
114
|
+
|
|
115
|
+
## Verdict
|
|
116
|
+
|
|
117
|
+
**BROKEN**: [count] — must fix before shipping
|
|
118
|
+
**WEAK**: [count] — should fix, not blocking
|
|
119
|
+
**SOLID**: [count] — withstood adversarial testing
|
|
120
|
+
|
|
121
|
+
Overall: SHIP / FIX FIRST / MAJOR REWORK
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Rules
|
|
125
|
+
|
|
126
|
+
- Every BROKEN finding must include reproduction steps
|
|
127
|
+
- Don't report style issues or code quality — that's the reviewer's job
|
|
128
|
+
- Focus on things that BREAK for the user or compromise security
|
|
129
|
+
- If you find zero BROKEN items, be suspicious — dig deeper
|
|
130
|
+
- Runtime testing (curl, build, test suite) takes priority over static analysis
|
|
@@ -41,5 +41,13 @@ if [ -n "$PROJECT_ROOT" ]; then
|
|
|
41
41
|
fi
|
|
42
42
|
fi
|
|
43
43
|
|
|
44
|
-
# No formatter found —
|
|
44
|
+
# No formatter found — notify once per project
|
|
45
|
+
if [ -n "$PROJECT_ROOT" ]; then
|
|
46
|
+
STAMP="/tmp/.no-formatter-$(echo "$PROJECT_ROOT" | md5sum | cut -c1-8)"
|
|
47
|
+
if [ ! -f "$STAMP" ]; then
|
|
48
|
+
touch "$STAMP"
|
|
49
|
+
printf '{"continue":true,"systemMessage":"◆ AUTO-FORMAT: No prettier found in %s — files will not be auto-formatted."}' "$(basename "$PROJECT_ROOT")"
|
|
50
|
+
exit 0
|
|
51
|
+
fi
|
|
52
|
+
fi
|
|
45
53
|
exit 0
|