specflow-cc 1.12.0 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +74 -0
- package/README.md +22 -8
- package/agents/impl-reviewer.md +8 -0
- package/agents/sf-spec-executor-orchestrator.md +8 -0
- package/agents/spec-auditor.md +8 -0
- package/agents/spec-creator.md +13 -2
- package/agents/spec-executor-orchestrator.md +21 -0
- package/agents/spec-executor.md +8 -0
- package/agents/spec-reviser.md +8 -0
- package/agents/spec-splitter.md +11 -1
- package/bin/install.js +20 -0
- package/bin/lib/config.cjs +91 -0
- package/bin/lib/core.cjs +120 -0
- package/bin/lib/spec.cjs +130 -0
- package/bin/lib/state.cjs +241 -0
- package/bin/lib/verify.cjs +117 -0
- package/bin/sf-tools.cjs +103 -0
- package/commands/sf/audit.md +11 -69
- package/commands/sf/autopilot.md +601 -0
- package/commands/sf/done.md +32 -71
- package/commands/sf/health.md +220 -0
- package/commands/sf/help.md +16 -0
- package/commands/sf/review.md +11 -69
- package/commands/sf/revise.md +4 -7
- package/commands/sf/run.md +11 -69
- package/commands/sf/split.md +14 -3
- package/commands/sf/validate.md +154 -0
- package/hooks/context-monitor.js +121 -0
- package/hooks/statusline.js +17 -0
- package/package.json +1 -1
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: sf:validate
|
|
3
|
+
description: Validate implementation against spec's validation checklist
|
|
4
|
+
argument-hint: [SPEC-XXX]
|
|
5
|
+
allowed-tools:
|
|
6
|
+
- Read
|
|
7
|
+
- Bash
|
|
8
|
+
- Glob
|
|
9
|
+
- Grep
|
|
10
|
+
- Write
|
|
11
|
+
- AskUserQuestion
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
<purpose>
|
|
15
|
+
Run the validation checklist from a specification after implementation. Executes automated checks (test commands, grep verifications) and prompts for manual checks. Reports pass/fail per item and overall validation status.
|
|
16
|
+
</purpose>
|
|
17
|
+
|
|
18
|
+
<context>
|
|
19
|
+
@.specflow/STATE.md
|
|
20
|
+
</context>
|
|
21
|
+
|
|
22
|
+
<workflow>
|
|
23
|
+
|
|
24
|
+
## Step 1: Verify Initialization
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
[ -d .specflow ] && echo "OK" || echo "NOT_INITIALIZED"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
**If NOT_INITIALIZED:**
|
|
31
|
+
```
|
|
32
|
+
SpecFlow not initialized. Run `/sf:init` first.
|
|
33
|
+
```
|
|
34
|
+
Exit.
|
|
35
|
+
|
|
36
|
+
## Step 2: Resolve Specification
|
|
37
|
+
|
|
38
|
+
**If SPEC-XXX argument provided:** Use that spec.
|
|
39
|
+
**If no argument:** Use active spec from STATE.md.
|
|
40
|
+
|
|
41
|
+
Read the spec file from `.specflow/specs/SPEC-XXX.md`.
|
|
42
|
+
|
|
43
|
+
**If spec not found:** Check `.specflow/archive/SPEC-XXX.md`.
|
|
44
|
+
|
|
45
|
+
**If still not found:**
|
|
46
|
+
```
|
|
47
|
+
Specification {ID} not found.
|
|
48
|
+
```
|
|
49
|
+
Exit.
|
|
50
|
+
|
|
51
|
+
## Step 3: Extract Validation Checklist
|
|
52
|
+
|
|
53
|
+
Look for `## Validation Checklist` section in the spec.
|
|
54
|
+
|
|
55
|
+
**If section not found:**
|
|
56
|
+
```
|
|
57
|
+
No validation checklist found in {ID}.
|
|
58
|
+
|
|
59
|
+
The spec was created without a validation checklist.
|
|
60
|
+
You can:
|
|
61
|
+
1. Add one manually to the spec
|
|
62
|
+
2. Use `/sf:verify` for interactive human verification
|
|
63
|
+
3. Use `/sf:review` for AI-powered code review
|
|
64
|
+
```
|
|
65
|
+
Exit.
|
|
66
|
+
|
|
67
|
+
Parse checklist items. Each item should have:
|
|
68
|
+
- **Action:** What to do (command to run, endpoint to hit, UI to check)
|
|
69
|
+
- **Expected:** What should happen
|
|
70
|
+
|
|
71
|
+
## Step 4: Display Plan
|
|
72
|
+
|
|
73
|
+
**IMPORTANT:** Output the following directly as formatted text, NOT wrapped in a markdown code block:
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
77
|
+
VALIDATION: {SPEC-XXX}
|
|
78
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
79
|
+
|
|
80
|
+
**Spec:** {title}
|
|
81
|
+
**Checklist items:** {count}
|
|
82
|
+
|
|
83
|
+
Running validation...
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Step 5: Execute Checks
|
|
87
|
+
|
|
88
|
+
For each checklist item:
|
|
89
|
+
|
|
90
|
+
### 5.1 Automated Checks
|
|
91
|
+
|
|
92
|
+
If the item contains a runnable command (backtick-wrapped command, `npm test`, `curl`, etc.):
|
|
93
|
+
1. Execute the command
|
|
94
|
+
2. Check exit code and output against expected result
|
|
95
|
+
3. Record: PASS or FAIL with output
|
|
96
|
+
|
|
97
|
+
### 5.2 Code Verification Checks
|
|
98
|
+
|
|
99
|
+
If the item references code behavior (file exists, function exists, pattern present):
|
|
100
|
+
1. Use Glob/Grep/Read to verify
|
|
101
|
+
2. Record: PASS or FAIL
|
|
102
|
+
|
|
103
|
+
### 5.3 Manual Checks
|
|
104
|
+
|
|
105
|
+
If the item requires manual/visual verification (UI behavior, browser check):
|
|
106
|
+
1. Ask user via AskUserQuestion: "Did this pass? {item description}"
|
|
107
|
+
2. Record user's response
|
|
108
|
+
|
|
109
|
+
### 5.4 Track Results
|
|
110
|
+
|
|
111
|
+
For each item, record:
|
|
112
|
+
- Item description
|
|
113
|
+
- Type: automated / code / manual
|
|
114
|
+
- Result: PASS / FAIL / SKIP
|
|
115
|
+
- Details: output or user note
|
|
116
|
+
|
|
117
|
+
## Step 6: Display Results
|
|
118
|
+
|
|
119
|
+
**IMPORTANT:** Output the following directly as formatted text, NOT wrapped in a markdown code block:
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
123
|
+
VALIDATION RESULTS: {SPEC-XXX}
|
|
124
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
125
|
+
|
|
126
|
+
| # | Check | Type | Result |
|
|
127
|
+
|---|-------|------|--------|
|
|
128
|
+
| 1 | {description} | automated | PASS |
|
|
129
|
+
| 2 | {description} | code | PASS |
|
|
130
|
+
| 3 | {description} | manual | FAIL |
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
**Result:** {passed}/{total} checks passed
|
|
135
|
+
|
|
136
|
+
{If all passed:}
|
|
137
|
+
Validation PASSED. Ready for `/sf:done`.
|
|
138
|
+
|
|
139
|
+
{If any failed:}
|
|
140
|
+
Validation FAILED. Fix issues and re-run `/sf:validate`.
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
</workflow>
|
|
144
|
+
|
|
145
|
+
<success_criteria>
|
|
146
|
+
- [ ] Spec resolved (argument or active)
|
|
147
|
+
- [ ] Validation checklist extracted from spec
|
|
148
|
+
- [ ] Missing checklist handled gracefully
|
|
149
|
+
- [ ] Automated commands executed and verified
|
|
150
|
+
- [ ] Code checks performed via tools
|
|
151
|
+
- [ ] Manual checks prompted to user
|
|
152
|
+
- [ ] Clear pass/fail report displayed
|
|
153
|
+
- [ ] Guidance on next step provided
|
|
154
|
+
</success_criteria>
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Context Monitor - PostToolUse hook
|
|
3
|
+
// Reads context metrics from the statusline bridge file and injects
|
|
4
|
+
// warnings when context usage is high. Makes the AGENT aware of
|
|
5
|
+
// context limits (the statusline only shows the user).
|
|
6
|
+
//
|
|
7
|
+
// How it works:
|
|
8
|
+
// 1. The statusline hook writes metrics to <os.tmpdir()>/claude-ctx-{session_id}.json
|
|
9
|
+
// 2. This hook reads those metrics after each tool use
|
|
10
|
+
// 3. When remaining context drops below thresholds, it injects a warning
|
|
11
|
+
// as additionalContext, which the agent sees in its conversation
|
|
12
|
+
//
|
|
13
|
+
// Thresholds:
|
|
14
|
+
// WARNING (remaining <= 35%): Agent should wrap up current task
|
|
15
|
+
// CRITICAL (remaining <= 25%): Agent should not start new complex work and should inform the user
|
|
16
|
+
//
|
|
17
|
+
// Debounce: 5 tool uses between warnings to avoid spam
|
|
18
|
+
// Severity escalation bypasses debounce (WARNING -> CRITICAL fires immediately)
|
|
19
|
+
|
|
20
|
+
const fs = require('fs');
|
|
21
|
+
const os = require('os');
|
|
22
|
+
const path = require('path');
|
|
23
|
+
|
|
24
|
+
// Remaining-context percentages at or below which a warning is injected.
const WARNING_THRESHOLD = 35;
const CRITICAL_THRESHOLD = 25;
// Bridge-file metrics older than this many seconds are ignored.
const STALE_SECONDS = 60;
// Number of hook invocations that must pass before a warning is repeated.
const DEBOUNCE_CALLS = 5;

/**
 * Tell whether a session id can be embedded in a temp-file name.
 *
 * The id arrives on stdin and is interpolated into paths that are read and
 * written, so ids containing path separators or NUL are rejected.
 *
 * @param {unknown} sessionId - Value of `session_id` from the hook input.
 * @returns {boolean} True for a non-empty string/number without separators.
 */
function isSafeSessionId(sessionId) {
  if (typeof sessionId !== 'string' && typeof sessionId !== 'number') {
    return false;
  }
  const text = String(sessionId);
  return text.length > 0 && !/[\\/\u0000]/.test(text);
}

/**
 * Map a remaining-context percentage to a warning level.
 *
 * @param {unknown} remaining - `remaining_percentage` from the bridge file.
 * @returns {'critical' | 'warning' | null} Null when no warning is due or
 *   when the value is missing / not a finite number (nothing to report).
 */
function resolveLevel(remaining) {
  if (typeof remaining !== 'number' || !Number.isFinite(remaining)) {
    return null;
  }
  if (remaining > WARNING_THRESHOLD) {
    return null;
  }
  return remaining <= CRITICAL_THRESHOLD ? 'critical' : 'warning';
}

/**
 * Load the per-session debounce state.
 *
 * @param {string} warnPath - Path of the `-warned.json` state file.
 * @returns {{warnData: {callsSinceWarn: number, lastLevel: (string|null)}, firstWarn: boolean}}
 *   Fresh state with `firstWarn: true` when the file is absent, unreadable,
 *   or does not contain a JSON object.
 */
function loadWarnState(warnPath) {
  const fresh = { warnData: { callsSinceWarn: 0, lastLevel: null }, firstWarn: true };
  if (!fs.existsSync(warnPath)) {
    return fresh;
  }
  try {
    const parsed = JSON.parse(fs.readFileSync(warnPath, 'utf8'));
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return { warnData: parsed, firstWarn: false };
    }
  } catch (e) {
    // Best effort: a corrupt state file is treated like a missing one and is
    // overwritten with valid state further down.
  }
  return fresh;
}

/**
 * Apply the debounce rule.
 *
 * @param {{callsSinceWarn: number, lastLevel: (string|null)}} warnData -
 *   State with `callsSinceWarn` already incremented for this invocation.
 * @param {boolean} firstWarn - True when no prior state existed.
 * @param {'critical' | 'warning'} currentLevel - Level for this invocation.
 * @returns {boolean} True when a warning should be emitted now: on the first
 *   warning, on a warning -> critical escalation, or once DEBOUNCE_CALLS
 *   invocations have passed since the last emitted warning.
 */
function shouldEmitWarning(warnData, firstWarn, currentLevel) {
  if (firstWarn) {
    return true;
  }
  const severityEscalated = currentLevel === 'critical' && warnData.lastLevel === 'warning';
  return severityEscalated || !(warnData.callsSinceWarn < DEBOUNCE_CALLS);
}

/**
 * Build the text injected as `additionalContext`.
 *
 * @param {'critical' | 'warning'} level - Warning level.
 * @param {unknown} usedPct - `used_pct` from the bridge file (printed as-is).
 * @param {number} remaining - Remaining context percentage.
 * @param {boolean} isSfActive - Whether `.specflow/STATE.md` exists in cwd.
 * @returns {string} The message for the agent.
 */
function buildMessage(level, usedPct, remaining, isSfActive) {
  const label = level === 'critical' ? 'CONTEXT CRITICAL' : 'CONTEXT WARNING';
  const prefix = `${label}: Usage at ${usedPct}%. Remaining: ${remaining}%. `;

  if (level === 'critical') {
    return prefix + (isSfActive
      ? 'Context is nearly exhausted. Do NOT start new complex work. ' +
        'Inform the user so they can run /sf:pause at the next natural stopping point.'
      : 'Context is nearly exhausted. Inform the user that context is low and ask how they want to proceed.');
  }
  return prefix + (isSfActive
    ? 'Context is getting limited. Avoid starting new complex work. ' +
      'Inform the user so they can prepare to pause.'
    : 'Be aware that context is getting limited. Avoid unnecessary exploration or starting new complex work.');
}

// Entry point: collect the hook payload from stdin, then decide whether to
// emit a warning. Every failure path exits 0 so the hook never blocks a tool.
let input = '';
// Give up quietly if stdin never closes.
const stdinTimeout = setTimeout(() => process.exit(0), 3000);
process.stdin.setEncoding('utf8');
process.stdin.on('data', chunk => input += chunk);
process.stdin.on('end', () => {
  clearTimeout(stdinTimeout);
  try {
    const data = JSON.parse(input);
    const sessionId = data.session_id;

    if (!sessionId || !isSafeSessionId(sessionId)) {
      process.exit(0);
    }

    const tmpDir = os.tmpdir();
    const metricsPath = path.join(tmpDir, `claude-ctx-${sessionId}.json`);

    if (!fs.existsSync(metricsPath)) {
      process.exit(0);
    }

    const metrics = JSON.parse(fs.readFileSync(metricsPath, 'utf8'));
    const now = Math.floor(Date.now() / 1000);

    if (metrics.timestamp && (now - metrics.timestamp) > STALE_SECONDS) {
      process.exit(0);
    }

    const remaining = metrics.remaining_percentage;
    const usedPct = metrics.used_pct;

    // Null covers both "plenty of context left" and "no usable metric".
    const currentLevel = resolveLevel(remaining);
    if (currentLevel === null) {
      process.exit(0);
    }

    // Debounce logic
    const warnPath = path.join(tmpDir, `claude-ctx-${sessionId}-warned.json`);
    const { warnData, firstWarn } = loadWarnState(warnPath);

    warnData.callsSinceWarn = (warnData.callsSinceWarn || 0) + 1;

    if (!shouldEmitWarning(warnData, firstWarn, currentLevel)) {
      // Persist the incremented counter so the debounce window advances.
      fs.writeFileSync(warnPath, JSON.stringify(warnData));
      process.exit(0);
    }

    warnData.callsSinceWarn = 0;
    warnData.lastLevel = currentLevel;
    fs.writeFileSync(warnPath, JSON.stringify(warnData));

    // Detect if SpecFlow is active
    const cwd = data.cwd || process.cwd();
    const isSfActive = fs.existsSync(path.join(cwd, '.specflow', 'STATE.md'));

    const message = buildMessage(currentLevel, usedPct, remaining, isSfActive);

    process.stdout.write(JSON.stringify({
      hookSpecificOutput: {
        hookEventName: "PostToolUse",
        additionalContext: message
      }
    }));
  } catch (e) {
    process.exit(0);
  }
});
|
package/hooks/statusline.js
CHANGED
|
@@ -4,16 +4,20 @@
|
|
|
4
4
|
|
|
5
5
|
const fs = require('fs');
|
|
6
6
|
const path = require('path');
|
|
7
|
+
const os = require('os');
|
|
7
8
|
|
|
8
9
|
// Read JSON from stdin (Claude Code protocol)
|
|
9
10
|
let input = '';
|
|
11
|
+
const stdinTimeout = setTimeout(() => process.exit(0), 3000);
|
|
10
12
|
process.stdin.setEncoding('utf8');
|
|
11
13
|
process.stdin.on('data', chunk => input += chunk);
|
|
12
14
|
process.stdin.on('end', () => {
|
|
15
|
+
clearTimeout(stdinTimeout);
|
|
13
16
|
try {
|
|
14
17
|
const data = JSON.parse(input);
|
|
15
18
|
const model = data.model?.display_name || 'Claude';
|
|
16
19
|
const dir = data.workspace?.current_dir || process.cwd();
|
|
20
|
+
const session = data.session_id || '';
|
|
17
21
|
const remaining = data.context_window?.remaining_percentage;
|
|
18
22
|
|
|
19
23
|
// Context window display (shows USED percentage scaled to 80% limit)
|
|
@@ -25,6 +29,19 @@ process.stdin.on('end', () => {
|
|
|
25
29
|
// Scale: 80% real usage = 100% displayed
|
|
26
30
|
const used = Math.min(100, Math.round((rawUsed / 80) * 100));
|
|
27
31
|
|
|
32
|
+
// Write context metrics to bridge file for the context-monitor hook
|
|
33
|
+
if (session) {
|
|
34
|
+
try {
|
|
35
|
+
const bridgePath = path.join(os.tmpdir(), `claude-ctx-${session}.json`);
|
|
36
|
+
fs.writeFileSync(bridgePath, JSON.stringify({
|
|
37
|
+
session_id: session,
|
|
38
|
+
remaining_percentage: remaining,
|
|
39
|
+
used_pct: used,
|
|
40
|
+
timestamp: Math.floor(Date.now() / 1000)
|
|
41
|
+
}));
|
|
42
|
+
} catch (e) {}
|
|
43
|
+
}
|
|
44
|
+
|
|
28
45
|
// Build progress bar (10 segments)
|
|
29
46
|
const filled = Math.floor(used / 10);
|
|
30
47
|
const bar = '█'.repeat(filled) + '░'.repeat(10 - filled);
|