specweave 1.0.577 → 1.0.579
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -0
- package/bin/specweave.js +1 -0
- package/dist/src/cli/commands/auto.d.ts +28 -0
- package/dist/src/cli/commands/auto.d.ts.map +1 -1
- package/dist/src/cli/commands/auto.js +73 -0
- package/dist/src/cli/commands/auto.js.map +1 -1
- package/dist/src/cli/commands/create-increment.d.ts +8 -0
- package/dist/src/cli/commands/create-increment.d.ts.map +1 -1
- package/dist/src/cli/commands/create-increment.js +2 -1
- package/dist/src/cli/commands/create-increment.js.map +1 -1
- package/dist/src/cli/dispatcher.d.ts +29 -0
- package/dist/src/cli/dispatcher.d.ts.map +1 -0
- package/dist/src/cli/dispatcher.js +53 -0
- package/dist/src/cli/dispatcher.js.map +1 -0
- package/dist/src/core/cache/static-context-loader.d.ts +39 -0
- package/dist/src/core/cache/static-context-loader.d.ts.map +1 -0
- package/dist/src/core/cache/static-context-loader.js +49 -0
- package/dist/src/core/cache/static-context-loader.js.map +1 -0
- package/dist/src/core/config/types.d.ts +61 -0
- package/dist/src/core/config/types.d.ts.map +1 -1
- package/dist/src/core/config/types.js +16 -0
- package/dist/src/core/config/types.js.map +1 -1
- package/dist/src/core/doctor/checkers/configuration-checker.d.ts +1 -0
- package/dist/src/core/doctor/checkers/configuration-checker.d.ts.map +1 -1
- package/dist/src/core/doctor/checkers/configuration-checker.js +50 -0
- package/dist/src/core/doctor/checkers/configuration-checker.js.map +1 -1
- package/dist/src/core/hooks/pretooluse-guard.d.ts +37 -0
- package/dist/src/core/hooks/pretooluse-guard.d.ts.map +1 -0
- package/dist/src/core/hooks/pretooluse-guard.js +64 -0
- package/dist/src/core/hooks/pretooluse-guard.js.map +1 -0
- package/dist/src/core/increment/template-creator.d.ts +7 -0
- package/dist/src/core/increment/template-creator.d.ts.map +1 -1
- package/dist/src/core/increment/template-creator.js +2 -1
- package/dist/src/core/increment/template-creator.js.map +1 -1
- package/dist/src/core/skills/skill-judge.d.ts +56 -0
- package/dist/src/core/skills/skill-judge.d.ts.map +1 -1
- package/dist/src/core/skills/skill-judge.js +95 -8
- package/dist/src/core/skills/skill-judge.js.map +1 -1
- package/dist/src/core/team-lead/template-loader.d.ts +19 -0
- package/dist/src/core/team-lead/template-loader.d.ts.map +1 -0
- package/dist/src/core/team-lead/template-loader.js +33 -0
- package/dist/src/core/team-lead/template-loader.js.map +1 -0
- package/dist/src/core/telemetry/cache-metrics.d.ts +38 -0
- package/dist/src/core/telemetry/cache-metrics.d.ts.map +1 -0
- package/dist/src/core/telemetry/cache-metrics.js +65 -0
- package/dist/src/core/telemetry/cache-metrics.js.map +1 -0
- package/dist/src/core/validators/resource-base.d.ts +29 -0
- package/dist/src/core/validators/resource-base.d.ts.map +1 -0
- package/dist/src/core/validators/resource-base.js +52 -0
- package/dist/src/core/validators/resource-base.js.map +1 -0
- package/package.json +1 -1
- package/plugins/specweave/.lint/skill-lint.test.ts +91 -0
- package/plugins/specweave/.lint/skill-lint.ts +51 -0
- package/plugins/specweave/marketplace.json +114 -0
- package/plugins/specweave/skills/ado-multi-project/SKILL.md +9 -1
- package/plugins/specweave/skills/ado-sync/SKILL.md +16 -1
- package/plugins/specweave/skills/analytics/SKILL.md +45 -0
- package/plugins/specweave/skills/architect/SKILL.md +5 -19
- package/plugins/specweave/skills/auto/SKILL.md +41 -10
- package/plugins/specweave/skills/brainstorm/SKILL.md +15 -15
- package/plugins/specweave/skills/close-all/SKILL.md +21 -51
- package/plugins/specweave/skills/code-reviewer/SKILL.md +114 -57
- package/plugins/specweave/skills/do/SKILL.md +14 -5
- package/plugins/specweave/skills/done/SKILL.md +36 -1
- package/plugins/specweave/skills/github-issue-standard/SKILL.md +8 -224
- package/plugins/specweave/skills/github-multi-project/SKILL.md +9 -1
- package/plugins/specweave/skills/github-sync/SKILL.md +16 -1
- package/plugins/specweave/skills/grill/SKILL.md +56 -9
- package/plugins/specweave/skills/help/SKILL.md +32 -4
- package/plugins/specweave/skills/increment/SKILL.md +41 -8
- package/plugins/specweave/skills/jira-sync/SKILL.md +16 -1
- package/plugins/specweave/skills/judge-llm/SKILL.md +46 -14
- package/plugins/specweave/skills/multi-project/SKILL.md +370 -0
- package/plugins/specweave/skills/plan/SKILL.md +16 -1
- package/plugins/specweave/skills/pm/SKILL.md +12 -47
- package/plugins/specweave/skills/tdd-cycle/SKILL.md +38 -0
- package/plugins/specweave/skills/tdd-green/SKILL.md +16 -1
- package/plugins/specweave/skills/tdd-red/SKILL.md +16 -1
- package/plugins/specweave/skills/tdd-refactor/SKILL.md +16 -1
- package/plugins/specweave/skills/team-build/SKILL.md +16 -462
- package/plugins/specweave/skills/team-lead/SKILL.md +59 -44
- package/plugins/specweave/skills/team-lead/agents/_protocol.md +88 -0
- package/plugins/specweave/skills/team-lead/agents/architect.md +27 -36
- package/plugins/specweave/skills/team-lead/agents/backend.md +26 -56
- package/plugins/specweave/skills/team-lead/agents/database.md +23 -51
- package/plugins/specweave/skills/team-lead/agents/frontend.md +27 -57
- package/plugins/specweave/skills/team-lead/agents/pm.md +7 -11
- package/plugins/specweave/skills/team-lead/agents/researcher.md +29 -55
- package/plugins/specweave/skills/team-lead/agents/reviewer-security.md +43 -59
- package/plugins/specweave/skills/team-lead/agents/security.md +22 -48
- package/plugins/specweave/skills/team-lead/agents/testing.md +26 -53
- package/plugins/specweave/skills/team-merge/SKILL.md +13 -10
- package/plugins/specweave/skills/team-lead/agents/reviewer-logic.md +0 -82
- package/plugins/specweave/skills/team-lead/agents/reviewer-performance.md +0 -83
|
@@ -8,6 +8,48 @@ model: opus
|
|
|
8
8
|
|
|
9
9
|
# Code Grill Expert
|
|
10
10
|
|
|
11
|
+
## Tool-Use Rationale
|
|
12
|
+
|
|
13
|
+
- **Read**: Load the increment's `spec.md`, `rubric.md`, `tasks.md`, and the implementation files being interrogated so findings have real evidence.
|
|
14
|
+
- **Grep**: Search for AC IDs, try/catch sites, TODO markers, and patterns cited during grilling.
|
|
15
|
+
- **Glob**: Enumerate implementation and test files within the increment's scope to spot untested paths.
|
|
16
|
+
- **Bash**: Run `npx vitest run` (and related commands) to confirm whether a suspected bug actually trips a test.
|
|
17
|
+
|
|
18
|
+
## Model Configuration
|
|
19
|
+
|
|
20
|
+
**Default effort**: `xhigh` — recommended for all review tasks per Opus 4.7 conventions.
|
|
21
|
+
**Opt-in max**: `--effort max` enables maximum effort with a warning: "max effort risks overthinking on straightforward problems."
|
|
22
|
+
**Legacy mode**: Set `quality.thinkingBudget: "legacy"` in config to pass a fixed `thinking` parameter (for pre-4.7 models only).
|
|
23
|
+
|
|
24
|
+
## Prompt Caching
|
|
25
|
+
|
|
26
|
+
`sw:grill` uses Anthropic's ephemeral prompt caching to keep stable context hot between invocations. This cuts cost and latency when running grill back-to-back on the same increment (e.g. during the `sw:done` fix-loop).
|
|
27
|
+
|
|
28
|
+
**Files cached by default** (via `static-context-loader`):
|
|
29
|
+
- `CLAUDE.md` (project root)
|
|
30
|
+
- `.specweave/config.json`
|
|
31
|
+
- The active increment's `spec.md`
|
|
32
|
+
- The active increment's `rubric.md` (if present)
|
|
33
|
+
|
|
34
|
+
**Cache window**: 5-minute TTL (Anthropic's `cache_control: { type: "ephemeral" }` breakpoint), refreshed each time the cached prefix is read. A second grill invocation within 5 minutes reads the cached prefix at the discounted cache-read rate and pays the full input-token price only for the dynamic tail.
|
|
35
|
+
|
|
36
|
+
**Extending the list**: Add paths to `cache.staticContextFiles` in `.specweave/config.json`:
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"cache": {
|
|
40
|
+
"staticContextFiles": [
|
|
41
|
+
"CLAUDE.md",
|
|
42
|
+
".specweave/config.json",
|
|
43
|
+
".specweave/docs/internal/specs/custom-rubric.md"
|
|
44
|
+
]
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Disable caching**: Set `cache.staticContextFiles: []` in `.specweave/config.json`. Grill will still run, but without the prefix cache (full prompt tokens every call).
|
|
50
|
+
|
|
51
|
+
See `.specweave/docs/internal/specs/config-reference.md` and `opus-47-migration.md` for the full caching setup.
|
|
52
|
+
|
|
11
53
|
## Project Overrides
|
|
12
54
|
|
|
13
55
|
**Skill Memories**: If `.specweave/skill-memories/grill.md` exists, read and apply its learnings.
|
|
@@ -46,6 +88,10 @@ I approach code like a demanding tech lead:
|
|
|
46
88
|
|
|
47
89
|
## Grill Process
|
|
48
90
|
|
|
91
|
+
> **think carefully and step-by-step — this evaluation is harder than it looks**
|
|
92
|
+
|
|
93
|
+
Apply this adaptive-thinking prompt hint throughout every grill phase. On Opus 4.7 we no longer pass a `thinking` API parameter; the hint above triggers the model to reason deeply where it matters. When a finding looks obvious, look again — the hardest bugs are the ones hiding in plain sight.
|
|
94
|
+
|
|
49
95
|
### Phase 0: Spec Compliance Interrogation (ALWAYS RUNS)
|
|
50
96
|
|
|
51
97
|
**Rubric Integration**: If `rubric.md` exists in the increment directory:
|
|
@@ -157,17 +203,18 @@ Every finding from the grill process MUST be scored for confidence. This reduces
|
|
|
157
203
|
### Scoring System
|
|
158
204
|
|
|
159
205
|
- Each finding receives a confidence score from 0 to 100
|
|
160
|
-
- Only findings with confidence >=
|
|
206
|
+
- Only findings with confidence >= **50** are surfaced by default
|
|
161
207
|
- Findings below the threshold are silently dropped (they create noise, not value)
|
|
162
208
|
- Categories: **correctness** (bugs), **performance**, **security**, **maintainability**, **edge-case**
|
|
209
|
+
- Override via `quality.grillConfidenceThreshold` in `.specweave/config.json`. Pass `--threshold N` on the CLI for one-off overrides.
|
|
163
210
|
|
|
164
211
|
### Confidence Guidelines
|
|
165
212
|
|
|
166
213
|
| Score | Meaning | Action |
|
|
167
214
|
|-------|---------|--------|
|
|
168
215
|
| 90-100 | Certain bug/issue — reproducible or provably wrong | MUST fix before shipping |
|
|
169
|
-
|
|
|
170
|
-
| 50-
|
|
216
|
+
| 75-89 | Very likely issue — strong evidence but not 100% confirmed | SHOULD fix, review recommended |
|
|
217
|
+
| 50-74 | Possible issue — circumstantial evidence | Consider fixing — surfaced by default at the lowered threshold |
|
|
171
218
|
| <50 | Speculative — gut feeling, no hard evidence | Don't report (noise reduction) |
|
|
172
219
|
|
|
173
220
|
**How to score**: Base confidence on concrete evidence. Reading the code and seeing a null dereference path = 95. Suspecting a performance issue without profiling data = 60. "This might be a problem someday" = 30 (don't report).
|
|
@@ -192,8 +239,8 @@ Each finding in the grill report MUST use this structured format:
|
|
|
192
239
|
| Confidence Finding | Legacy Severity |
|
|
193
240
|
|---|---|
|
|
194
241
|
| critical (90-100 confidence) | BLOCKER / CRITICAL |
|
|
195
|
-
| high (
|
|
196
|
-
| medium (50-
|
|
242
|
+
| high (75-89 confidence) | MAJOR |
|
|
243
|
+
| medium (50-74 confidence) | MINOR (surfaced by default at the lowered 50-point threshold) |
|
|
197
244
|
| low (<50 confidence) | Not reported |
|
|
198
245
|
|
|
199
246
|
### Aggregated Summary
|
|
@@ -209,8 +256,8 @@ Total findings: {X} (above threshold)
|
|
|
209
256
|
Suppressed: {Y} (below confidence threshold)
|
|
210
257
|
|
|
211
258
|
Critical (must-fix, confidence 90+): {X}
|
|
212
|
-
High (should-fix, confidence
|
|
213
|
-
Medium (consider, confidence 50-
|
|
259
|
+
High (should-fix, confidence 75-89): {X}
|
|
260
|
+
Medium (consider, confidence 50-74): {X} (shown by default; previously --verbose only)
|
|
214
261
|
|
|
215
262
|
Ship readiness: READY | NOT READY | NEEDS REVIEW
|
|
216
263
|
|
|
@@ -225,11 +272,11 @@ Ship readiness: READY | NOT READY | NEEDS REVIEW
|
|
|
225
272
|
To see all findings including low-confidence ones:
|
|
226
273
|
|
|
227
274
|
```
|
|
228
|
-
sw:grill 0042 --verbose # Show findings with confidence >=
|
|
275
|
+
sw:grill 0042 --verbose # Show findings with confidence >= 30 (includes speculative)
|
|
229
276
|
sw:grill 0042 --threshold 30 # Show findings with confidence >= 30
|
|
230
277
|
```
|
|
231
278
|
|
|
232
|
-
Default threshold is
|
|
279
|
+
Default threshold is **50** (lowered in SpecWeave 1.1.0). Override via `quality.grillConfidenceThreshold` in `.specweave/config.json` or `--threshold N` on the CLI. Raise it for strictly actionable findings on a focused PR, or lower it further for a thorough pre-release review.
|
|
233
280
|
|
|
234
281
|
---
|
|
235
282
|
|
|
@@ -24,10 +24,41 @@ STATUS_JSON=$(specweave status --json 2>/dev/null || echo '{"increments":[]}')
|
|
|
24
24
|
|
|
25
25
|
# Get usage stats (if initialized)
|
|
26
26
|
ANALYTICS_JSON=$(specweave analytics --since 30d --json 2>/dev/null || echo '{}')
|
|
27
|
+
|
|
28
|
+
# Parse flags
|
|
29
|
+
SHOW_DEPRECATED=$(echo "$ARGUMENTS" | grep -c -- "--deprecated" || true)
|
|
30
|
+
|
|
31
|
+
# Load marketplace.json so deprecated skills can be filtered
|
|
32
|
+
MARKETPLACE_JSON=$(cat "$(specweave root 2>/dev/null)/plugins/specweave/marketplace.json" 2>/dev/null || echo '{"skills":[]}')
|
|
27
33
|
```
|
|
28
34
|
|
|
29
35
|
If any command fails, skip that section gracefully — never show errors to the user.
|
|
30
36
|
|
|
37
|
+
## Deprecated skill filtering (v1.1.0+)
|
|
38
|
+
|
|
39
|
+
By default, `sw:help` **HIDES** skills with `"deprecated": true` in `plugins/specweave/marketplace.json` from the workflow-stage listing in Step 2 / Section C.
|
|
40
|
+
|
|
41
|
+
- Default invocation: `sw:help` → deprecated skills are **not listed**
|
|
42
|
+
- Opt-in invocation: `sw:help --deprecated` → deprecated skills are listed in a dedicated "DEPRECATED" section with their migration notes extracted from each SKILL.md
|
|
43
|
+
|
|
44
|
+
**Filtering logic**:
|
|
45
|
+
1. Parse `marketplace.json` to build the set of deprecated skill names: `deprecated = {s.name for s in marketplace.skills if s.deprecated == true}`
|
|
46
|
+
2. When rendering Section C (Skills by Workflow Stage), skip any skill whose name is in `deprecated` unless `--deprecated` was passed.
|
|
47
|
+
3. When `--deprecated` is passed, after Section C render an extra section:
|
|
48
|
+
```
|
|
49
|
+
DEPRECATED — Scheduled for removal
|
|
50
|
+
sw:github-sync → Use sw-github:sync-spec (removal: v1.3.0)
|
|
51
|
+
sw:jira-sync → Use sw-jira:push / sw-jira:pull (removal: v1.3.0)
|
|
52
|
+
sw:ado-sync → Use sw-ado:push / sw-ado:pull (removal: v1.3.0)
|
|
53
|
+
sw:tdd-red → Use sw:tdd-cycle --phase red (removal: v1.3.0)
|
|
54
|
+
sw:tdd-green → Use sw:tdd-cycle --phase green (removal: v1.3.0)
|
|
55
|
+
sw:tdd-refactor → Use sw:tdd-cycle --phase refactor (removal: v1.3.0)
|
|
56
|
+
sw:github-issue-standard → See .specweave/docs/internal/specs/github-issue-standard.md
|
|
57
|
+
```
|
|
58
|
+
4. Deprecated skills are still invokable directly (alias-routed in marketplace.json) — the filter only affects discovery listing, not invocation.
|
|
59
|
+
|
|
60
|
+
See `.specweave/docs/internal/specs/skill-deprecation-policy.md` for the full lifecycle policy.
|
|
61
|
+
|
|
31
62
|
## Step 2: Display Help
|
|
32
63
|
|
|
33
64
|
### If NOT initialized (no `.specweave/` directory)
|
|
@@ -90,10 +121,7 @@ IMPLEMENT — Build it
|
|
|
90
121
|
sw:do Execute tasks step by step
|
|
91
122
|
sw:auto Autonomous execution (unattended)
|
|
92
123
|
sw:team-lead Parallel multi-agent orchestration
|
|
93
|
-
sw:tdd-cycle Test-driven development (red-green-refactor)
|
|
94
|
-
sw:tdd-red Write failing tests first
|
|
95
|
-
sw:tdd-green Make failing tests pass
|
|
96
|
-
sw:tdd-refactor Refactor with test safety net
|
|
124
|
+
sw:tdd-cycle Test-driven development (red-green-refactor; use --phase red|green|refactor for a single phase)
|
|
97
125
|
|
|
98
126
|
VERIFY — Check quality
|
|
99
127
|
sw:validate 130+ rule-based checks + AI quality assessment
|
|
@@ -2,10 +2,19 @@
|
|
|
2
2
|
description: Plan and create SpecWeave increments with PM and Architect agent collaboration. Use when starting new features, hotfixes, bugs, or any development work that needs specification and task breakdown. Creates spec.md, plan.md, tasks.md with proper AC-IDs and living docs integration.
|
|
3
3
|
argument-hint: "<feature-description>"
|
|
4
4
|
model: opus
|
|
5
|
+
effort: xhigh
|
|
5
6
|
---
|
|
6
7
|
|
|
8
|
+
**Effort**: `xhigh` (Opus 4.7 default for planning). Use `--effort max` for unusually complex architecture, accepting the overthinking risk.
|
|
9
|
+
|
|
7
10
|
# Plan Product Increment
|
|
8
11
|
|
|
12
|
+
## Tool-Use Rationale
|
|
13
|
+
|
|
14
|
+
- **Read**: Load `.specweave/config.json`, existing increments, and referenced living docs to inform scope and AC-IDs.
|
|
15
|
+
- **Write**: Produce the increment artifacts (`metadata.json`, `spec.md`, `plan.md`, `tasks.md`, and `rubric.md`) inside the increment directory.
|
|
16
|
+
- **Edit**: Refine AC-IDs, user-story numbering, and task dependencies after the single-agent draft is complete.
|
|
17
|
+
|
|
9
18
|
## CRITICAL: Plan Mode Required (BLOCKING)
|
|
10
19
|
|
|
11
20
|
**You MUST be in plan mode before proceeding.** If not, call `EnterPlanMode` now and wait for confirmation before continuing to Step 0A.
|
|
@@ -184,6 +193,18 @@ mkdir -p .specweave/increments/XXXX-name
|
|
|
184
193
|
|
|
185
194
|
Create files in order: metadata.json FIRST, then spec.md, plan.md, tasks.md.
|
|
186
195
|
|
|
196
|
+
## Flags
|
|
197
|
+
|
|
198
|
+
| Flag | Description | Default |
|
|
199
|
+
|------|-------------|---------|
|
|
200
|
+
| `--regenerate-plan` | Regenerate `plan.md` and `tasks.md` for an existing increment without re-running the PM interview. Useful when the architecture changes after the spec is finalized. Supersedes the deprecated standalone `sw:plan` skill. | false |
|
|
201
|
+
|
|
202
|
+
Example:
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
sw:increment --regenerate-plan 0014-checkout-flow
|
|
206
|
+
```
|
|
207
|
+
|
|
187
208
|
## Quick Reference
|
|
188
209
|
|
|
189
210
|
### Increment Types
|
|
@@ -251,23 +272,35 @@ The PM agent will:
|
|
|
251
272
|
**After PM agent returns**, read the interview state file to confirm all categories are covered
|
|
252
273
|
before proceeding to spec.md creation (especially when `enforcement: "strict"`).
|
|
253
274
|
|
|
254
|
-
## Step 4:
|
|
275
|
+
## Step 4: Single-Agent Planning (DEFAULT — 0669 AC-US4-01, AC-US4-02)
|
|
255
276
|
|
|
256
|
-
**
|
|
277
|
+
**Default path: one agent writes spec.md + plan.md + tasks.md + rubric.md sequentially.**
|
|
257
278
|
|
|
258
|
-
This is the default
|
|
279
|
+
This is now the default for ALL increments unless one of the Step 4a gates fires (explicit `--parallel`, ≥ 10 user stories, or a fan-out keyword) — no fan-out, no team-creation overhead, faster planning for the typical small-to-medium increment. It works with Claude Code, Cursor, OpenCode, Copilot, Aider, and any other AI tool.
|
|
259
280
|
|
|
260
281
|
1. Create the increment: `specweave create-increment --auto-id --name "feature-name" --title "Title" --description "Desc" --project "my-app"`
|
|
282
|
+
- Add `--parallel` to opt into 3-agent fan-out planning (see Step 4a).
|
|
261
283
|
2. Write `spec.md` with user stories and acceptance criteria (use the User Story Format above)
|
|
262
|
-
3. Write `plan.md` with architecture decisions and ADR references
|
|
263
|
-
4. Write `tasks.md` with BDD test plans (Given/When/Then) for each AC
|
|
264
|
-
5.
|
|
284
|
+
3. Write `plan.md` with architecture decisions and ADR references (must contain `## Design` and `## Rationale` headings)
|
|
285
|
+
4. Write `tasks.md` with BDD test plans (Given/When/Then) for each AC (`### T-NN` entries)
|
|
286
|
+
5. Write `rubric.md` with the per-increment quality contract (`## Quality Contract` heading)
|
|
287
|
+
6. Run: `specweave sync-living-docs {increment-id}`
|
|
265
288
|
|
|
266
289
|
Proceed to Step 5 after writing all files.
|
|
267
290
|
|
|
268
|
-
|
|
291
|
+
**Parity contract (enforced by `tests/integration/increment-single-agent-parity.test.ts`):** single-agent output MUST match the top-level structure of the 3-agent path — `spec.md` with an `Acceptance Criteria` section and `AC-US*-NN` IDs, `plan.md` with `Design` and `Rationale` headings, `tasks.md` with `### T-NN` tasks, and `rubric.md` with a `Quality Contract` heading.
|
|
292
|
+
|
|
293
|
+
### Step 4a: Opt-In — Team-Based 3-Agent Fan-Out (Parallel Planning)
|
|
294
|
+
|
|
295
|
+
**Use the 3-agent fan-out ONLY when one of these gates fires:**
|
|
296
|
+
|
|
297
|
+
1. **Explicit flag** — user invoked with `--parallel` (maps to `parallel: true` on the create-increment options).
|
|
298
|
+
2. **Large scope** — user-story count is ≥ 10 in the feature description or an existing draft.
|
|
299
|
+
3. **Keyword trigger** — the feature description contains any of: `parallel`, `team lead`, `fan out`.
|
|
300
|
+
|
|
301
|
+
If NONE of these fire, STOP — use Step 4 (single-agent) instead. Do not spawn a planning team for small/medium increments by default.
|
|
269
302
|
|
|
270
|
-
**
|
|
303
|
+
**When the gate fires and TeamCreate is available**, use team-based delegation for better quality. This provides isolated context, persistent memory, resumability, auto-compaction, and tmux pane visibility for each agent.
|
|
271
304
|
|
|
272
305
|
**Team lifecycle:**
|
|
273
306
|
1. `TeamCreate({ team_name: "plan-XXXX-name", description: "Planning: <feature>" })`
|
|
@@ -1,9 +1,24 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: Sync guidance for SpecWeave increments with JIRA epics/stories (content SpecWeave→JIRA, status JIRA→SpecWeave). Use when asking about JIRA integration setup or troubleshooting sync. For actual syncing, use sw-jira:
|
|
2
|
+
description: "[DEPRECATED] Sync guidance for SpecWeave increments with JIRA epics/stories (content SpecWeave→JIRA, status JIRA→SpecWeave). Use when asking about JIRA integration setup or troubleshooting sync. For actual syncing, use sw-jira:push or sw-jira:pull command instead."
|
|
3
3
|
user-invokable: false
|
|
4
|
+
deprecated: true
|
|
4
5
|
allowed-tools: Read, Task
|
|
5
6
|
---
|
|
6
7
|
|
|
8
|
+
> ⚠️ DEPRECATED: Use `sw-jira:push` / `sw-jira:pull` instead. This skill will be removed in v1.3.0.
|
|
9
|
+
|
|
10
|
+
## Migration
|
|
11
|
+
|
|
12
|
+
This skill has been deprecated as part of the Opus 4.7 framework alignment (increment 0669).
|
|
13
|
+
|
|
14
|
+
- **Use instead**: `sw-jira:push` (content SpecWeave→JIRA) and `sw-jira:pull` (status JIRA→SpecWeave)
|
|
15
|
+
- **Removal**: Scheduled for v1.3.0 (2 minor releases after v1.1.0)
|
|
16
|
+
- **Why**: Consolidated sync logic moved to the `sw-jira:*` command family.
|
|
17
|
+
|
|
18
|
+
For the migration policy, see `.specweave/docs/internal/specs/skill-deprecation-policy.md`.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
7
22
|
# JIRA Sync Skill
|
|
8
23
|
|
|
9
24
|
Coordinates JIRA synchronization by delegating to `jira-mapper` agent.
|
|
@@ -1,11 +1,35 @@
|
|
|
1
1
|
---
|
|
2
|
-
description:
|
|
2
|
+
description: Adaptive-thinking LLM-as-Judge validation of completed work. Uses the Opus model and an adaptive-thinking prompt hint for thorough, independent evaluation. Use when saying "judge my code", "judge-llm", "deep validate", or as part of sw:done closure.
|
|
3
3
|
allowed-tools: Read, Grep, Glob, Bash
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
#
|
|
6
|
+
# Adaptive-Thinking LLM-as-Judge Validation
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Validate completed work using the adaptive-thinking LLM-as-Judge pattern. Provides an independent second opinion separate from `sw:grill`.
|
|
9
|
+
|
|
10
|
+
## Tool-Use Rationale
|
|
11
|
+
|
|
12
|
+
- **Read**: Load spec.md, tasks.md, rubric.md, and the files under review to build evaluation context.
|
|
13
|
+
- **Grep**: Search for AC patterns, test assertions, and implementation markers across the codebase.
|
|
14
|
+
- **Glob**: Discover test files and implementation files matching the increment's scope.
|
|
15
|
+
- **Bash**: Run `npx vitest run` to verify test pass rates; check file existence.
|
|
16
|
+
|
|
17
|
+
## Adaptive-Thinking Prompt Hint
|
|
18
|
+
|
|
19
|
+
> **think carefully and step-by-step — this evaluation is harder than it looks**
|
|
20
|
+
|
|
21
|
+
With Opus 4.7, we no longer pass a `thinking` API parameter. Instead, we rely on adaptive thinking triggered by the prompt hint above. The model decides how much reasoning each evaluation requires.
|
|
22
|
+
|
|
23
|
+
## Model Configuration
|
|
24
|
+
|
|
25
|
+
**Default effort**: `xhigh` — recommended for all evaluation tasks per Opus 4.7 conventions.
|
|
26
|
+
**Opt-in max**: `--effort max` enables maximum effort with a warning: "max effort risks overthinking on straightforward problems."
|
|
27
|
+
**Legacy mode**: Set `quality.thinkingBudget: "legacy"` in config to pass a fixed `thinking` parameter (for pre-4.7 models only).
|
|
28
|
+
|
|
29
|
+
## Effort Level
|
|
30
|
+
|
|
31
|
+
- **Default**: `xhigh` effort — the judge runs at a very high (but not the maximum) reasoning effort level by default.
|
|
32
|
+
- **Opt-in**: `--effort max` — elevates to maximum effort for exceptionally complex or high-stakes reviews.
|
|
9
33
|
|
|
10
34
|
## How It Differs from sw:grill
|
|
11
35
|
|
|
@@ -13,7 +37,7 @@ allowed-tools: Read, Grep, Glob, Bash
|
|
|
13
37
|
|--------|-------------|-----------------|
|
|
14
38
|
| Execution | In-session (same context) | **Separate Opus API call** |
|
|
15
39
|
| Context | Shares conversation context | **Fresh context (no bias)** |
|
|
16
|
-
|
|
|
40
|
+
| Reasoning | Standard reasoning | **Adaptive thinking via prompt hint (`xhigh` default, `--effort max` opt-in)** |
|
|
17
41
|
| Output | Confidence-scored findings | Structured verdict + score |
|
|
18
42
|
| Domain | Generic code review | **Built-in domain criteria** |
|
|
19
43
|
|
|
@@ -23,6 +47,7 @@ allowed-tools: Read, Grep, Glob, Bash
|
|
|
23
47
|
|
|
24
48
|
**TypeScript**: `src/core/skills/skill-judge.ts`
|
|
25
49
|
- Uses Anthropic SDK with user's `ANTHROPIC_API_KEY`
|
|
50
|
+
- Model-version guard: omits the `thinking` API parameter on `claude-opus-4-7*` and newer models and relies on the adaptive-thinking prompt hint instead
|
|
26
51
|
- AbortController-based timeout to prevent stuck states (default: 60s)
|
|
27
52
|
- Progress logging to `.specweave/logs/judge-llm.log`
|
|
28
53
|
- Fallback to basic pattern matching if no API key
|
|
@@ -31,11 +56,11 @@ allowed-tools: Read, Grep, Glob, Bash
|
|
|
31
56
|
## Usage
|
|
32
57
|
|
|
33
58
|
```bash
|
|
34
|
-
# DEFAULT:
|
|
59
|
+
# DEFAULT: Adaptive-thinking validation at xhigh effort
|
|
35
60
|
sw:judge-llm src/file.ts
|
|
36
61
|
sw:judge-llm "src/**/*.ts"
|
|
37
62
|
|
|
38
|
-
# Validate git changes (
|
|
63
|
+
# Validate git changes (adaptive thinking by default)
|
|
39
64
|
sw:judge-llm --staged # Staged changes
|
|
40
65
|
sw:judge-llm --last-commit # Last commit
|
|
41
66
|
sw:judge-llm --diff main # Diff vs branch
|
|
@@ -43,6 +68,9 @@ sw:judge-llm --diff main # Diff vs branch
|
|
|
43
68
|
# Quick mode (ONLY if you need speed over thoroughness)
|
|
44
69
|
sw:judge-llm src/file.ts --quick
|
|
45
70
|
|
|
71
|
+
# Maximum reasoning effort (opt-in)
|
|
72
|
+
sw:judge-llm src/file.ts --effort max
|
|
73
|
+
|
|
46
74
|
# Timeout control (default: 60s)
|
|
47
75
|
sw:judge-llm src/file.ts --timeout 120000
|
|
48
76
|
|
|
@@ -61,13 +89,13 @@ sw:judge-llm src/file.ts --verbose # Show progress to console
|
|
|
61
89
|
|
|
62
90
|
1. Read `.specweave/config.json` → check `externalModels.consent` field
|
|
63
91
|
2. If `"always-allow"` → proceed silently
|
|
64
|
-
3. If `"never"` → skip API call, use in-session
|
|
92
|
+
3. If `"never"` → skip API call, use in-session adaptive-thinking evaluation instead
|
|
65
93
|
4. If `"ask"` (default):
|
|
66
94
|
- Check if `"anthropic"` is in `externalModels.allowedProviders`
|
|
67
95
|
- If YES → proceed silently (standing permission)
|
|
68
96
|
- If NO → **ASK USER**: "Judge-LLM will call the Anthropic API using your ANTHROPIC_API_KEY. This costs ~$0.01-0.05 per evaluation. Proceed? (yes/no/always)"
|
|
69
97
|
- "yes" → proceed this time only
|
|
70
|
-
- "no" → skip API call, use in-session
|
|
98
|
+
- "no" → skip API call, use in-session adaptive-thinking evaluation instead
|
|
71
99
|
- "always" → run: `grantStandingConsent('anthropic', projectRoot)` from `src/core/llm/consent.ts`, then proceed
|
|
72
100
|
5. No `ANTHROPIC_API_KEY` set → falls back to pattern matching automatically (no cost, no consent needed)
|
|
73
101
|
|
|
@@ -82,12 +110,16 @@ Determine what to validate:
|
|
|
82
110
|
- If `--diff <branch>`: get diff against branch
|
|
83
111
|
- If no args: validate recent work in conversation context
|
|
84
112
|
|
|
85
|
-
### Step 2:
|
|
113
|
+
### Step 2: Adaptive-Thinking Analysis (Default)
|
|
114
|
+
|
|
115
|
+
Prompt hint prefix (include verbatim at the start of the judge prompt):
|
|
116
|
+
|
|
117
|
+
> **think carefully and step-by-step — this evaluation is harder than it looks**
|
|
86
118
|
|
|
87
|
-
Use
|
|
119
|
+
Use adaptive thinking (triggered by the hint) for deep LLM-as-Judge evaluation via the Opus 4.7 model at `xhigh` effort by default (`--effort max` opt-in for exceptional cases):
|
|
88
120
|
|
|
89
121
|
```
|
|
90
|
-
Claude MUST
|
|
122
|
+
Claude MUST think carefully and step-by-step to:
|
|
91
123
|
|
|
92
124
|
1. DEEP READ: Thoroughly understand all code, context, and intent
|
|
93
125
|
2. MULTI-DIMENSIONAL ANALYSIS: Evaluate across ALL dimensions:
|
|
@@ -109,12 +141,12 @@ Claude MUST use ultrathink/extended thinking to:
|
|
|
109
141
|
JUDGE-LLM VERDICT: APPROVED | CONCERNS | REJECTED
|
|
110
142
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
111
143
|
|
|
112
|
-
Mode:
|
|
144
|
+
Mode: ADAPTIVE-THINKING (xhigh effort, or max effort when --effort max is passed)
|
|
113
145
|
Confidence: 0.XX
|
|
114
146
|
Files Analyzed: N
|
|
115
147
|
|
|
116
148
|
REASONING:
|
|
117
|
-
[Detailed chain-of-thought from
|
|
149
|
+
[Detailed chain-of-thought from adaptive thinking]
|
|
118
150
|
|
|
119
151
|
ISSUES (if any):
|
|
120
152
|
CRITICAL: [title]
|
|
@@ -161,7 +193,7 @@ After evaluation (including consent-denied fallback), you **MUST** write a JSON
|
|
|
161
193
|
"timestamp": "<ISO-8601>",
|
|
162
194
|
"verdict": "APPROVED|CONCERNS|REJECTED",
|
|
163
195
|
"score": 87,
|
|
164
|
-
"mode": "
|
|
196
|
+
"mode": "adaptive-thinking|quick|pattern-match",
|
|
165
197
|
"timedOut": false,
|
|
166
198
|
"duration_ms": 45000,
|
|
167
199
|
"consentStatus": "granted",
|