specweave 1.0.577 → 1.0.579

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +29 -0
  2. package/bin/specweave.js +1 -0
  3. package/dist/src/cli/commands/auto.d.ts +28 -0
  4. package/dist/src/cli/commands/auto.d.ts.map +1 -1
  5. package/dist/src/cli/commands/auto.js +73 -0
  6. package/dist/src/cli/commands/auto.js.map +1 -1
  7. package/dist/src/cli/commands/create-increment.d.ts +8 -0
  8. package/dist/src/cli/commands/create-increment.d.ts.map +1 -1
  9. package/dist/src/cli/commands/create-increment.js +2 -1
  10. package/dist/src/cli/commands/create-increment.js.map +1 -1
  11. package/dist/src/cli/dispatcher.d.ts +29 -0
  12. package/dist/src/cli/dispatcher.d.ts.map +1 -0
  13. package/dist/src/cli/dispatcher.js +53 -0
  14. package/dist/src/cli/dispatcher.js.map +1 -0
  15. package/dist/src/core/cache/static-context-loader.d.ts +39 -0
  16. package/dist/src/core/cache/static-context-loader.d.ts.map +1 -0
  17. package/dist/src/core/cache/static-context-loader.js +49 -0
  18. package/dist/src/core/cache/static-context-loader.js.map +1 -0
  19. package/dist/src/core/config/types.d.ts +61 -0
  20. package/dist/src/core/config/types.d.ts.map +1 -1
  21. package/dist/src/core/config/types.js +16 -0
  22. package/dist/src/core/config/types.js.map +1 -1
  23. package/dist/src/core/doctor/checkers/configuration-checker.d.ts +1 -0
  24. package/dist/src/core/doctor/checkers/configuration-checker.d.ts.map +1 -1
  25. package/dist/src/core/doctor/checkers/configuration-checker.js +50 -0
  26. package/dist/src/core/doctor/checkers/configuration-checker.js.map +1 -1
  27. package/dist/src/core/hooks/pretooluse-guard.d.ts +37 -0
  28. package/dist/src/core/hooks/pretooluse-guard.d.ts.map +1 -0
  29. package/dist/src/core/hooks/pretooluse-guard.js +64 -0
  30. package/dist/src/core/hooks/pretooluse-guard.js.map +1 -0
  31. package/dist/src/core/increment/template-creator.d.ts +7 -0
  32. package/dist/src/core/increment/template-creator.d.ts.map +1 -1
  33. package/dist/src/core/increment/template-creator.js +2 -1
  34. package/dist/src/core/increment/template-creator.js.map +1 -1
  35. package/dist/src/core/skills/skill-judge.d.ts +56 -0
  36. package/dist/src/core/skills/skill-judge.d.ts.map +1 -1
  37. package/dist/src/core/skills/skill-judge.js +95 -8
  38. package/dist/src/core/skills/skill-judge.js.map +1 -1
  39. package/dist/src/core/team-lead/template-loader.d.ts +19 -0
  40. package/dist/src/core/team-lead/template-loader.d.ts.map +1 -0
  41. package/dist/src/core/team-lead/template-loader.js +33 -0
  42. package/dist/src/core/team-lead/template-loader.js.map +1 -0
  43. package/dist/src/core/telemetry/cache-metrics.d.ts +38 -0
  44. package/dist/src/core/telemetry/cache-metrics.d.ts.map +1 -0
  45. package/dist/src/core/telemetry/cache-metrics.js +65 -0
  46. package/dist/src/core/telemetry/cache-metrics.js.map +1 -0
  47. package/dist/src/core/validators/resource-base.d.ts +29 -0
  48. package/dist/src/core/validators/resource-base.d.ts.map +1 -0
  49. package/dist/src/core/validators/resource-base.js +52 -0
  50. package/dist/src/core/validators/resource-base.js.map +1 -0
  51. package/package.json +1 -1
  52. package/plugins/specweave/.lint/skill-lint.test.ts +91 -0
  53. package/plugins/specweave/.lint/skill-lint.ts +51 -0
  54. package/plugins/specweave/marketplace.json +114 -0
  55. package/plugins/specweave/skills/ado-multi-project/SKILL.md +9 -1
  56. package/plugins/specweave/skills/ado-sync/SKILL.md +16 -1
  57. package/plugins/specweave/skills/analytics/SKILL.md +45 -0
  58. package/plugins/specweave/skills/architect/SKILL.md +5 -19
  59. package/plugins/specweave/skills/auto/SKILL.md +41 -10
  60. package/plugins/specweave/skills/brainstorm/SKILL.md +15 -15
  61. package/plugins/specweave/skills/close-all/SKILL.md +21 -51
  62. package/plugins/specweave/skills/code-reviewer/SKILL.md +114 -57
  63. package/plugins/specweave/skills/do/SKILL.md +14 -5
  64. package/plugins/specweave/skills/done/SKILL.md +36 -1
  65. package/plugins/specweave/skills/github-issue-standard/SKILL.md +8 -224
  66. package/plugins/specweave/skills/github-multi-project/SKILL.md +9 -1
  67. package/plugins/specweave/skills/github-sync/SKILL.md +16 -1
  68. package/plugins/specweave/skills/grill/SKILL.md +56 -9
  69. package/plugins/specweave/skills/help/SKILL.md +32 -4
  70. package/plugins/specweave/skills/increment/SKILL.md +41 -8
  71. package/plugins/specweave/skills/jira-sync/SKILL.md +16 -1
  72. package/plugins/specweave/skills/judge-llm/SKILL.md +46 -14
  73. package/plugins/specweave/skills/multi-project/SKILL.md +370 -0
  74. package/plugins/specweave/skills/plan/SKILL.md +16 -1
  75. package/plugins/specweave/skills/pm/SKILL.md +12 -47
  76. package/plugins/specweave/skills/tdd-cycle/SKILL.md +38 -0
  77. package/plugins/specweave/skills/tdd-green/SKILL.md +16 -1
  78. package/plugins/specweave/skills/tdd-red/SKILL.md +16 -1
  79. package/plugins/specweave/skills/tdd-refactor/SKILL.md +16 -1
  80. package/plugins/specweave/skills/team-build/SKILL.md +16 -462
  81. package/plugins/specweave/skills/team-lead/SKILL.md +59 -44
  82. package/plugins/specweave/skills/team-lead/agents/_protocol.md +88 -0
  83. package/plugins/specweave/skills/team-lead/agents/architect.md +27 -36
  84. package/plugins/specweave/skills/team-lead/agents/backend.md +26 -56
  85. package/plugins/specweave/skills/team-lead/agents/database.md +23 -51
  86. package/plugins/specweave/skills/team-lead/agents/frontend.md +27 -57
  87. package/plugins/specweave/skills/team-lead/agents/pm.md +7 -11
  88. package/plugins/specweave/skills/team-lead/agents/researcher.md +29 -55
  89. package/plugins/specweave/skills/team-lead/agents/reviewer-security.md +43 -59
  90. package/plugins/specweave/skills/team-lead/agents/security.md +22 -48
  91. package/plugins/specweave/skills/team-lead/agents/testing.md +26 -53
  92. package/plugins/specweave/skills/team-merge/SKILL.md +13 -10
  93. package/plugins/specweave/skills/team-lead/agents/reviewer-logic.md +0 -82
  94. package/plugins/specweave/skills/team-lead/agents/reviewer-performance.md +0 -83
@@ -8,6 +8,48 @@ model: opus
8
8
 
9
9
  # Code Grill Expert
10
10
 
11
+ ## Tool-Use Rationale
12
+
13
+ - **Read**: Load the increment's `spec.md`, `rubric.md`, `tasks.md`, and the implementation files being interrogated so findings have real evidence.
14
+ - **Grep**: Search for AC IDs, try/catch sites, TODO markers, and patterns cited during grilling.
15
+ - **Glob**: Enumerate implementation and test files within the increment's scope to spot untested paths.
16
+ - **Bash**: Run `npx vitest run` (and related commands) to confirm whether a suspected bug actually trips a test.
17
+
18
+ ## Model Configuration
19
+
20
+ **Default effort**: `xhigh` — recommended for all review tasks per Opus 4.7 conventions.
21
+ **Opt-in max**: `--effort max` enables maximum effort with a warning: "max effort risks overthinking on straightforward problems."
22
+ **Legacy mode**: Set `quality.thinkingBudget: "legacy"` in config to pass a fixed `thinking` parameter (for pre-4.7 models only).
23
+
24
+ ## Prompt Caching
25
+
26
+ `sw:grill` uses Anthropic's ephemeral prompt caching to keep stable context hot between invocations. This cuts cost and latency when running grill back-to-back on the same increment (e.g. during the `sw:done` fix-loop).
27
+
28
+ **Files cached by default** (via `static-context-loader`):
29
+ - `CLAUDE.md` (project root)
30
+ - `.specweave/config.json`
31
+ - The active increment's `spec.md`
32
+ - The active increment's `rubric.md` (if present)
33
+
34
+ **Cache window**: 5-minute TTL (Anthropic's `cache_control: { type: "ephemeral" }` breakpoint). A second grill invocation within 5 minutes reads the cached prefix and only pays tokens for the dynamic tail.
35
+
36
+ **Extending the list**: Add paths to `cache.staticContextFiles` in `.specweave/config.json`:
37
+ ```json
38
+ {
39
+ "cache": {
40
+ "staticContextFiles": [
41
+ "CLAUDE.md",
42
+ ".specweave/config.json",
43
+ ".specweave/docs/internal/specs/custom-rubric.md"
44
+ ]
45
+ }
46
+ }
47
+ ```
48
+
49
+ **Disable caching**: Set `cache.staticContextFiles: []` in `.specweave/config.json`. Grill will still run, but without the prefix cache (full prompt tokens every call).
50
+
51
+ See `.specweave/docs/internal/specs/config-reference.md` and `opus-47-migration.md` for the full caching setup.
52
+
11
53
  ## Project Overrides
12
54
 
13
55
  **Skill Memories**: If `.specweave/skill-memories/grill.md` exists, read and apply its learnings.
@@ -46,6 +88,10 @@ I approach code like a demanding tech lead:
46
88
 
47
89
  ## Grill Process
48
90
 
91
+ > **think carefully and step-by-step — this evaluation is harder than it looks**
92
+
93
+ Apply this adaptive-thinking prompt hint throughout every grill phase. On Opus 4.7 we no longer pass a `thinking` API parameter; the hint above triggers the model to reason deeply where it matters. When a finding looks obvious, look again — the hardest bugs are the ones hiding in plain sight.
94
+
49
95
  ### Phase 0: Spec Compliance Interrogation (ALWAYS RUNS)
50
96
 
51
97
  **Rubric Integration**: If `rubric.md` exists in the increment directory:
@@ -157,17 +203,18 @@ Every finding from the grill process MUST be scored for confidence. This reduces
157
203
  ### Scoring System
158
204
 
159
205
  - Each finding receives a confidence score from 0 to 100
160
- - Only findings with confidence >= 70 are surfaced by default
206
+ - Only findings with confidence >= **50** are surfaced by default
161
207
  - Findings below the threshold are silently dropped (they create noise, not value)
162
208
  - Categories: **correctness** (bugs), **performance**, **security**, **maintainability**, **edge-case**
209
+ - Override via `quality.grillConfidenceThreshold` in `.specweave/config.json`. Pass `--threshold N` on the CLI for one-off overrides.
163
210
 
164
211
  ### Confidence Guidelines
165
212
 
166
213
  | Score | Meaning | Action |
167
214
  |-------|---------|--------|
168
215
  | 90-100 | Certain bug/issue — reproducible or provably wrong | MUST fix before shipping |
169
- | 70-89 | Very likely issue — strong evidence but not 100% confirmed | SHOULD fix, review recommended |
170
- | 50-69 | Possible issue — circumstantial evidence | Consider fixing, low priority |
216
+ | 75-89 | Very likely issue — strong evidence but not 100% confirmed | SHOULD fix, review recommended |
217
+ | 50-74 | Possible issue — circumstantial evidence | Consider fixing — surfaced by default at the lowered threshold |
171
218
  | <50 | Speculative — gut feeling, no hard evidence | Don't report (noise reduction) |
172
219
 
173
220
  **How to score**: Base confidence on concrete evidence. Reading the code and seeing a null dereference path = 95. Suspecting a performance issue without profiling data = 60. "This might be a problem someday" = 30 (don't report).
@@ -192,8 +239,8 @@ Each finding in the grill report MUST use this structured format:
192
239
  | Confidence Finding | Legacy Severity |
193
240
  |---|---|
194
241
  | critical (90-100 confidence) | BLOCKER / CRITICAL |
195
- | high (70-89 confidence) | MAJOR |
196
- | medium (50-69 confidence) | MINOR (only if explicitly requested) |
242
+ | high (75-89 confidence) | MAJOR |
243
+ | medium (50-74 confidence) | MINOR (surfaced by default at the lowered 50-point threshold) |
197
244
  | low (<50 confidence) | Not reported |
198
245
 
199
246
  ### Aggregated Summary
@@ -209,8 +256,8 @@ Total findings: {X} (above threshold)
209
256
  Suppressed: {Y} (below confidence threshold)
210
257
 
211
258
  Critical (must-fix, confidence 90+): {X}
212
- High (should-fix, confidence 70-89): {X}
213
- Medium (consider, confidence 50-69): {X} (only shown with --verbose)
259
+ High (should-fix, confidence 75-89): {X}
260
+ Medium (consider, confidence 50-74): {X} (shown by default; previously --verbose only)
214
261
 
215
262
  Ship readiness: READY | NOT READY | NEEDS REVIEW
216
263
 
@@ -225,11 +272,11 @@ Ship readiness: READY | NOT READY | NEEDS REVIEW
225
272
  To see all findings including low-confidence ones:
226
273
 
227
274
  ```
228
- sw:grill 0042 --verbose # Show findings with confidence >= 50
275
+ sw:grill 0042 --verbose # Show findings with confidence >= 30 (includes speculative)
229
276
  sw:grill 0042 --threshold 30 # Show findings with confidence >= 30
230
277
  ```
231
278
 
232
- Default threshold is 70. Lowering it is useful when debugging a specific area or doing a thorough pre-release review.
279
+ Default threshold is **50** (lowered in SpecWeave 1.1.0). Override via `quality.grillConfidenceThreshold` in `.specweave/config.json` or `--threshold N` on the CLI. Raise it for strictly actionable findings on a focused PR, or lower it further for a thorough pre-release review.
233
280
 
234
281
  ---
235
282
 
@@ -24,10 +24,41 @@ STATUS_JSON=$(specweave status --json 2>/dev/null || echo '{"increments":[]}')
24
24
 
25
25
  # Get usage stats (if initialized)
26
26
  ANALYTICS_JSON=$(specweave analytics --since 30d --json 2>/dev/null || echo '{}')
27
+
28
+ # Parse flags
29
+ SHOW_DEPRECATED=$(echo "$ARGUMENTS" | grep -c -- "--deprecated" || true)
30
+
31
+ # Load marketplace.json so deprecated skills can be filtered
32
+ MARKETPLACE_JSON=$(cat "$(specweave root 2>/dev/null)/plugins/specweave/marketplace.json" 2>/dev/null || echo '{"skills":[]}')
27
33
  ```
28
34
 
29
35
  If any command fails, skip that section gracefully — never show errors to the user.
30
36
 
37
+ ## Deprecated skill filtering (v1.1.0+)
38
+
39
+ By default, `sw:help` **HIDES** skills with `"deprecated": true` in `plugins/specweave/marketplace.json` from the workflow-stage listing in Step 2 / Section C.
40
+
41
+ - Default invocation: `sw:help` → deprecated skills are **not listed**
42
+ - Opt-in invocation: `sw:help --deprecated` → deprecated skills are listed in a dedicated "DEPRECATED" section with their migration notes extracted from each SKILL.md
43
+
44
+ **Filtering logic**:
45
+ 1. Parse `marketplace.json` to build the set of deprecated skill names: `deprecated = {s.name for s in marketplace.skills if s.deprecated == true}`
46
+ 2. When rendering Section C (Skills by Workflow Stage), skip any skill whose name is in `deprecated` unless `--deprecated` was passed.
47
+ 3. When `--deprecated` is passed, after Section C render an extra section:
48
+ ```
49
+ DEPRECATED — Scheduled for removal
50
+ sw:github-sync → Use sw-github:sync-spec (removal: v1.3.0)
51
+ sw:jira-sync → Use sw-jira:push / sw-jira:pull (removal: v1.3.0)
52
+ sw:ado-sync → Use sw-ado:push / sw-ado:pull (removal: v1.3.0)
53
+ sw:tdd-red → Use sw:tdd-cycle --phase red (removal: v1.3.0)
54
+ sw:tdd-green → Use sw:tdd-cycle --phase green (removal: v1.3.0)
55
+ sw:tdd-refactor → Use sw:tdd-cycle --phase refactor (removal: v1.3.0)
56
+ sw:github-issue-standard → See .specweave/docs/internal/specs/github-issue-standard.md
57
+ ```
58
+ 4. Deprecated skills are still invokable directly (alias-routed in marketplace.json) — the filter only affects discovery listing, not invocation.
59
+
60
+ See `.specweave/docs/internal/specs/skill-deprecation-policy.md` for the full lifecycle policy.
61
+
31
62
  ## Step 2: Display Help
32
63
 
33
64
  ### If NOT initialized (no `.specweave/` directory)
@@ -90,10 +121,7 @@ IMPLEMENT — Build it
90
121
  sw:do Execute tasks step by step
91
122
  sw:auto Autonomous execution (unattended)
92
123
  sw:team-lead Parallel multi-agent orchestration
93
- sw:tdd-cycle Test-driven development (red-green-refactor)
94
- sw:tdd-red Write failing tests first
95
- sw:tdd-green Make failing tests pass
96
- sw:tdd-refactor Refactor with test safety net
124
+ sw:tdd-cycle Test-driven development (red-green-refactor; use --phase red|green|refactor for single phase)
97
125
 
98
126
  VERIFY — Check quality
99
127
  sw:validate 130+ rule-based checks + AI quality assessment
@@ -2,10 +2,19 @@
2
2
  description: Plan and create SpecWeave increments with PM and Architect agent collaboration. Use when starting new features, hotfixes, bugs, or any development work that needs specification and task breakdown. Creates spec.md, plan.md, tasks.md with proper AC-IDs and living docs integration.
3
3
  argument-hint: "<feature-description>"
4
4
  model: opus
5
+ effort: xhigh
5
6
  ---
6
7
 
8
+ **Effort**: `xhigh` (Opus 4.7 default for planning). Use `--effort max` for unusually complex architecture, accepting the overthinking risk.
9
+
7
10
  # Plan Product Increment
8
11
 
12
+ ## Tool-Use Rationale
13
+
14
+ - **Read**: Load `.specweave/config.json`, existing increments, and referenced living docs to inform scope and AC-IDs.
15
+ - **Write**: Produce the four increment artifacts (`metadata.json`, `spec.md`, `plan.md`, `tasks.md`) inside the increment directory.
16
+ - **Edit**: Refine AC-IDs, user-story numbering, and task dependencies after the single-agent draft is complete.
17
+
9
18
  ## CRITICAL: Plan Mode Required (BLOCKING)
10
19
 
11
20
  **You MUST be in plan mode before proceeding.** If not, call `EnterPlanMode` now and wait for confirmation before continuing to Step 0A.
@@ -184,6 +193,18 @@ mkdir -p .specweave/increments/XXXX-name
184
193
 
185
194
  Create files in order: metadata.json FIRST, then spec.md, plan.md, tasks.md.
186
195
 
196
+ ## Flags
197
+
198
+ | Flag | Description | Default |
199
+ |------|-------------|---------|
200
+ | `--regenerate-plan` | Regenerate `plan.md` and `tasks.md` for an existing increment without re-running the PM interview. Useful when architecture changes after spec is finalized. Supersedes the deprecated standalone `sw:plan` skill. | false |
201
+
202
+ Example:
203
+
204
+ ```bash
205
+ sw:increment --regenerate-plan 0014-checkout-flow
206
+ ```
207
+
187
208
  ## Quick Reference
188
209
 
189
210
  ### Increment Types
@@ -251,23 +272,35 @@ The PM agent will:
251
272
  **After PM agent returns**, read the interview state file to confirm all categories are covered
252
273
  before proceeding to spec.md creation (especially when `enforcement: "strict"`).
253
274
 
254
- ## Step 4: Direct Specification Writing (Universal — works with ALL AI tools)
275
+ ## Step 4: Single-Agent Planning (DEFAULT — 0669 AC-US4-01, AC-US4-02)
255
276
 
256
- **After increment folder + metadata.json are created, write the spec files using CLI commands and templates.**
277
+ **Default path: one agent writes spec.md + plan.md + tasks.md + rubric.md sequentially.**
257
278
 
258
- This is the default path. It works with Claude Code, Cursor, OpenCode, Copilot, Aider, and any other AI tool.
279
+ This is now the default for ALL increments — no fan-out, no team-creation overhead, faster planning for the typical small-to-medium increment. It works with Claude Code, Cursor, OpenCode, Copilot, Aider, and any other AI tool.
259
280
 
260
281
  1. Create the increment: `specweave create-increment --auto-id --name "feature-name" --title "Title" --description "Desc" --project "my-app"`
282
+ - Add `--parallel` to opt into 3-agent fan-out planning (see Step 4a).
261
283
  2. Write `spec.md` with user stories and acceptance criteria (use the User Story Format above)
262
- 3. Write `plan.md` with architecture decisions and ADR references
263
- 4. Write `tasks.md` with BDD test plans (Given/When/Then) for each AC
264
- 5. Run: `specweave sync-living-docs {increment-id}`
284
+ 3. Write `plan.md` with architecture decisions and ADR references (must contain `## Design` and `## Rationale` headings)
285
+ 4. Write `tasks.md` with BDD test plans (Given/When/Then) for each AC (`### T-NN` entries)
286
+ 5. Write `rubric.md` with the per-increment quality contract (`## Quality Contract` heading)
287
+ 6. Run: `specweave sync-living-docs {increment-id}`
265
288
 
266
289
  Proceed to Step 5 after writing all files.
267
290
 
268
- ### Step 4a: Enhanced — Team-Based Delegation (Optional, Claude Code only)
291
+ **Parity contract (enforced by `tests/integration/increment-single-agent-parity.test.ts`):** single-agent output MUST match the top-level structure of the 3-agent path — `spec.md` with an `Acceptance Criteria` section and `AC-US*-NN` IDs, `plan.md` with `Design` and `Rationale` headings, `tasks.md` with `### T-NN` tasks, and `rubric.md` with a `Quality Contract` heading.
292
+
293
+ ### Step 4a: Opt-In — Team-Based 3-Agent Fan-Out (Parallel Planning)
294
+
295
+ **Use the 3-agent fan-out ONLY when one of these gates fires:**
296
+
297
+ 1. **Explicit flag** — user invoked with `--parallel` (maps to `parallel: true` on the create-increment options).
298
+ 2. **Large scope** — user-story count is ≥ 10 in the feature description or an existing draft.
299
+ 3. **Keyword trigger** — the feature description contains any of: `parallel`, `team lead`, `fan out`.
300
+
301
+ If NONE of these fire, STOP — use Step 4 (single-agent) instead. Do not spawn a planning team for small/medium increments by default.
269
302
 
270
- **If TeamCreate is available**, use team-based delegation for better quality. This provides isolated context, persistent memory, resumability, auto-compaction, and tmux pane visibility for each agent.
303
+ **When the gate fires and TeamCreate is available**, use team-based delegation for better quality. This provides isolated context, persistent memory, resumability, auto-compaction, and tmux pane visibility for each agent.
271
304
 
272
305
  **Team lifecycle:**
273
306
  1. `TeamCreate({ team_name: "plan-XXXX-name", description: "Planning: <feature>" })`
@@ -1,9 +1,24 @@
1
1
  ---
2
- description: Sync guidance for SpecWeave increments with JIRA epics/stories (content SpecWeave→JIRA, status JIRA→SpecWeave). Use when asking about JIRA integration setup or troubleshooting sync. For actual syncing, use sw-jira:sync command instead.
2
+ description: "[DEPRECATED] Sync guidance for SpecWeave increments with JIRA epics/stories (content SpecWeave→JIRA, status JIRA→SpecWeave). Use when asking about JIRA integration setup or troubleshooting sync. For actual syncing, use sw-jira:push or sw-jira:pull command instead."
3
3
  user-invokable: false
4
+ deprecated: true
4
5
  allowed-tools: Read, Task
5
6
  ---
6
7
 
8
+ > ⚠️ DEPRECATED: Use `sw-jira:push` / `sw-jira:pull` instead. This skill will be removed in v1.3.0.
9
+
10
+ ## Migration
11
+
12
+ This skill has been deprecated as part of the Opus 4.7 framework alignment (increment 0669).
13
+
14
+ - **Use instead**: `sw-jira:push` (content SpecWeave→JIRA) and `sw-jira:pull` (status JIRA→SpecWeave)
15
+ - **Removal**: Scheduled for v1.3.0 (2 minor releases after v1.1.0)
16
+ - **Why**: Consolidated sync logic moved to the `sw-jira:*` command family.
17
+
18
+ For the migration policy, see `.specweave/docs/internal/specs/skill-deprecation-policy.md`.
19
+
20
+ ---
21
+
7
22
  # JIRA Sync Skill
8
23
 
9
24
  Coordinates JIRA synchronization by delegating to `jira-mapper` agent.
@@ -1,11 +1,35 @@
1
1
  ---
2
- description: Ultrathink LLM-as-Judge validation of completed work. Uses extended thinking and Opus model for thorough, independent evaluation. Use when saying "judge my code", "judge-llm", "deep validate", or as part of sw:done closure.
2
+ description: Adaptive-thinking LLM-as-Judge validation of completed work. Uses the Opus model and an adaptive-thinking prompt hint for thorough, independent evaluation. Use when saying "judge my code", "judge-llm", "deep validate", or as part of sw:done closure.
3
3
  allowed-tools: Read, Grep, Glob, Bash
4
4
  ---
5
5
 
6
- # Ultrathink LLM-as-Judge Validation
6
+ # Adaptive-Thinking LLM-as-Judge Validation
7
7
 
8
- **ULTRATHINK BY DEFAULT** - Validate completed work using extended thinking and the LLM-as-Judge pattern. Provides an independent second opinion separate from `sw:grill`.
8
+ Validate completed work using the adaptive-thinking LLM-as-Judge pattern. Provides an independent second opinion separate from `sw:grill`.
9
+
10
+ ## Tool-Use Rationale
11
+
12
+ - **Read**: Load spec.md, tasks.md, rubric.md, and the files under review to build evaluation context.
13
+ - **Grep**: Search for AC patterns, test assertions, and implementation markers across the codebase.
14
+ - **Glob**: Discover test files and implementation files matching the increment's scope.
15
+ - **Bash**: Run `npx vitest run` to verify test pass rates; check file existence.
16
+
17
+ ## Adaptive-Thinking Prompt Hint
18
+
19
+ > **think carefully and step-by-step — this evaluation is harder than it looks**
20
+
21
+ With Opus 4.7, we no longer pass a `thinking` API parameter. Instead, we rely on adaptive thinking triggered by the prompt hint above. The model decides how much reasoning each evaluation requires.
22
+
23
+ ## Model Configuration
24
+
25
+ **Default effort**: `xhigh` — recommended for all evaluation tasks per Opus 4.7 conventions.
26
+ **Opt-in max**: `--effort max` enables maximum effort with a warning: "max effort risks overthinking on straightforward problems."
27
+ **Legacy mode**: Set `quality.thinkingBudget: "legacy"` in config to pass a fixed `thinking` parameter (for pre-4.7 models only).
28
+
29
+ ## Effort Level
30
+
31
+ - **Default**: `xhigh` effort — the judge runs at a very high reasoning effort level by default (below `max`).
32
+ - **Opt-in**: `--effort max` — elevates to maximum effort for exceptionally complex or high-stakes reviews.
9
33
 
10
34
  ## How It Differs from sw:grill
11
35
 
@@ -13,7 +37,7 @@ allowed-tools: Read, Grep, Glob, Bash
13
37
  |--------|-------------|-----------------|
14
38
  | Execution | In-session (same context) | **Separate Opus API call** |
15
39
  | Context | Shares conversation context | **Fresh context (no bias)** |
16
- | Thinking | Standard reasoning | **Extended thinking / ultrathink** |
40
+ | Reasoning | Standard reasoning | **Adaptive thinking via prompt hint (`xhigh` default, `--effort max` opt-in)** |
17
41
  | Output | Confidence-scored findings | Structured verdict + score |
18
42
  | Domain | Generic code review | **Built-in domain criteria** |
19
43
 
@@ -23,6 +47,7 @@ allowed-tools: Read, Grep, Glob, Bash
23
47
 
24
48
  **TypeScript**: `src/core/skills/skill-judge.ts`
25
49
  - Uses Anthropic SDK with user's `ANTHROPIC_API_KEY`
50
+ - Model-version guard: omits the `thinking` API parameter on `claude-opus-4-7*` and newer models; falls back to adaptive-thinking prompt hint
26
51
  - AbortController-based timeout to prevent stuck states (default: 60s)
27
52
  - Progress logging to `.specweave/logs/judge-llm.log`
28
53
  - Fallback to basic pattern matching if no API key
@@ -31,11 +56,11 @@ allowed-tools: Read, Grep, Glob, Bash
31
56
  ## Usage
32
57
 
33
58
  ```bash
34
- # DEFAULT: Ultrathink validation (recommended)
59
+ # DEFAULT: Adaptive-thinking validation at xhigh effort
35
60
  sw:judge-llm src/file.ts
36
61
  sw:judge-llm "src/**/*.ts"
37
62
 
38
- # Validate git changes (ultrathink by default)
63
+ # Validate git changes (adaptive thinking by default)
39
64
  sw:judge-llm --staged # Staged changes
40
65
  sw:judge-llm --last-commit # Last commit
41
66
  sw:judge-llm --diff main # Diff vs branch
@@ -43,6 +68,9 @@ sw:judge-llm --diff main # Diff vs branch
43
68
  # Quick mode (ONLY if you need speed over thoroughness)
44
69
  sw:judge-llm src/file.ts --quick
45
70
 
71
+ # Maximum reasoning effort (opt-in)
72
+ sw:judge-llm src/file.ts --effort max
73
+
46
74
  # Timeout control (default: 60s)
47
75
  sw:judge-llm src/file.ts --timeout 120000
48
76
 
@@ -61,13 +89,13 @@ sw:judge-llm src/file.ts --verbose # Show progress to console
61
89
 
62
90
  1. Read `.specweave/config.json` → check `externalModels.consent` field
63
91
  2. If `"always-allow"` → proceed silently
64
- 3. If `"never"` → skip API call, use in-session ultrathink evaluation instead
92
+ 3. If `"never"` → skip API call, use in-session adaptive-thinking evaluation instead
65
93
  4. If `"ask"` (default):
66
94
  - Check if `"anthropic"` is in `externalModels.allowedProviders`
67
95
  - If YES → proceed silently (standing permission)
68
96
  - If NO → **ASK USER**: "Judge-LLM will call the Anthropic API using your ANTHROPIC_API_KEY. This costs ~$0.01-0.05 per evaluation. Proceed? (yes/no/always)"
69
97
  - "yes" → proceed this time only
70
- - "no" → skip API call, use in-session ultrathink instead
98
+ - "no" → skip API call, use in-session adaptive-thinking evaluation instead
71
99
  - "always" → run: `grantStandingConsent('anthropic', projectRoot)` from `src/core/llm/consent.ts`, then proceed
72
100
  5. No `ANTHROPIC_API_KEY` set → falls back to pattern matching automatically (no cost, no consent needed)
73
101
 
@@ -82,12 +110,16 @@ Determine what to validate:
82
110
  - If `--diff <branch>`: get diff against branch
83
111
  - If no args: validate recent work in conversation context
84
112
 
85
- ### Step 2: Ultrathink Analysis (Default)
113
+ ### Step 2: Adaptive-Thinking Analysis (Default)
114
+
115
+ Prompt hint prefix (include verbatim at the start of the judge prompt):
116
+
117
+ > **think carefully and step-by-step — this evaluation is harder than it looks**
86
118
 
87
- Use extended thinking for deep LLM-as-Judge evaluation via the Opus model:
119
+ Use adaptive thinking (triggered by the hint) for deep LLM-as-Judge evaluation via the Opus 4.7 model at `xhigh` effort by default (`--effort max` opt-in for exceptional cases):
88
120
 
89
121
  ```
90
- Claude MUST use ultrathink/extended thinking to:
122
+ Claude MUST think carefully and step-by-step to:
91
123
 
92
124
  1. DEEP READ: Thoroughly understand all code, context, and intent
93
125
  2. MULTI-DIMENSIONAL ANALYSIS: Evaluate across ALL dimensions:
@@ -109,12 +141,12 @@ Claude MUST use ultrathink/extended thinking to:
109
141
  JUDGE-LLM VERDICT: APPROVED | CONCERNS | REJECTED
110
142
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
111
143
 
112
- Mode: ULTRATHINK (extended thinking)
144
+ Mode: ADAPTIVE-THINKING (xhigh effort)
113
145
  Confidence: 0.XX
114
146
  Files Analyzed: N
115
147
 
116
148
  REASONING:
117
- [Detailed chain-of-thought from extended thinking]
149
+ [Detailed chain-of-thought from adaptive thinking]
118
150
 
119
151
  ISSUES (if any):
120
152
  CRITICAL: [title]
@@ -161,7 +193,7 @@ After evaluation (including consent-denied fallback), you **MUST** write a JSON
161
193
  "timestamp": "<ISO-8601>",
162
194
  "verdict": "APPROVED|CONCERNS|REJECTED",
163
195
  "score": 87,
164
- "mode": "ultrathink|quick|pattern-match",
196
+ "mode": "adaptive-thinking|quick|pattern-match",
165
197
  "timedOut": false,
166
198
  "duration_ms": 45000,
167
199
  "consentStatus": "granted",