@bradygaster/squad-cli 0.8.24 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +284 -32
- package/dist/cli/commands/aspire.d.ts.map +1 -1
- package/dist/cli/commands/aspire.js +3 -0
- package/dist/cli/commands/aspire.js.map +1 -1
- package/dist/cli/commands/build.js +3 -3
- package/dist/cli/commands/build.js.map +1 -1
- package/dist/cli/commands/cast.d.ts +13 -0
- package/dist/cli/commands/cast.d.ts.map +1 -0
- package/dist/cli/commands/cast.js +77 -0
- package/dist/cli/commands/cast.js.map +1 -0
- package/dist/cli/commands/cost.d.ts +2 -0
- package/dist/cli/commands/cost.d.ts.map +1 -0
- package/dist/cli/commands/cost.js +170 -0
- package/dist/cli/commands/cost.js.map +1 -0
- package/dist/cli/commands/cross-squad.d.ts +12 -0
- package/dist/cli/commands/cross-squad.d.ts.map +1 -0
- package/dist/cli/commands/cross-squad.js +80 -0
- package/dist/cli/commands/cross-squad.js.map +1 -0
- package/dist/cli/commands/doctor.d.ts +5 -0
- package/dist/cli/commands/doctor.d.ts.map +1 -1
- package/dist/cli/commands/doctor.js +156 -0
- package/dist/cli/commands/doctor.js.map +1 -1
- package/dist/cli/commands/economy.d.ts +10 -0
- package/dist/cli/commands/economy.d.ts.map +1 -0
- package/dist/cli/commands/economy.js +64 -0
- package/dist/cli/commands/economy.js.map +1 -0
- package/dist/cli/commands/export.d.ts.map +1 -1
- package/dist/cli/commands/export.js +11 -4
- package/dist/cli/commands/export.js.map +1 -1
- package/dist/cli/commands/import.js +2 -2
- package/dist/cli/commands/import.js.map +1 -1
- package/dist/cli/commands/personal.d.ts +16 -0
- package/dist/cli/commands/personal.d.ts.map +1 -0
- package/dist/cli/commands/personal.js +213 -0
- package/dist/cli/commands/personal.js.map +1 -0
- package/dist/cli/commands/roles.d.ts +2 -0
- package/dist/cli/commands/roles.d.ts.map +1 -0
- package/dist/cli/commands/roles.js +54 -0
- package/dist/cli/commands/roles.js.map +1 -0
- package/dist/cli/commands/schedule.d.ts +14 -0
- package/dist/cli/commands/schedule.d.ts.map +1 -0
- package/dist/cli/commands/schedule.js +208 -0
- package/dist/cli/commands/schedule.js.map +1 -0
- package/dist/cli/commands/watch.d.ts.map +1 -1
- package/dist/cli/commands/watch.js +17 -5
- package/dist/cli/commands/watch.js.map +1 -1
- package/dist/cli/core/cast.d.ts +9 -0
- package/dist/cli/core/cast.d.ts.map +1 -1
- package/dist/cli/core/cast.js +170 -60
- package/dist/cli/core/cast.js.map +1 -1
- package/dist/cli/core/detect-squad-dir.d.ts +10 -0
- package/dist/cli/core/detect-squad-dir.d.ts.map +1 -1
- package/dist/cli/core/detect-squad-dir.js +47 -0
- package/dist/cli/core/detect-squad-dir.js.map +1 -1
- package/dist/cli/core/init.d.ts +8 -0
- package/dist/cli/core/init.d.ts.map +1 -1
- package/dist/cli/core/init.js +115 -2
- package/dist/cli/core/init.js.map +1 -1
- package/dist/cli/core/migrations.d.ts.map +1 -1
- package/dist/cli/core/migrations.js +21 -0
- package/dist/cli/core/migrations.js.map +1 -1
- package/dist/cli/core/templates.d.ts.map +1 -1
- package/dist/cli/core/templates.js +8 -1
- package/dist/cli/core/templates.js.map +1 -1
- package/dist/cli/core/upgrade.d.ts +14 -0
- package/dist/cli/core/upgrade.d.ts.map +1 -1
- package/dist/cli/core/upgrade.js +182 -48
- package/dist/cli/core/upgrade.js.map +1 -1
- package/dist/cli/core/version.js +2 -2
- package/dist/cli/core/version.js.map +1 -1
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +2 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/self-update.d.ts +21 -0
- package/dist/cli/self-update.d.ts.map +1 -0
- package/dist/cli/self-update.js +113 -0
- package/dist/cli/self-update.js.map +1 -0
- package/dist/cli/shell/autocomplete.d.ts.map +1 -1
- package/dist/cli/shell/autocomplete.js +5 -0
- package/dist/cli/shell/autocomplete.js.map +1 -1
- package/dist/cli/shell/commands.d.ts.map +1 -1
- package/dist/cli/shell/commands.js +55 -24
- package/dist/cli/shell/commands.js.map +1 -1
- package/dist/cli/shell/components/AgentPanel.d.ts.map +1 -1
- package/dist/cli/shell/components/AgentPanel.js +21 -6
- package/dist/cli/shell/components/AgentPanel.js.map +1 -1
- package/dist/cli/shell/components/App.d.ts.map +1 -1
- package/dist/cli/shell/components/App.js +33 -15
- package/dist/cli/shell/components/App.js.map +1 -1
- package/dist/cli/shell/components/InputPrompt.js +1 -1
- package/dist/cli/shell/components/InputPrompt.js.map +1 -1
- package/dist/cli/shell/components/MessageStream.d.ts +2 -0
- package/dist/cli/shell/components/MessageStream.d.ts.map +1 -1
- package/dist/cli/shell/components/MessageStream.js +20 -20
- package/dist/cli/shell/components/MessageStream.js.map +1 -1
- package/dist/cli/shell/components/ThinkingIndicator.d.ts +4 -0
- package/dist/cli/shell/components/ThinkingIndicator.d.ts.map +1 -1
- package/dist/cli/shell/components/ThinkingIndicator.js +23 -4
- package/dist/cli/shell/components/ThinkingIndicator.js.map +1 -1
- package/dist/cli/shell/coordinator.d.ts +7 -0
- package/dist/cli/shell/coordinator.d.ts.map +1 -1
- package/dist/cli/shell/coordinator.js +206 -88
- package/dist/cli/shell/coordinator.js.map +1 -1
- package/dist/cli/shell/error-messages.d.ts +14 -0
- package/dist/cli/shell/error-messages.d.ts.map +1 -1
- package/dist/cli/shell/error-messages.js +65 -1
- package/dist/cli/shell/error-messages.js.map +1 -1
- package/dist/cli/shell/index.d.ts +1 -1
- package/dist/cli/shell/index.d.ts.map +1 -1
- package/dist/cli/shell/index.js +83 -12
- package/dist/cli/shell/index.js.map +1 -1
- package/dist/cli/shell/router.d.ts.map +1 -1
- package/dist/cli/shell/router.js +21 -2
- package/dist/cli/shell/router.js.map +1 -1
- package/dist/cli/shell/stream-bridge.d.ts +2 -0
- package/dist/cli/shell/stream-bridge.d.ts.map +1 -1
- package/dist/cli/shell/stream-bridge.js +11 -2
- package/dist/cli/shell/stream-bridge.js.map +1 -1
- package/dist/cli/shell/terminal.d.ts +5 -5
- package/dist/cli/shell/terminal.d.ts.map +1 -1
- package/dist/cli/shell/terminal.js +35 -26
- package/dist/cli/shell/terminal.js.map +1 -1
- package/dist/cli-entry.js +140 -43
- package/dist/cli-entry.js.map +1 -1
- package/package.json +20 -4
- package/scripts/patch-esm-imports.mjs +84 -55
- package/scripts/patch-ink-rendering.mjs +115 -0
- package/templates/casting/Futurama.json +10 -0
- package/templates/casting-policy.json +4 -2
- package/templates/casting-reference.md +104 -0
- package/templates/cooperative-rate-limiting.md +229 -0
- package/templates/issue-lifecycle.md +412 -0
- package/templates/keda-scaler.md +164 -0
- package/templates/machine-capabilities.md +75 -0
- package/templates/orchestration-log.md +1 -1
- package/templates/ralph-circuit-breaker.md +313 -0
- package/templates/routing.md +5 -20
- package/templates/scribe-charter.md +1 -1
- package/templates/skills/agent-collaboration/SKILL.md +42 -0
- package/templates/skills/agent-conduct/SKILL.md +24 -0
- package/templates/skills/architectural-proposals/SKILL.md +151 -0
- package/templates/skills/ci-validation-gates/SKILL.md +84 -0
- package/templates/skills/cli-wiring/SKILL.md +47 -0
- package/templates/skills/client-compatibility/SKILL.md +89 -0
- package/templates/skills/cross-squad/SKILL.md +114 -0
- package/templates/skills/distributed-mesh/SKILL.md +287 -0
- package/templates/skills/distributed-mesh/mesh.json.example +30 -0
- package/templates/skills/distributed-mesh/sync-mesh.ps1 +111 -0
- package/templates/skills/distributed-mesh/sync-mesh.sh +104 -0
- package/templates/skills/docs-standards/SKILL.md +71 -0
- package/templates/skills/economy-mode/SKILL.md +114 -0
- package/templates/skills/external-comms/SKILL.md +329 -0
- package/templates/skills/gh-auth-isolation/SKILL.md +183 -0
- package/templates/skills/git-workflow/SKILL.md +204 -0
- package/templates/skills/github-multi-account/SKILL.md +95 -0
- package/templates/skills/history-hygiene/SKILL.md +36 -0
- package/templates/skills/humanizer/SKILL.md +105 -0
- package/templates/skills/init-mode/SKILL.md +102 -0
- package/templates/skills/model-selection/SKILL.md +117 -0
- package/templates/skills/nap/SKILL.md +24 -0
- package/templates/skills/personal-squad/SKILL.md +57 -0
- package/templates/skills/release-process/SKILL.md +423 -0
- package/templates/skills/reskill/SKILL.md +92 -0
- package/templates/skills/reviewer-protocol/SKILL.md +79 -0
- package/templates/skills/secret-handling/SKILL.md +200 -0
- package/templates/skills/session-recovery/SKILL.md +155 -0
- package/templates/skills/test-discipline/SKILL.md +37 -0
- package/templates/skills/windows-compatibility/SKILL.md +74 -0
- package/templates/squad.agent.md +1287 -1146
- package/templates/workflows/squad-docs.yml +8 -4
- package/templates/workflows/squad-heartbeat.yml +3 -4
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# Ralph Circuit Breaker — Model Rate Limit Fallback
|
|
2
|
+
|
|
3
|
+
> Classic circuit breaker pattern (Hystrix / Polly / Resilience4j) applied to Copilot model selection.
|
|
4
|
+
> When the preferred model hits rate limits, Ralph automatically degrades to free-tier models, then self-heals.
|
|
5
|
+
|
|
6
|
+
## Problem
|
|
7
|
+
|
|
8
|
+
When running multiple Ralph instances across repos, Copilot model rate limits cause cascading failures.
|
|
9
|
+
All Ralphs fail simultaneously when the preferred model (e.g., `claude-sonnet-4.6`) hits quota.
|
|
10
|
+
|
|
11
|
+
Premium models burn quota fast:
|
|
12
|
+
| Model | Multiplier | Risk |
|
|
13
|
+
|-------|-----------|------|
|
|
14
|
+
| `claude-sonnet-4.6` | 1x | Moderate with many Ralphs |
|
|
15
|
+
| `claude-opus-4.6` | 10x | High |
|
|
16
|
+
| `gpt-5.4` | 50x | Very high |
|
|
17
|
+
| `gpt-5.4-mini` | **0x** | **Free — unlimited** |
|
|
18
|
+
| `gpt-5-mini` | **0x** | **Free — unlimited** |
|
|
19
|
+
| `gpt-4.1` | **0x** | **Free — unlimited** |
|
|
20
|
+
|
|
21
|
+
## Circuit Breaker States
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
┌─────────┐ rate limit error ┌────────┐
|
|
25
|
+
│ CLOSED │ ───────────────────► │ OPEN │
|
|
26
|
+
│ (normal)│ │(fallback)│
|
|
27
|
+
└────┬────┘ ◄──────────────── └────┬────┘
|
|
28
|
+
│ 2 consecutive │
|
|
29
|
+
│ successes │ cooldown expires
|
|
30
|
+
│ ▼
|
|
31
|
+
│ ┌──────────┐
|
|
32
|
+
└───── success ◄──────── │HALF-OPEN │
|
|
33
|
+
(close) │ (testing) │
|
|
34
|
+
└──────────┘
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### CLOSED (normal operation)
|
|
38
|
+
- Use preferred model from config
|
|
39
|
+
- Every successful response confirms circuit stays closed
|
|
40
|
+
- On rate limit error → transition to OPEN
|
|
41
|
+
|
|
42
|
+
### OPEN (rate limited — fallback active)
|
|
43
|
+
- Fall back through the free-tier model chain:
|
|
44
|
+
1. `gpt-5.4-mini`
|
|
45
|
+
2. `gpt-5-mini`
|
|
46
|
+
3. `gpt-4.1`
|
|
47
|
+
- Start cooldown timer (default: 10 minutes)
|
|
48
|
+
- When cooldown expires → transition to HALF-OPEN
|
|
49
|
+
|
|
50
|
+
### HALF-OPEN (testing recovery)
|
|
51
|
+
- Try preferred model again
|
|
52
|
+
- If 2 consecutive successes → transition to CLOSED
|
|
53
|
+
- If rate limit error → back to OPEN, reset cooldown
|
|
54
|
+
|
|
55
|
+
## State File: `.squad/ralph-circuit-breaker.json`
|
|
56
|
+
|
|
57
|
+
```json
|
|
58
|
+
{
|
|
59
|
+
"state": "closed",
|
|
60
|
+
"preferredModel": "claude-sonnet-4.6",
|
|
61
|
+
"fallbackChain": ["gpt-5.4-mini", "gpt-5-mini", "gpt-4.1"],
|
|
62
|
+
"currentFallbackIndex": 0,
|
|
63
|
+
"cooldownMinutes": 10,
|
|
64
|
+
"openedAt": null,
|
|
65
|
+
"halfOpenSuccesses": 0,
|
|
66
|
+
"consecutiveFailures": 0,
|
|
67
|
+
"metrics": {
|
|
68
|
+
"totalFallbacks": 0,
|
|
69
|
+
"totalRecoveries": 0,
|
|
70
|
+
"lastFallbackAt": null,
|
|
71
|
+
"lastRecoveryAt": null
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## PowerShell Functions
|
|
77
|
+
|
|
78
|
+
Paste these into your `ralph-watch.ps1` or source them from a shared module.
|
|
79
|
+
|
|
80
|
+
### `Get-CircuitBreakerState`
|
|
81
|
+
|
|
82
|
+
```powershell
|
|
83
|
+
function Get-CircuitBreakerState {
    <#
    .SYNOPSIS
    Loads the circuit breaker state, creating the state file with defaults on first use.

    .PARAMETER StateFile
    Path to the JSON state file. Defaults to .squad/ralph-circuit-breaker.json.

    .OUTPUTS
    The state object parsed from the JSON file. The same code path (ConvertFrom-Json)
    is used whether or not the file had to be created, so callers always get the same type.
    #>
    param([string]$StateFile = ".squad/ralph-circuit-breaker.json")

    if (-not (Test-Path $StateFile)) {
        # Set-Content does not create missing directories, so make sure the parent exists.
        $parentDir = Split-Path -Parent $StateFile
        if ($parentDir -and -not (Test-Path $parentDir)) {
            New-Item -ItemType Directory -Path $parentDir -Force | Out-Null
        }

        # [ordered] keeps the keys in the documented order in the generated file.
        $default = [ordered]@{
            state = "closed"
            preferredModel = "claude-sonnet-4.6"
            fallbackChain = @("gpt-5.4-mini", "gpt-5-mini", "gpt-4.1")
            currentFallbackIndex = 0
            cooldownMinutes = 10
            openedAt = $null
            halfOpenSuccesses = 0
            consecutiveFailures = 0
            metrics = [ordered]@{
                totalFallbacks = 0
                totalRecoveries = 0
                lastFallbackAt = $null
                lastRecoveryAt = $null
            }
        }
        $default | ConvertTo-Json -Depth 3 | Set-Content $StateFile
        # Fall through and read the file back instead of returning the hashtable,
        # so the first call returns the same kind of object as every later call.
    }

    return (Get-Content $StateFile -Raw | ConvertFrom-Json)
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### `Save-CircuitBreakerState`
|
|
112
|
+
|
|
113
|
+
```powershell
|
|
114
|
+
function Save-CircuitBreakerState {
    <#
    .SYNOPSIS
    Serializes the circuit breaker state to JSON and writes it to the state file.

    .PARAMETER State
    The state object to persist.

    .PARAMETER StateFile
    Path to the JSON state file. Defaults to .squad/ralph-circuit-breaker.json.
    #>
    param(
        [object]$State,
        [string]$StateFile = ".squad/ralph-circuit-breaker.json"
    )

    # Depth 3 is enough to serialize the nested metrics object
    $json = $State | ConvertTo-Json -Depth 3
    Set-Content -Path $StateFile -Value $json
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### `Get-CurrentModel`
|
|
125
|
+
|
|
126
|
+
Returns the model Ralph should use right now, based on circuit state.
|
|
127
|
+
|
|
128
|
+
```powershell
|
|
129
|
+
function Get-CurrentModel {
    <#
    .SYNOPSIS
    Returns the model Ralph should use right now, based on circuit state.

    .DESCRIPTION
    closed, half-open or unrecognized state: the preferred model.
    open: the current fallback model, unless the cooldown has expired. In that case the
    circuit is moved to half-open (and saved) and the preferred model is returned for a trial.

    .PARAMETER StateFile
    Path to the JSON state file. Defaults to .squad/ralph-circuit-breaker.json.

    .OUTPUTS
    The model name to use for the next call.
    #>
    param([string]$StateFile = ".squad/ralph-circuit-breaker.json")

    $cb = Get-CircuitBreakerState -StateFile $StateFile

    switch ($cb.state) {
        "closed" {
            return $cb.preferredModel
        }
        "open" {
            # Check if cooldown has expired
            if ($cb.openedAt) {
                # Newer PowerShell versions may already have turned the ISO 8601 string into a
                # [DateTime] during ConvertFrom-Json; Windows PowerShell 5.1 leaves it a string.
                # Accept both, and parse strings with the invariant culture so the result does
                # not depend on the machine's locale.
                if ($cb.openedAt -is [DateTime]) {
                    $opened = $cb.openedAt
                } else {
                    $culture = [System.Globalization.CultureInfo]::InvariantCulture
                    $styles = [System.Globalization.DateTimeStyles]::RoundtripKind
                    $opened = [DateTime]::Parse([string]$cb.openedAt, $culture, $styles)
                }

                # Compare in UTC so local and UTC timestamps both give the right elapsed time
                $elapsed = (Get-Date).ToUniversalTime() - $opened.ToUniversalTime()
                if ($elapsed.TotalMinutes -ge $cb.cooldownMinutes) {
                    # Transition to half-open
                    $cb.state = "half-open"
                    $cb.halfOpenSuccesses = 0
                    Save-CircuitBreakerState -State $cb -StateFile $StateFile
                    Write-Host " [circuit-breaker] Cooldown expired. Testing preferred model..." -ForegroundColor Yellow
                    return $cb.preferredModel
                }
            }

            # Still in cooldown — use fallback.
            # @() also copes with a single model stored as a bare string instead of an array.
            $chain = @($cb.fallbackChain)
            if ($chain.Count -eq 0) {
                # Nothing to fall back to — the preferred model is the only option
                return $cb.preferredModel
            }
            # Clamp the stored index into the valid range of the chain
            $idx = [Math]::Max(0, [Math]::Min([int]$cb.currentFallbackIndex, $chain.Count - 1))
            return $chain[$idx]
        }
        "half-open" {
            return $cb.preferredModel
        }
        default {
            return $cb.preferredModel
        }
    }
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### `Update-CircuitBreakerOnSuccess`
|
|
167
|
+
|
|
168
|
+
Call after every successful model response.
|
|
169
|
+
|
|
170
|
+
```powershell
|
|
171
|
+
function Update-CircuitBreakerOnSuccess {
    <#
    .SYNOPSIS
    Records a successful model response in the circuit breaker state.

    .DESCRIPTION
    Resets the consecutive-failure counter. In half-open state it also counts the success;
    after 2 half-open successes the circuit is closed and the recovery metrics are updated.

    .PARAMETER StateFile
    Path to the JSON state file. Defaults to .squad/ralph-circuit-breaker.json.
    #>
    param([string]$StateFile = ".squad/ralph-circuit-breaker.json")

    $cb = Get-CircuitBreakerState -StateFile $StateFile
    $hadFailures = $cb.consecutiveFailures -gt 0
    $cb.consecutiveFailures = 0

    if ($cb.state -eq "half-open") {
        $cb.halfOpenSuccesses++
        if ($cb.halfOpenSuccesses -ge 2) {
            # Recovery! Close the circuit
            $cb.state = "closed"
            $cb.openedAt = $null
            $cb.halfOpenSuccesses = 0
            $cb.currentFallbackIndex = 0
            $cb.metrics.totalRecoveries++
            $cb.metrics.lastRecoveryAt = (Get-Date).ToString("o")
            Save-CircuitBreakerState -State $cb -StateFile $StateFile
            Write-Host " [circuit-breaker] RECOVERED — back to preferred model ($($cb.preferredModel))" -ForegroundColor Green
            return
        }
        Save-CircuitBreakerState -State $cb -StateFile $StateFile
        Write-Host " [circuit-breaker] Half-open success $($cb.halfOpenSuccesses)/2" -ForegroundColor Yellow
        return
    }

    # Closed or open state — no transition. Persist the counter reset only when it actually
    # changed, so the stored consecutiveFailures does not go stale and the common
    # "already zero" case costs no file write.
    if ($hadFailures) {
        Save-CircuitBreakerState -State $cb -StateFile $StateFile
    }
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### `Update-CircuitBreakerOnRateLimit`
|
|
201
|
+
|
|
202
|
+
Call when a model response indicates rate limiting (HTTP 429 or error message containing "rate limit").
|
|
203
|
+
|
|
204
|
+
```powershell
|
|
205
|
+
function Update-CircuitBreakerOnRateLimit {
    <#
    .SYNOPSIS
    Records a rate-limit response in the circuit breaker state.

    .DESCRIPTION
    From closed or half-open: opens the circuit, starts the cooldown, restarts the fallback
    chain at its first entry and updates the fallback metrics.
    While already open: advances to the next fallback model if one is left, and restarts
    the cooldown.

    .PARAMETER StateFile
    Path to the JSON state file. Defaults to .squad/ralph-circuit-breaker.json.
    #>
    param([string]$StateFile = ".squad/ralph-circuit-breaker.json")

    $breaker = Get-CircuitBreakerState -StateFile $StateFile
    $breaker.consecutiveFailures++

    if ($breaker.state -in @("closed", "half-open")) {
        # Trip the breaker
        $breaker.state = "open"
        $breaker.openedAt = (Get-Date).ToString("o")
        $breaker.halfOpenSuccesses = 0
        $breaker.currentFallbackIndex = 0
        $breaker.metrics.totalFallbacks++
        $breaker.metrics.lastFallbackAt = (Get-Date).ToString("o")
        Save-CircuitBreakerState -State $breaker -StateFile $StateFile

        $firstFallback = $breaker.fallbackChain[0]
        Write-Host " [circuit-breaker] RATE LIMITED — falling back to $firstFallback (cooldown: $($breaker.cooldownMinutes)m)" -ForegroundColor Red
    }
    elseif ($breaker.state -eq "open") {
        # The current fallback was limited too — step down the chain while entries remain
        $lastIndex = $breaker.fallbackChain.Count - 1
        if ($breaker.currentFallbackIndex -lt $lastIndex) {
            $breaker.currentFallbackIndex++
            $candidate = $breaker.fallbackChain[$breaker.currentFallbackIndex]
            Write-Host " [circuit-breaker] Fallback also limited — trying $candidate" -ForegroundColor Red
        }

        # Restart the cooldown window from now
        $breaker.openedAt = (Get-Date).ToString("o")
        Save-CircuitBreakerState -State $breaker -StateFile $StateFile
    }
}
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Integration with ralph-watch.ps1
|
|
241
|
+
|
|
242
|
+
In your Ralph polling loop, wrap the model selection:
|
|
243
|
+
|
|
244
|
+
```powershell
|
|
245
|
+
# At the top of your polling loop
$model = Get-CurrentModel

# When invoking copilot CLI
$result = copilot-cli --model $model ...

# After the call: detect rate limiting from the output text.
# Do not test `$LASTEXITCODE -eq 429` — 429 is an HTTP status, not a process exit code,
# and exit codes on Linux/macOS are limited to 0-255, so that comparison cannot match there.
if ($result -match "rate.?limit|\b429\b|Too Many Requests") {
    Update-CircuitBreakerOnRateLimit
} else {
    Update-CircuitBreakerOnSuccess
}
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Full integration example
|
|
260
|
+
|
|
261
|
+
```powershell
|
|
262
|
+
# Source the circuit breaker functions
. .squad-templates/ralph-circuit-breaker-functions.ps1

while ($true) {
    $model = Get-CurrentModel
    Write-Host "Polling with model: $model"

    try {
        # Your existing Ralph logic here, but pass $model
        $response = Invoke-RalphCycle -Model $model

        # Success path
        Update-CircuitBreakerOnSuccess
    }
    catch {
        if ($_.Exception.Message -match "rate.?limit|429|quota|Too Many Requests") {
            Update-CircuitBreakerOnRateLimit

            # Retry immediately only when the breaker switched us to a different model.
            # Once the whole fallback chain is exhausted the model stays the same; in that
            # case fall through to the sleep below instead of retrying in a tight loop.
            if ((Get-CurrentModel) -ne $model) {
                continue
            }
        }
        else {
            # Other errors — handle normally
            throw
        }
    }

    Start-Sleep -Seconds $pollInterval
}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Configuration
|
|
291
|
+
|
|
292
|
+
Override defaults by editing `.squad/ralph-circuit-breaker.json`:
|
|
293
|
+
|
|
294
|
+
| Field | Default | Description |
|
|
295
|
+
|-------|---------|-------------|
|
|
296
|
+
| `preferredModel` | `claude-sonnet-4.6` | Model to use when circuit is closed |
|
|
297
|
+
| `fallbackChain` | `["gpt-5.4-mini", "gpt-5-mini", "gpt-4.1"]` | Ordered fallback models (all free-tier) |
|
|
298
|
+
| `cooldownMinutes` | `10` | How long to wait before testing recovery |
|
|
299
|
+
|
|
300
|
+
## Metrics
|
|
301
|
+
|
|
302
|
+
The state file tracks operational metrics:
|
|
303
|
+
|
|
304
|
+
- **totalFallbacks** — How many times the circuit opened
|
|
305
|
+
- **totalRecoveries** — How many times it recovered to preferred model
|
|
306
|
+
- **lastFallbackAt** — ISO timestamp of the last time the circuit opened (a rate limit hit while closed or half-open)
|
|
307
|
+
- **lastRecoveryAt** — ISO timestamp of last successful recovery
|
|
308
|
+
|
|
309
|
+
Query metrics with:
|
|
310
|
+
```powershell
|
|
311
|
+
# Load the state file and print the fallback / recovery counters
$cb = Get-Content -Path .squad/ralph-circuit-breaker.json -Raw | ConvertFrom-Json
$metrics = $cb.metrics
Write-Host "Fallbacks: $($metrics.totalFallbacks) | Recoveries: $($metrics.totalRecoveries)"
|
|
313
|
+
```
|
package/templates/routing.md
CHANGED
|
@@ -12,35 +12,21 @@ How to decide who handles what.
|
|
|
12
12
|
| Code review | {Name} | Review PRs, check quality, suggest improvements |
|
|
13
13
|
| Testing | {Name} | Write tests, find edge cases, verify fixes |
|
|
14
14
|
| Scope & priorities | {Name} | What to build next, trade-offs, decisions |
|
|
15
|
-
| Async issue work (bugs, tests, small features) | @copilot 🤖 | Well-defined tasks matching capability profile |
|
|
16
15
|
| Session logging | Scribe | Automatic — never needs routing |
|
|
17
16
|
|
|
18
17
|
## Issue Routing
|
|
19
18
|
|
|
20
19
|
| Label | Action | Who |
|
|
21
20
|
|-------|--------|-----|
|
|
22
|
-
| `squad` | Triage: analyze issue,
|
|
21
|
+
| `squad` | Triage: analyze issue, assign `squad:{member}` label | Lead |
|
|
23
22
|
| `squad:{name}` | Pick up issue and complete the work | Named member |
|
|
24
|
-
| `squad:copilot` | Assign to @copilot for autonomous work (if enabled) | @copilot 🤖 |
|
|
25
23
|
|
|
26
24
|
### How Issue Assignment Works
|
|
27
25
|
|
|
28
|
-
1. When a GitHub issue gets the `squad` label, the **Lead** triages it — analyzing content,
|
|
29
|
-
2.
|
|
30
|
-
3.
|
|
31
|
-
4.
|
|
32
|
-
5. Members can reassign by removing their label and adding another member's label.
|
|
33
|
-
6. The `squad` label is the "inbox" — untriaged issues waiting for Lead review.
|
|
34
|
-
|
|
35
|
-
### Lead Triage Guidance for @copilot
|
|
36
|
-
|
|
37
|
-
When triaging, the Lead should ask:
|
|
38
|
-
|
|
39
|
-
1. **Is this well-defined?** Clear title, reproduction steps or acceptance criteria, bounded scope → likely 🟢
|
|
40
|
-
2. **Does it follow existing patterns?** Adding a test, fixing a known bug, updating a dependency → likely 🟢
|
|
41
|
-
3. **Does it need design judgment?** Architecture, API design, UX decisions → likely 🔴
|
|
42
|
-
4. **Is it security-sensitive?** Auth, encryption, access control → always 🔴
|
|
43
|
-
5. **Is it medium complexity with specs?** Feature with clear requirements, refactoring with tests → likely 🟡
|
|
26
|
+
1. When a GitHub issue gets the `squad` label, the **Lead** triages it — analyzing content, assigning the right `squad:{member}` label, and commenting with triage notes.
|
|
27
|
+
2. When a `squad:{member}` label is applied, that member picks up the issue in their next session.
|
|
28
|
+
3. Members can reassign by removing their label and adding another member's label.
|
|
29
|
+
4. The `squad` label is the "inbox" — untriaged issues waiting for Lead review.
|
|
44
30
|
|
|
45
31
|
## Rules
|
|
46
32
|
|
|
@@ -51,4 +37,3 @@ When triaging, the Lead should ask:
|
|
|
51
37
|
5. **"Team, ..." → fan-out.** Spawn all relevant agents in parallel as `mode: "background"`.
|
|
52
38
|
6. **Anticipate downstream work.** If a feature is being built, spawn the tester to write test cases from requirements simultaneously.
|
|
53
39
|
7. **Issue-labeled work** — when a `squad:{member}` label is applied to an issue, route to that member. The Lead handles all `squad` (base label) triage.
|
|
54
|
-
8. **@copilot routing** — when evaluating issues, check @copilot's capability profile in `team.md`. Route 🟢 good-fit tasks to `squad:copilot`. Flag 🟡 needs-review tasks for PR review. Keep 🔴 not-suitable tasks with squad members.
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
|
|
23
23
|
After every substantial work session:
|
|
24
24
|
|
|
25
|
-
1. **Log the session** to `.squad/log/{timestamp}-{topic}.md
|
|
25
|
+
1. **Log the session** to `.squad/log/{timestamp}-{topic}.md`:
|
|
26
26
|
- Who worked
|
|
27
27
|
- What was done
|
|
28
28
|
- Decisions made
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: "agent-collaboration"
|
|
3
|
+
description: "Standard collaboration patterns for all squad agents — worktree awareness, decisions, cross-agent communication"
|
|
4
|
+
domain: "team-workflow"
|
|
5
|
+
confidence: "high"
|
|
6
|
+
source: "extracted from charter boilerplate — identical content in 18+ agent charters"
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Context
|
|
10
|
+
|
|
11
|
+
Every agent on the team follows identical collaboration patterns for worktree awareness, decision recording, and cross-agent communication. These were previously duplicated in every charter's Collaboration section (~300 bytes × 18 agents = ~5.4KB of redundant context). Now centralized here.
|
|
12
|
+
|
|
13
|
+
The coordinator's spawn prompt already instructs agents to read decisions.md and their history.md. This skill adds the patterns for WRITING decisions and requesting help.
|
|
14
|
+
|
|
15
|
+
## Patterns
|
|
16
|
+
|
|
17
|
+
### Worktree Awareness
|
|
18
|
+
Use the `TEAM ROOT` path provided in your spawn prompt. All `.squad/` paths are relative to this root. If TEAM ROOT is not provided (rare), run `git rev-parse --show-toplevel` as a fallback. Never assume CWD is the repo root.
|
|
19
|
+
|
|
20
|
+
### Decision Recording
|
|
21
|
+
After making a decision that affects other team members, write it to:
|
|
22
|
+
`.squad/decisions/inbox/{your-name}-{brief-slug}.md`
|
|
23
|
+
|
|
24
|
+
Format:
|
|
25
|
+
```
|
|
26
|
+
### {date}: {decision title}
|
|
27
|
+
**By:** {Your Name}
|
|
28
|
+
**What:** {the decision}
|
|
29
|
+
**Why:** {rationale}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Cross-Agent Communication
|
|
33
|
+
If you need another team member's input, say so in your response. The coordinator will bring them in. Don't try to do work outside your domain.
|
|
34
|
+
|
|
35
|
+
### Reviewer Protocol
|
|
36
|
+
If you have reviewer authority and reject work: the original author is locked out from revising that artifact. A different agent must own the revision. State who should revise in your rejection response.
|
|
37
|
+
|
|
38
|
+
## Anti-Patterns
|
|
39
|
+
- Don't read all agent charters — you only need your own context + decisions.md
|
|
40
|
+
- Don't write directly to `.squad/decisions.md` — always use the inbox drop-box
|
|
41
|
+
- Don't modify other agents' history.md files — that's Scribe's job
|
|
42
|
+
- Don't assume CWD is the repo root — always use TEAM ROOT
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: "agent-conduct"
|
|
3
|
+
description: "Shared hard rules enforced across all squad agents"
|
|
4
|
+
domain: "team-governance"
|
|
5
|
+
confidence: "high"
|
|
6
|
+
source: "reskill extraction — Product Isolation Rule and Peer Quality Check appeared in all 20 agent charters"
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Context
|
|
10
|
+
|
|
11
|
+
Every squad agent must follow these two hard rules. They were previously duplicated in every charter. Now they live here as a shared skill, loaded once.
|
|
12
|
+
|
|
13
|
+
## Patterns
|
|
14
|
+
|
|
15
|
+
### Product Isolation Rule (hard rule)
|
|
16
|
+
Tests, CI workflows, and product code must NEVER depend on specific agent names from any particular squad. "Our squad" must not impact "the squad." No hardcoded references to agent names (Flight, EECOM, FIDO, etc.) in test assertions, CI configs, or product logic. Use generic/parameterized values. If a test needs agent names, use obviously-fake test fixtures (e.g., "test-agent-1", "TestBot").
|
|
17
|
+
|
|
18
|
+
### Peer Quality Check (hard rule)
|
|
19
|
+
Before finishing work, verify your changes don't break existing tests. Run the test suite for files you touched. If CI has been failing, check your changes aren't contributing to the problem. When you learn from mistakes, update your history.md.
|
|
20
|
+
|
|
21
|
+
## Anti-Patterns
|
|
22
|
+
- Don't hardcode dev team agent names in product code or tests
|
|
23
|
+
- Don't skip test verification before declaring work done
|
|
24
|
+
- Don't ignore pre-existing CI failures that your changes may worsen
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: "architectural-proposals"
|
|
3
|
+
description: "How to write comprehensive architectural proposals that drive alignment before code is written"
|
|
4
|
+
domain: "architecture, product-direction"
|
|
5
|
+
confidence: "high"
|
|
6
|
+
source: "earned (2026-02-21 interactive shell proposal)"
|
|
7
|
+
tools:
|
|
8
|
+
- name: "view"
|
|
9
|
+
description: "Read existing codebase, prior decisions, and team context before proposing changes"
|
|
10
|
+
when: "Always read .squad/decisions.md, relevant PRDs, and current architecture docs before writing proposal"
|
|
11
|
+
- name: "create"
|
|
12
|
+
description: "Create proposal in docs/proposals/ with structured format"
|
|
13
|
+
when: "After gathering context, before any implementation work begins"
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Context
|
|
17
|
+
|
|
18
|
+
Proposals create alignment before code is written. It is cheaper to change a doc than to refactor code. Use this pattern when:
|
|
19
|
+
- Architecture shifts invalidate existing assumptions
|
|
20
|
+
- Product direction changes require new foundation
|
|
21
|
+
- Multiple waves/milestones will be affected by a decision
|
|
22
|
+
- External dependencies (Copilot CLI, SDK APIs) change
|
|
23
|
+
|
|
24
|
+
## Patterns
|
|
25
|
+
|
|
26
|
+
### Proposal Structure (docs/proposals/)
|
|
27
|
+
|
|
28
|
+
**Required sections:**
|
|
29
|
+
1. **Problem Statement** — Why current state is broken (specific, measurable evidence)
|
|
30
|
+
2. **Proposed Architecture** — Solution with technical specifics (not hand-waving)
|
|
31
|
+
3. **What Changes** — Impact on existing work (waves, milestones, modules)
|
|
32
|
+
4. **What Stays the Same** — Preserve existing functionality (no regression)
|
|
33
|
+
5. **Key Decisions Needed** — Explicit choices with recommendations
|
|
34
|
+
6. **Risks and Mitigations** — Likelihood + impact + mitigation strategy
|
|
35
|
+
7. **Scope** — What's in v1, what's deferred (timeline clarity)
|
|
36
|
+
|
|
37
|
+
**Optional sections:**
|
|
38
|
+
- Implementation Plan (high-level milestones)
|
|
39
|
+
- Success Criteria (measurable outcomes)
|
|
40
|
+
- Open Questions (unresolved items)
|
|
41
|
+
- Appendix (prior art, alternatives considered)
|
|
42
|
+
|
|
43
|
+
### Tone Ceiling Enforcement
|
|
44
|
+
|
|
45
|
+
**Always:**
|
|
46
|
+
- Cite specific evidence (user reports, performance data, failure modes)
|
|
47
|
+
- Justify recommendations with technical rationale
|
|
48
|
+
- Acknowledge trade-offs (no perfect solutions)
|
|
49
|
+
- Be specific about APIs, libraries, file paths
|
|
50
|
+
|
|
51
|
+
**Never:**
|
|
52
|
+
- Hype ("revolutionary", "game-changing")
|
|
53
|
+
- Hand-waving ("we'll figure it out later")
|
|
54
|
+
- Unsubstantiated claims ("users will love this")
|
|
55
|
+
- Vague timelines ("soon", "eventually")
|
|
56
|
+
|
|
57
|
+
### Wave Restructuring Pattern
|
|
58
|
+
|
|
59
|
+
When a proposal invalidates existing wave structure:
|
|
60
|
+
1. **Acknowledge the shift:** "This becomes Wave 0 (Foundation)"
|
|
61
|
+
2. **Cascade impacts:** Adjust downstream waves (Wave 1, Wave 2, Wave 3)
|
|
62
|
+
3. **Preserve non-blocking work:** Identify what can proceed in parallel
|
|
63
|
+
4. **Update dependencies:** Document new blocking relationships
|
|
64
|
+
|
|
65
|
+
**Example (Interactive Shell):**
|
|
66
|
+
- Wave 0 (NEW): Interactive Shell — blocks all other waves
|
|
67
|
+
- Wave 1 (ADJUSTED): npm Distribution — shell bundled in cli.js
|
|
68
|
+
- Wave 2 (DEFERRED): SquadUI — waits for shell foundation
|
|
69
|
+
- Wave 3 (ADJUSTED): Public Docs — now documents shell as primary interface
|
|
70
|
+
|
|
71
|
+
### Decision Framing
|
|
72
|
+
|
|
73
|
+
**Format:** "[Decision topic]: X (recommended) or alternatives?"
|
|
74
|
+
|
|
75
|
+
**Components:**
|
|
76
|
+
- Recommendation (pick one, justify)
|
|
77
|
+
- Alternatives (what else was considered)
|
|
78
|
+
- Decision rationale (why recommended option wins)
|
|
79
|
+
- Needs sign-off from (which agents/roles must approve)
|
|
80
|
+
|
|
81
|
+
**Example:**
|
|
82
|
+
```
|
|
83
|
+
### 1. Terminal UI Library: `ink` (recommended) or alternatives?
|
|
84
|
+
|
|
85
|
+
**Recommendation:** `ink`
|
|
86
|
+
**Alternatives:** `blessed`, raw readline
|
|
87
|
+
**Decision rationale:** Component model enables testable UI. Battle-tested ecosystem.
|
|
88
|
+
|
|
89
|
+
**Needs sign-off from:** Brady (product direction), Fortier (runtime performance)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Risk Documentation
|
|
93
|
+
|
|
94
|
+
**Format per risk:**
|
|
95
|
+
- **Risk:** Specific failure mode
|
|
96
|
+
- **Likelihood:** Low / Medium / High (not percentages)
|
|
97
|
+
- **Impact:** Low / Medium / High
|
|
98
|
+
- **Mitigation:** Concrete actions (measurable)
|
|
99
|
+
|
|
100
|
+
**Example:**
|
|
101
|
+
```
|
|
102
|
+
### Risk 2: SDK Streaming Reliability
|
|
103
|
+
|
|
104
|
+
**Risk:** SDK streaming events might drop messages or arrive out of order.
|
|
105
|
+
**Likelihood:** Low (SDK is production-grade).
|
|
106
|
+
**Impact:** High — broken streaming makes shell unusable.
|
|
107
|
+
|
|
108
|
+
**Mitigation:**
|
|
109
|
+
- Add integration test: Send 1000-message stream, verify all deltas arrive in order
|
|
110
|
+
- Implement fallback: If streaming fails, fall back to polling session state
|
|
111
|
+
- Log all SDK events to `.squad/orchestration-log/sdk-events.jsonl` for debugging
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Examples
|
|
115
|
+
|
|
116
|
+
**File references from interactive shell proposal:**
|
|
117
|
+
- Full proposal: `docs/proposals/squad-interactive-shell.md`
|
|
118
|
+
- User directive: `.squad/decisions/inbox/copilot-directive-2026-02-21T202535Z.md`
|
|
119
|
+
- Team decisions: `.squad/decisions.md`
|
|
120
|
+
- Current architecture: `docs/architecture/module-map.md`, `docs/prd-23-release-readiness.md`
|
|
121
|
+
|
|
122
|
+
**Key patterns demonstrated:**
|
|
123
|
+
1. Read user directive first (understand the "why")
|
|
124
|
+
2. Survey current architecture (module map, existing waves)
|
|
125
|
+
3. Research SDK APIs (exploration task to validate feasibility)
|
|
126
|
+
4. Document problem with specific evidence (unreliable handoffs, zero visibility, UX mismatch)
|
|
127
|
+
5. Propose solution with technical specifics (ink components, SDK session management, spawn.ts module)
|
|
128
|
+
6. Restructure waves when foundation shifts (Wave 0 becomes blocker)
|
|
129
|
+
7. Preserve backward compatibility (squad.agent.md still works, VS Code mode unchanged)
|
|
130
|
+
8. Frame decisions explicitly (5 key decisions with recommendations)
|
|
131
|
+
9. Document risks with mitigations (5 risks, each with concrete actions)
|
|
132
|
+
10. Define scope (what's in v1 vs. deferred)
|
|
133
|
+
|
|
134
|
+
## Anti-Patterns
|
|
135
|
+
|
|
136
|
+
**Avoid:**
|
|
137
|
+
- ❌ Proposals without problem statements (solution-first thinking)
|
|
138
|
+
- ❌ Vague architecture ("we'll use a shell") — be specific (ink components, session registry, spawn.ts)
|
|
139
|
+
- ❌ Ignoring existing work — always document impact on waves/milestones
|
|
140
|
+
- ❌ No risk analysis — every architecture has risks, document them
|
|
141
|
+
- ❌ Unbounded scope — draw the v1 line explicitly
|
|
142
|
+
- ❌ Missing decision ownership — always say "needs sign-off from X"
|
|
143
|
+
- ❌ No backward compatibility plan — users don't care about your replatform
|
|
144
|
+
- ❌ Hand-waving timelines ("a few weeks") — be specific (2-3 weeks, 1 engineer full-time)
|
|
145
|
+
|
|
146
|
+
**Red flags in proposal reviews:**
|
|
147
|
+
- "Users will love this" (citation needed)
|
|
148
|
+
- "We'll figure out X later" (scope creep incoming)
|
|
149
|
+
- "This is revolutionary" (tone ceiling violation)
|
|
150
|
+
- No section on "What Stays the Same" (regression risk)
|
|
151
|
+
- No risks documented (wishful thinking)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: "ci-validation-gates"
|
|
3
|
+
description: "Defensive CI/CD patterns: semver validation, token checks, retry logic, draft detection — earned from v0.8.22"
|
|
4
|
+
domain: "ci-cd"
|
|
5
|
+
confidence: "high"
|
|
6
|
+
source: "extracted from Drucker and Trejo charters — earned knowledge from v0.8.22 release incident"
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Context
|
|
10
|
+
|
|
11
|
+
CI workflows must be defensive. These patterns were learned from the v0.8.22 release disaster where invalid semver, wrong token types, missing retry logic, and draft releases caused a multi-hour outage. Both Drucker (CI/CD) and Trejo (Release Manager) carried this knowledge in their charters — now centralized here.
|
|
12
|
+
|
|
13
|
+
## Patterns
|
|
14
|
+
|
|
15
|
+
### Semver Validation Gate
|
|
16
|
+
Every publish workflow MUST validate version format before `npm publish`. 4-part versions (e.g., 0.8.21.4) are NOT valid semver — npm mangles them.
|
|
17
|
+
|
|
18
|
+
```yaml
|
|
19
|
+
- name: Validate semver
|
|
20
|
+
run: |
|
|
21
|
+
VERSION="${{ github.event.release.tag_name }}"
|
|
22
|
+
VERSION="${VERSION#v}"
|
|
23
|
+
if ! npx semver "$VERSION" > /dev/null 2>&1; then
|
|
24
|
+
echo "❌ Invalid semver: $VERSION"
|
|
25
|
+
echo "Only 3-part versions (X.Y.Z) or prerelease (X.Y.Z-tag.N) are valid."
|
|
26
|
+
exit 1
|
|
27
|
+
fi
|
|
28
|
+
echo "✅ Valid semver: $VERSION"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### NPM Token Type Verification
|
|
32
|
+
NPM_TOKEN MUST be an Automation token, not a User token with 2FA:
|
|
33
|
+
- User tokens require OTP — CI can't provide it → EOTP error
|
|
34
|
+
- Create Automation tokens at npmjs.com → Settings → Access Tokens → Automation
|
|
35
|
+
- Verify before first publish in any workflow
|
|
36
|
+
|
|
37
|
+
### Retry Logic for npm Registry Propagation
|
|
38
|
+
npm registry uses eventual consistency. After `npm publish` succeeds, the package may not be immediately queryable.
|
|
39
|
+
- Propagation: typically 5-30s, up to 2min in rare cases
|
|
40
|
+
- All verify steps: 5 attempts, 15-second intervals
|
|
41
|
+
- Log each attempt: "Attempt 1/5: Checking package..."
|
|
42
|
+
- Exit loop on success, fail after max attempts
|
|
43
|
+
|
|
44
|
+
```yaml
|
|
45
|
+
- name: Verify package (with retry)
|
|
46
|
+
run: |
|
|
47
|
+
MAX_ATTEMPTS=5
|
|
48
|
+
WAIT_SECONDS=15
|
|
49
|
+
for attempt in $(seq 1 $MAX_ATTEMPTS); do
|
|
50
|
+
echo "Attempt $attempt/$MAX_ATTEMPTS: Checking $PACKAGE@$VERSION..."
|
|
51
|
+
if npm view "$PACKAGE@$VERSION" version > /dev/null 2>&1; then
|
|
52
|
+
echo "✅ Package verified"
|
|
53
|
+
exit 0
|
|
54
|
+
fi
|
|
55
|
+
[ $attempt -lt $MAX_ATTEMPTS ] && sleep $WAIT_SECONDS
|
|
56
|
+
done
|
|
57
|
+
echo "❌ Failed to verify after $MAX_ATTEMPTS attempts"
|
|
58
|
+
exit 1
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Draft Release Detection
|
|
62
|
+
Draft releases don't emit the `release: published` event. Workflows MUST:
|
|
63
|
+
- Trigger on `release: published` (NOT `created`)
|
|
64
|
+
- If using workflow_dispatch: verify release is published via GitHub API before proceeding
|
|
65
|
+
|
|
66
|
+
### Build Script Protection
|
|
67
|
+
Set `SKIP_BUILD_BUMP=1` (or `$env:SKIP_BUILD_BUMP = "1"` in PowerShell on Windows) before ANY release build. bump-build.mjs is for dev builds ONLY — it silently mutates versions.
|
|
68
|
+
|
|
69
|
+
## Known Failure Modes (v0.8.22 Incident)
|
|
70
|
+
|
|
71
|
+
| # | What Happened | Root Cause | Prevention |
|
|
72
|
+
|---|---------------|-----------|------------|
|
|
73
|
+
| 1 | 4-part version published, npm mangled it | No semver validation gate | `npx semver` check before every publish |
|
|
74
|
+
| 2 | CI failed 5+ times with EOTP | User token with 2FA | Automation token only |
|
|
75
|
+
| 3 | Verify returned false 404 | No retry logic for propagation | 5 attempts, 15s intervals |
|
|
76
|
+
| 4 | Workflow never triggered | Draft release doesn't emit event | Never create draft releases |
|
|
77
|
+
| 5 | Version mutated during release | bump-build.mjs ran in release | SKIP_BUILD_BUMP=1 |
|
|
78
|
+
|
|
79
|
+
## Anti-Patterns
|
|
80
|
+
- ❌ Publishing without semver validation gate
|
|
81
|
+
- ❌ Single-shot verification without retry
|
|
82
|
+
- ❌ Hard-coded secrets in workflows
|
|
83
|
+
- ❌ Silent CI failures — every error needs actionable output with remediation
|
|
84
|
+
- ❌ Assuming npm publish is instantly queryable
|