@bradygaster/squad-sdk 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +296 -296
  2. package/dist/agents/history-shadow.js +30 -30
  3. package/dist/build/github-dist.js +42 -42
  4. package/dist/config/init.js +173 -173
  5. package/dist/sharing/consult.js +78 -78
  6. package/package.json +1 -1
  7. package/templates/casting/Futurama.json +9 -9
  8. package/templates/casting-history.json +4 -4
  9. package/templates/casting-policy.json +37 -37
  10. package/templates/casting-reference.md +104 -104
  11. package/templates/casting-registry.json +3 -3
  12. package/templates/ceremonies.md +41 -41
  13. package/templates/charter.md +53 -53
  14. package/templates/constraint-tracking.md +38 -38
  15. package/templates/cooperative-rate-limiting.md +229 -229
  16. package/templates/copilot-instructions.md +46 -46
  17. package/templates/history.md +10 -10
  18. package/templates/identity/now.md +9 -9
  19. package/templates/identity/wisdom.md +15 -15
  20. package/templates/issue-lifecycle.md +412 -412
  21. package/templates/keda-scaler.md +164 -164
  22. package/templates/machine-capabilities.md +74 -74
  23. package/templates/mcp-config.md +90 -90
  24. package/templates/multi-agent-format.md +28 -28
  25. package/templates/plugin-marketplace.md +49 -49
  26. package/templates/ralph-circuit-breaker.md +313 -313
  27. package/templates/raw-agent-output.md +37 -37
  28. package/templates/roster.md +60 -60
  29. package/templates/routing.md +39 -39
  30. package/templates/run-output.md +50 -50
  31. package/templates/schedule.json +19 -19
  32. package/templates/scribe-charter.md +119 -119
  33. package/templates/skill.md +24 -24
  34. package/templates/skills/agent-collaboration/SKILL.md +42 -42
  35. package/templates/skills/agent-conduct/SKILL.md +24 -24
  36. package/templates/skills/architectural-proposals/SKILL.md +151 -151
  37. package/templates/skills/ci-validation-gates/SKILL.md +84 -84
  38. package/templates/skills/cli-wiring/SKILL.md +47 -47
  39. package/templates/skills/client-compatibility/SKILL.md +89 -89
  40. package/templates/skills/cross-squad/SKILL.md +114 -114
  41. package/templates/skills/distributed-mesh/SKILL.md +287 -287
  42. package/templates/skills/distributed-mesh/mesh.json.example +30 -30
  43. package/templates/skills/distributed-mesh/sync-mesh.ps1 +111 -111
  44. package/templates/skills/distributed-mesh/sync-mesh.sh +104 -104
  45. package/templates/skills/docs-standards/SKILL.md +71 -71
  46. package/templates/skills/economy-mode/SKILL.md +114 -114
  47. package/templates/skills/external-comms/SKILL.md +329 -329
  48. package/templates/skills/gh-auth-isolation/SKILL.md +183 -183
  49. package/templates/skills/git-workflow/SKILL.md +204 -204
  50. package/templates/skills/github-multi-account/SKILL.md +95 -95
  51. package/templates/skills/history-hygiene/SKILL.md +36 -36
  52. package/templates/skills/humanizer/SKILL.md +105 -105
  53. package/templates/skills/init-mode/SKILL.md +102 -102
  54. package/templates/skills/model-selection/SKILL.md +117 -117
  55. package/templates/skills/nap/SKILL.md +24 -24
  56. package/templates/skills/personal-squad/SKILL.md +57 -57
  57. package/templates/skills/project-conventions/SKILL.md +56 -56
  58. package/templates/skills/release-process/SKILL.md +423 -423
  59. package/templates/skills/reskill/SKILL.md +92 -92
  60. package/templates/skills/reviewer-protocol/SKILL.md +79 -79
  61. package/templates/skills/secret-handling/SKILL.md +200 -200
  62. package/templates/skills/session-recovery/SKILL.md +155 -155
  63. package/templates/skills/squad-conventions/SKILL.md +69 -69
  64. package/templates/skills/test-discipline/SKILL.md +37 -37
  65. package/templates/skills/windows-compatibility/SKILL.md +74 -74
  66. package/templates/workflows/squad-ci.yml +24 -24
  67. package/templates/workflows/squad-docs.yml +54 -54
  68. package/templates/workflows/squad-heartbeat.yml +171 -171
  69. package/templates/workflows/squad-insider-release.yml +61 -61
  70. package/templates/workflows/squad-issue-assign.yml +161 -161
  71. package/templates/workflows/squad-label-enforce.yml +181 -181
  72. package/templates/workflows/squad-preview.yml +55 -55
  73. package/templates/workflows/squad-promote.yml +120 -120
  74. package/templates/workflows/squad-release.yml +77 -77
  75. package/templates/workflows/squad-triage.yml +260 -260
  76. package/templates/workflows/sync-squad-labels.yml +169 -169
@@ -1,53 +1,53 @@
1
- # {Name} — {Role}
2
-
3
- > {One-line personality statement — what makes this person tick}
4
-
5
- ## Identity
6
-
7
- - **Name:** {Name}
8
- - **Role:** {Role title}
9
- - **Expertise:** {2-3 specific skills relevant to the project}
10
- - **Style:** {How they communicate — direct? thorough? opinionated?}
11
-
12
- ## What I Own
13
-
14
- - {Area of responsibility 1}
15
- - {Area of responsibility 2}
16
- - {Area of responsibility 3}
17
-
18
- ## How I Work
19
-
20
- - {Key approach or principle 1}
21
- - {Key approach or principle 2}
22
- - {Pattern or convention I follow}
23
-
24
- ## Boundaries
25
-
26
- **I handle:** {types of work this agent does}
27
-
28
- **I don't handle:** {types of work that belong to other team members}
29
-
30
- **When I'm unsure:** I say so and suggest who might know.
31
-
32
- **If I review others' work:** On rejection, I may require a different agent to revise (not the original author) or request a new specialist be spawned. The Coordinator enforces this.
33
-
34
- ## Model
35
-
36
- - **Preferred:** auto
37
- - **Rationale:** Coordinator selects the best model based on task type — cost first unless writing code
38
- - **Fallback:** Standard chain — the coordinator handles fallback automatically
39
-
40
- ## Collaboration
41
-
42
- Before starting work, run `git rev-parse --show-toplevel` to find the repo root, or use the `TEAM ROOT` provided in the spawn prompt. All `.squad/` paths must be resolved relative to this root — do not assume CWD is the repo root (you may be in a worktree or subdirectory).
43
-
44
- Before starting work, read `.squad/decisions.md` for team decisions that affect me.
45
- After making a decision others should know, write it to `.squad/decisions/inbox/{my-name}-{brief-slug}.md` — the Scribe will merge it.
46
- If I need another team member's input, say so — the coordinator will bring them in.
47
-
48
- ## Voice
49
-
50
- {1-2 sentences describing personality. Not generic — specific. This agent has OPINIONS.
51
- They have preferences. They push back. They have a style that's distinctly theirs.
52
- Example: "Opinionated about test coverage. Will push back if tests are skipped.
53
- Prefers integration tests over mocks. Thinks 80% coverage is the floor, not the ceiling."}
1
+ # {Name} — {Role}
2
+
3
+ > {One-line personality statement — what makes this person tick}
4
+
5
+ ## Identity
6
+
7
+ - **Name:** {Name}
8
+ - **Role:** {Role title}
9
+ - **Expertise:** {2-3 specific skills relevant to the project}
10
+ - **Style:** {How they communicate — direct? thorough? opinionated?}
11
+
12
+ ## What I Own
13
+
14
+ - {Area of responsibility 1}
15
+ - {Area of responsibility 2}
16
+ - {Area of responsibility 3}
17
+
18
+ ## How I Work
19
+
20
+ - {Key approach or principle 1}
21
+ - {Key approach or principle 2}
22
+ - {Pattern or convention I follow}
23
+
24
+ ## Boundaries
25
+
26
+ **I handle:** {types of work this agent does}
27
+
28
+ **I don't handle:** {types of work that belong to other team members}
29
+
30
+ **When I'm unsure:** I say so and suggest who might know.
31
+
32
+ **If I review others' work:** On rejection, I may require a different agent to revise (not the original author) or request a new specialist be spawned. The Coordinator enforces this.
33
+
34
+ ## Model
35
+
36
+ - **Preferred:** auto
37
+ - **Rationale:** Coordinator selects the best model based on task type — cost first unless writing code
38
+ - **Fallback:** Standard chain — the coordinator handles fallback automatically
39
+
40
+ ## Collaboration
41
+
42
+ Before starting work, run `git rev-parse --show-toplevel` to find the repo root, or use the `TEAM ROOT` provided in the spawn prompt. All `.squad/` paths must be resolved relative to this root — do not assume CWD is the repo root (you may be in a worktree or subdirectory).
43
+
44
+ Before starting work, read `.squad/decisions.md` for team decisions that affect me.
45
+ After making a decision others should know, write it to `.squad/decisions/inbox/{my-name}-{brief-slug}.md` — the Scribe will merge it.
46
+ If I need another team member's input, say so — the coordinator will bring them in.
47
+
48
+ ## Voice
49
+
50
+ {1-2 sentences describing personality. Not generic — specific. This agent has OPINIONS.
51
+ They have preferences. They push back. They have a style that's distinctly theirs.
52
+ Example: "Opinionated about test coverage. Will push back if tests are skipped.
53
+ Prefers integration tests over mocks. Thinks 80% coverage is the floor, not the ceiling."}
@@ -1,38 +1,38 @@
1
- # Constraint Budget Tracking
2
-
3
- When the user or system imposes constraints (question limits, revision limits, time budgets), maintain a visible counter in your responses and in the artifact.
4
-
5
- ## Format
6
-
7
- ```
8
- 📊 Clarifying questions used: 2 / 3
9
- ```
10
-
11
- ## Rules
12
-
13
- - Update the counter each time the constraint is consumed
14
- - When a constraint is exhausted, state it: `📊 Question budget exhausted (3/3). Proceeding with current information.`
15
- - If no constraints are active, do not display counters
16
- - Include the final constraint status in multi-agent artifacts
17
-
18
- ## Example Session
19
-
20
- ```
21
- Coordinator: Spawning agents to analyze requirements...
22
- 📊 Clarifying questions used: 0 / 3
23
-
24
- Agent asks clarification: "Should we support OAuth?"
25
- Coordinator: Checking with user...
26
- 📊 Clarifying questions used: 1 / 3
27
-
28
- Agent asks clarification: "What's the rate limit?"
29
- Coordinator: Checking with user...
30
- 📊 Clarifying questions used: 2 / 3
31
-
32
- Agent asks clarification: "Do we need RBAC?"
33
- Coordinator: Checking with user...
34
- 📊 Clarifying questions used: 3 / 3
35
-
36
- Agent asks clarification: "Should we cache responses?"
37
- Coordinator: 📊 Question budget exhausted (3/3). Proceeding without clarification.
38
- ```
1
+ # Constraint Budget Tracking
2
+
3
+ When the user or system imposes constraints (question limits, revision limits, time budgets), maintain a visible counter in your responses and in the artifact.
4
+
5
+ ## Format
6
+
7
+ ```
8
+ 📊 Clarifying questions used: 2 / 3
9
+ ```
10
+
11
+ ## Rules
12
+
13
+ - Update the counter each time the constraint is consumed
14
+ - When a constraint is exhausted, state it: `📊 Question budget exhausted (3/3). Proceeding with current information.`
15
+ - If no constraints are active, do not display counters
16
+ - Include the final constraint status in multi-agent artifacts
17
+
18
+ ## Example Session
19
+
20
+ ```
21
+ Coordinator: Spawning agents to analyze requirements...
22
+ 📊 Clarifying questions used: 0 / 3
23
+
24
+ Agent asks clarification: "Should we support OAuth?"
25
+ Coordinator: Checking with user...
26
+ 📊 Clarifying questions used: 1 / 3
27
+
28
+ Agent asks clarification: "What's the rate limit?"
29
+ Coordinator: Checking with user...
30
+ 📊 Clarifying questions used: 2 / 3
31
+
32
+ Agent asks clarification: "Do we need RBAC?"
33
+ Coordinator: Checking with user...
34
+ 📊 Clarifying questions used: 3 / 3
35
+
36
+ Agent asks clarification: "Should we cache responses?"
37
+ Coordinator: 📊 Question budget exhausted (3/3). Proceeding without clarification.
38
+ ```
@@ -1,229 +1,229 @@
1
- # Cooperative Rate Limiting for Multi-Agent Deployments
2
-
3
- > Coordinate API quota across multiple Ralph instances to prevent cascading failures.
4
-
5
- ## Problem
6
-
7
- The [circuit breaker template](ralph-circuit-breaker.md) handles single-instance rate limiting well. But when multiple Ralphs run across machines (or pods on K8s), each instance independently hits API limits:
8
-
9
- - **No coordination** — 5 Ralphs each think they have full API quota
10
- - **Thundering herd** — All Ralphs retry simultaneously after rate limit resets
11
- - **Priority inversion** — Low-priority work exhausts quota before critical work runs
12
- - **Reactive only** — Circuit opens AFTER 429, wasting the failed request
13
-
14
- ## Solution: 6-Pattern Architecture
15
-
16
- These patterns layer on top of the existing circuit breaker. Each is independent — adopt one or all.
17
-
18
- ### Pattern 1: Traffic Light (RAAS — Rate-Aware Agent Scheduling)
19
-
20
- Map GitHub API `X-RateLimit-Remaining` to traffic light states:
21
-
22
- | State | Remaining % | Behavior |
23
- |-------|------------|----------|
24
- | 🟢 GREEN | >20% | Normal operation |
25
- | 🟡 AMBER | 5–20% | Only P0 agents proceed |
26
- | 🔴 RED | <5% | Block all except emergency P0 |
27
-
28
- ```typescript
29
- type TrafficLight = 'green' | 'amber' | 'red';
30
-
31
- function getTrafficLight(remaining: number, limit: number): TrafficLight {
32
- const pct = remaining / limit;
33
- if (pct > 0.20) return 'green';
34
- if (pct > 0.05) return 'amber';
35
- return 'red';
36
- }
37
-
38
- function shouldProceed(light: TrafficLight, agentPriority: number): boolean {
39
- if (light === 'green') return true;
40
- if (light === 'amber') return agentPriority === 0; // P0 only
41
- return false; // RED — block all
42
- }
43
- ```
44
-
45
- ### Pattern 2: Cooperative Token Pool (CMARP)
46
-
47
- A shared JSON file (`~/.squad/rate-pool.json`) distributes API quota:
48
-
49
- ```json
50
- {
51
- "totalLimit": 5000,
52
- "resetAt": "2026-03-22T20:00:00Z",
53
- "allocations": {
54
- "picard": { "priority": 0, "allocated": 2000, "used": 450, "leaseExpiry": "2026-03-22T19:55:00Z" },
55
- "data": { "priority": 1, "allocated": 1750, "used": 200, "leaseExpiry": "2026-03-22T19:55:00Z" },
56
- "ralph": { "priority": 2, "allocated": 1250, "used": 100, "leaseExpiry": "2026-03-22T19:55:00Z" }
57
- }
58
- }
59
- ```
60
-
61
- **Rules:**
62
- - P0 agents (Lead) get 40% of quota
63
- - P1 agents (specialists) get 35%
64
- - P2 agents (Ralph, Scribe) get 25%
65
- - Stale leases (>5 minutes without heartbeat) are auto-recovered
66
- - Each agent checks their remaining allocation before making API calls
67
-
68
- ```typescript
69
- interface RatePoolAllocation {
70
- priority: number;
71
- allocated: number;
72
- used: number;
73
- leaseExpiry: string;
74
- }
75
-
76
- interface RatePool {
77
- totalLimit: number;
78
- resetAt: string;
79
- allocations: Record<string, RatePoolAllocation>;
80
- }
81
-
82
- function canUseQuota(pool: RatePool, agentName: string): boolean {
83
- const alloc = pool.allocations[agentName];
84
- if (!alloc) return true; // Unknown agent — allow (graceful)
85
-
86
- // Reclaim stale leases from crashed agents
87
- const now = new Date();
88
- for (const [name, a] of Object.entries(pool.allocations)) {
89
- if (new Date(a.leaseExpiry) < now && name !== agentName) {
90
- a.allocated = 0; // Reclaim
91
- }
92
- }
93
-
94
- return alloc.used < alloc.allocated;
95
- }
96
- ```
97
-
98
- ### Pattern 3: Predictive Circuit Breaker (PCB)
99
-
100
- Opens the circuit BEFORE getting a 429 by predicting when quota will run out:
101
-
102
- ```typescript
103
- interface RateSample {
104
- timestamp: number; // Date.now()
105
- remaining: number; // from X-RateLimit-Remaining header
106
- }
107
-
108
- class PredictiveCircuitBreaker {
109
- private samples: RateSample[] = [];
110
- private readonly maxSamples = 10;
111
- private readonly warningThresholdSeconds = 120;
112
-
113
- addSample(remaining: number): void {
114
- this.samples.push({ timestamp: Date.now(), remaining });
115
- if (this.samples.length > this.maxSamples) {
116
- this.samples.shift();
117
- }
118
- }
119
-
120
- /** Predict seconds until quota exhaustion using linear regression */
121
- predictExhaustion(): number | null {
122
- if (this.samples.length < 3) return null;
123
-
124
- const n = this.samples.length;
125
- const first = this.samples[0];
126
- const last = this.samples[n - 1];
127
-
128
- const elapsedMs = last.timestamp - first.timestamp;
129
- if (elapsedMs === 0) return null;
130
-
131
- const consumedPerMs = (first.remaining - last.remaining) / elapsedMs;
132
- if (consumedPerMs <= 0) return null; // Not consuming — safe
133
-
134
- const msUntilExhausted = last.remaining / consumedPerMs;
135
- return msUntilExhausted / 1000;
136
- }
137
-
138
- shouldOpen(): boolean {
139
- const eta = this.predictExhaustion();
140
- if (eta === null) return false;
141
- return eta < this.warningThresholdSeconds;
142
- }
143
- }
144
- ```
145
-
146
- ### Pattern 4: Priority Retry Windows (PWJG)
147
-
148
- Non-overlapping jitter windows prevent thundering herd:
149
-
150
- | Priority | Retry Window | Description |
151
- |----------|-------------|-------------|
152
- | P0 (Lead) | 500ms–5s | Recovers first |
153
- | P1 (Specialists) | 2s–30s | Moderate delay |
154
- | P2 (Ralph/Scribe) | 5s–60s | Most patient |
155
-
156
- ```typescript
157
- function getRetryDelay(priority: number, attempt: number): number {
158
- const windows: Record<number, [number, number]> = {
159
- 0: [500, 5000], // P0: 500ms–5s
160
- 1: [2000, 30000], // P1: 2s–30s
161
- 2: [5000, 60000], // P2: 5s–60s
162
- };
163
-
164
- const [min, max] = windows[priority] ?? windows[2];
165
- const base = Math.min(min * Math.pow(2, attempt), max);
166
- const jitter = Math.random() * base * 0.5;
167
- return base + jitter;
168
- }
169
- ```
170
-
171
- ### Pattern 5: Resource Epoch Tracker (RET)
172
-
173
- Heartbeat-based lease system for multi-machine deployments:
174
-
175
- ```typescript
176
- interface ResourceLease {
177
- agent: string;
178
- machine: string;
179
- leaseStart: string;
180
- leaseExpiry: string; // Typically 5 minutes from now
181
- allocated: number;
182
- }
183
-
184
- // Each agent renews its lease every 2 minutes
185
- // If lease expires (agent crashed), allocation is reclaimed
186
- ```
187
-
188
- ### Pattern 6: Cascade Dependency Detector (CDD)
189
-
190
- Track downstream failures and apply backpressure:
191
-
192
- ```
193
- Agent A (rate limited) → Agent B (waiting for A) → Agent C (waiting for B)
194
- ↑ Backpressure signal: "don't start new work"
195
- ```
196
-
197
- When a dependency is rate-limited, upstream agents should pause new work rather than queuing requests that will fail.
198
-
199
- ## Kubernetes Integration
200
-
201
- On K8s, cooperative rate limiting can use KEDA to scale pods based on API quota:
202
-
203
- ```yaml
204
- apiVersion: keda.sh/v1alpha1
205
- kind: ScaledObject
206
- spec:
207
- scaleTargetRef:
208
- name: ralph-deployment
209
- triggers:
210
- - type: external
211
- metadata:
212
- scalerAddress: keda-copilot-scaler:6000
213
- # Scaler returns 0 when rate limited → pods scale to zero
214
- ```
215
-
216
- See [keda-copilot-scaler](https://github.com/tamirdresher/keda-copilot-scaler) for a complete implementation.
217
-
218
- ## Quick Start
219
-
220
- 1. **Minimum viable:** Adopt Pattern 1 (Traffic Light) — read `X-RateLimit-Remaining` from API responses
221
- 2. **Multi-machine:** Add Pattern 2 (Cooperative Pool) — shared `rate-pool.json`
222
- 3. **Production:** Add Pattern 3 (Predictive CB) — prevent 429s entirely
223
- 4. **Kubernetes:** Add KEDA scaler for automatic pod scaling
224
-
225
- ## References
226
-
227
- - [Circuit Breaker Template](ralph-circuit-breaker.md) — Foundation patterns
228
- - [Squad on AKS](https://github.com/tamirdresher/squad-on-aks) — Production K8s deployment
229
- - [KEDA Copilot Scaler](https://github.com/tamirdresher/keda-copilot-scaler) — Custom KEDA external scaler
1
+ # Cooperative Rate Limiting for Multi-Agent Deployments
2
+
3
+ > Coordinate API quota across multiple Ralph instances to prevent cascading failures.
4
+
5
+ ## Problem
6
+
7
+ The [circuit breaker template](ralph-circuit-breaker.md) handles single-instance rate limiting well. But when multiple Ralphs run across machines (or pods on K8s), each instance independently hits API limits:
8
+
9
+ - **No coordination** — 5 Ralphs each think they have full API quota
10
+ - **Thundering herd** — All Ralphs retry simultaneously after rate limit resets
11
+ - **Priority inversion** — Low-priority work exhausts quota before critical work runs
12
+ - **Reactive only** — Circuit opens AFTER 429, wasting the failed request
13
+
14
+ ## Solution: 6-Pattern Architecture
15
+
16
+ These patterns layer on top of the existing circuit breaker. Each is independent — adopt one or all.
17
+
18
+ ### Pattern 1: Traffic Light (RAAS — Rate-Aware Agent Scheduling)
19
+
20
+ Map GitHub API `X-RateLimit-Remaining` to traffic light states:
21
+
22
+ | State | Remaining % | Behavior |
23
+ |-------|------------|----------|
24
+ | 🟢 GREEN | >20% | Normal operation |
25
+ | 🟡 AMBER | 5–20% | Only P0 agents proceed |
26
+ | 🔴 RED | <5% | Block all except emergency P0 |
27
+
28
+ ```typescript
29
+ type TrafficLight = 'green' | 'amber' | 'red';
30
+
31
+ function getTrafficLight(remaining: number, limit: number): TrafficLight {
32
+ const pct = remaining / limit;
33
+ if (pct > 0.20) return 'green';
34
+ if (pct > 0.05) return 'amber';
35
+ return 'red';
36
+ }
37
+
38
+ function shouldProceed(light: TrafficLight, agentPriority: number): boolean {
39
+ if (light === 'green') return true;
40
+ if (light === 'amber') return agentPriority === 0; // P0 only
41
+ return false; // RED — block all, including P0 (the table's emergency-P0 exception is not implemented in this sketch)
42
+ }
43
+ ```
44
+
45
+ ### Pattern 2: Cooperative Token Pool (CMARP)
46
+
47
+ A shared JSON file (`~/.squad/rate-pool.json`) distributes API quota:
48
+
49
+ ```json
50
+ {
51
+ "totalLimit": 5000,
52
+ "resetAt": "2026-03-22T20:00:00Z",
53
+ "allocations": {
54
+ "picard": { "priority": 0, "allocated": 2000, "used": 450, "leaseExpiry": "2026-03-22T19:55:00Z" },
55
+ "data": { "priority": 1, "allocated": 1750, "used": 200, "leaseExpiry": "2026-03-22T19:55:00Z" },
56
+ "ralph": { "priority": 2, "allocated": 1250, "used": 100, "leaseExpiry": "2026-03-22T19:55:00Z" }
57
+ }
58
+ }
59
+ ```
60
+
61
+ **Rules:**
62
+ - P0 agents (Lead) get 40% of quota
63
+ - P1 agents (specialists) get 35%
64
+ - P2 agents (Ralph, Scribe) get 25%
65
+ - Stale leases (>5 minutes without heartbeat) are auto-recovered
66
+ - Each agent checks their remaining allocation before making API calls
67
+
68
+ ```typescript
69
+ interface RatePoolAllocation {
70
+ priority: number;
71
+ allocated: number;
72
+ used: number;
73
+ leaseExpiry: string;
74
+ }
75
+
76
+ interface RatePool {
77
+ totalLimit: number;
78
+ resetAt: string;
79
+ allocations: Record<string, RatePoolAllocation>;
80
+ }
81
+
82
+ function canUseQuota(pool: RatePool, agentName: string): boolean {
83
+ const alloc = pool.allocations[agentName];
84
+ if (!alloc) return true; // Unknown agent — allow (graceful)
85
+
86
+ // Reclaim stale leases from crashed agents
87
+ const now = new Date();
88
+ for (const [name, a] of Object.entries(pool.allocations)) {
89
+ if (new Date(a.leaseExpiry) < now && name !== agentName) {
90
+ a.allocated = 0; // Reclaim
91
+ }
92
+ }
93
+
94
+ return alloc.used < alloc.allocated;
95
+ }
96
+ ```
97
+
98
+ ### Pattern 3: Predictive Circuit Breaker (PCB)
99
+
100
+ Opens the circuit BEFORE getting a 429 by predicting when quota will run out:
101
+
102
+ ```typescript
103
+ interface RateSample {
104
+ timestamp: number; // Date.now()
105
+ remaining: number; // from X-RateLimit-Remaining header
106
+ }
107
+
108
+ class PredictiveCircuitBreaker {
109
+ private samples: RateSample[] = [];
110
+ private readonly maxSamples = 10;
111
+ private readonly warningThresholdSeconds = 120;
112
+
113
+ addSample(remaining: number): void {
114
+ this.samples.push({ timestamp: Date.now(), remaining });
115
+ if (this.samples.length > this.maxSamples) {
116
+ this.samples.shift();
117
+ }
118
+ }
119
+
120
+ /** Predict seconds until quota exhaustion by linear extrapolation from the oldest and newest samples */
121
+ predictExhaustion(): number | null {
122
+ if (this.samples.length < 3) return null;
123
+
124
+ const n = this.samples.length;
125
+ const first = this.samples[0];
126
+ const last = this.samples[n - 1];
127
+
128
+ const elapsedMs = last.timestamp - first.timestamp;
129
+ if (elapsedMs === 0) return null;
130
+
131
+ const consumedPerMs = (first.remaining - last.remaining) / elapsedMs;
132
+ if (consumedPerMs <= 0) return null; // Not consuming — safe
133
+
134
+ const msUntilExhausted = last.remaining / consumedPerMs;
135
+ return msUntilExhausted / 1000;
136
+ }
137
+
138
+ shouldOpen(): boolean {
139
+ const eta = this.predictExhaustion();
140
+ if (eta === null) return false;
141
+ return eta < this.warningThresholdSeconds;
142
+ }
143
+ }
144
+ ```
145
+
146
+ ### Pattern 4: Priority Retry Windows (PWJG)
147
+
148
+ Staggered, priority-based jitter windows (higher priority retries sooner) reduce thundering-herd retries:
149
+
150
+ | Priority | Retry Window | Description |
151
+ |----------|-------------|-------------|
152
+ | P0 (Lead) | 500ms–5s | Recovers first |
153
+ | P1 (Specialists) | 2s–30s | Moderate delay |
154
+ | P2 (Ralph/Scribe) | 5s–60s | Most patient |
155
+
156
+ ```typescript
157
+ function getRetryDelay(priority: number, attempt: number): number {
158
+ const windows: Record<number, [number, number]> = {
159
+ 0: [500, 5000], // P0: 500ms–5s
160
+ 1: [2000, 30000], // P1: 2s–30s
161
+ 2: [5000, 60000], // P2: 5s–60s
162
+ };
163
+
164
+ const [min, max] = windows[priority] ?? windows[2];
165
+ const base = Math.min(min * Math.pow(2, attempt), max);
166
+ const jitter = Math.random() * base * 0.5;
167
+ return base + jitter;
168
+ }
169
+ ```
170
+
171
+ ### Pattern 5: Resource Epoch Tracker (RET)
172
+
173
+ Heartbeat-based lease system for multi-machine deployments:
174
+
175
+ ```typescript
176
+ interface ResourceLease {
177
+ agent: string;
178
+ machine: string;
179
+ leaseStart: string;
180
+ leaseExpiry: string; // Typically 5 minutes from now
181
+ allocated: number;
182
+ }
183
+
184
+ // Each agent renews its lease every 2 minutes
185
+ // If lease expires (agent crashed), allocation is reclaimed
186
+ ```
187
+
188
+ ### Pattern 6: Cascade Dependency Detector (CDD)
189
+
190
+ Track downstream failures and apply backpressure:
191
+
192
+ ```
193
+ Agent A (rate limited) → Agent B (waiting for A) → Agent C (waiting for B)
194
+ ↑ Backpressure signal: "don't start new work"
195
+ ```
196
+
197
+ When a dependency is rate-limited, the agents that depend on it should pause new work rather than queue requests that will fail.
198
+
199
+ ## Kubernetes Integration
200
+
201
+ On K8s, cooperative rate limiting can use KEDA to scale pods based on API quota:
202
+
203
+ ```yaml
204
+ apiVersion: keda.sh/v1alpha1
205
+ kind: ScaledObject
206
+ spec:
207
+ scaleTargetRef:
208
+ name: ralph-deployment
209
+ triggers:
210
+ - type: external
211
+ metadata:
212
+ scalerAddress: keda-copilot-scaler:6000
213
+ # Scaler returns 0 when rate limited → pods scale to zero
214
+ ```
215
+
216
+ See [keda-copilot-scaler](https://github.com/tamirdresher/keda-copilot-scaler) for a complete implementation.
217
+
218
+ ## Quick Start
219
+
220
+ 1. **Minimum viable:** Adopt Pattern 1 (Traffic Light) — read `X-RateLimit-Remaining` from API responses
221
+ 2. **Multi-machine:** Add Pattern 2 (Cooperative Pool) — shared `rate-pool.json`
222
+ 3. **Production:** Add Pattern 3 (Predictive CB) — prevent 429s entirely
223
+ 4. **Kubernetes:** Add KEDA scaler for automatic pod scaling
224
+
225
+ ## References
226
+
227
+ - [Circuit Breaker Template](ralph-circuit-breaker.md) — Foundation patterns
228
+ - [Squad on AKS](https://github.com/tamirdresher/squad-on-aks) — Production K8s deployment
229
+ - [KEDA Copilot Scaler](https://github.com/tamirdresher/keda-copilot-scaler) — Custom KEDA external scaler