@bradygaster/squad-sdk 0.8.25 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapter/client.d.ts +17 -0
- package/dist/adapter/client.d.ts.map +1 -1
- package/dist/adapter/client.js +101 -1
- package/dist/adapter/client.js.map +1 -1
- package/dist/agents/history-shadow.d.ts.map +1 -1
- package/dist/agents/history-shadow.js +99 -32
- package/dist/agents/history-shadow.js.map +1 -1
- package/dist/agents/index.d.ts +1 -0
- package/dist/agents/index.d.ts.map +1 -1
- package/dist/agents/index.js +2 -0
- package/dist/agents/index.js.map +1 -1
- package/dist/agents/model-selector.d.ts +2 -0
- package/dist/agents/model-selector.d.ts.map +1 -1
- package/dist/agents/model-selector.js +41 -35
- package/dist/agents/model-selector.js.map +1 -1
- package/dist/agents/personal.d.ts +35 -0
- package/dist/agents/personal.d.ts.map +1 -0
- package/dist/agents/personal.js +67 -0
- package/dist/agents/personal.js.map +1 -0
- package/dist/builders/index.d.ts +3 -2
- package/dist/builders/index.d.ts.map +1 -1
- package/dist/builders/index.js +28 -0
- package/dist/builders/index.js.map +1 -1
- package/dist/builders/types.d.ts +13 -0
- package/dist/builders/types.d.ts.map +1 -1
- package/dist/config/init.d.ts +8 -0
- package/dist/config/init.d.ts.map +1 -1
- package/dist/config/init.js +131 -20
- package/dist/config/init.js.map +1 -1
- package/dist/config/models.d.ts +112 -0
- package/dist/config/models.d.ts.map +1 -1
- package/dist/config/models.js +329 -18
- package/dist/config/models.js.map +1 -1
- package/dist/coordinator/index.js +2 -2
- package/dist/coordinator/index.js.map +1 -1
- package/dist/index.d.ts +8 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -2
- package/dist/index.js.map +1 -1
- package/dist/platform/azure-devops.d.ts +42 -0
- package/dist/platform/azure-devops.d.ts.map +1 -1
- package/dist/platform/azure-devops.js +75 -0
- package/dist/platform/azure-devops.js.map +1 -1
- package/dist/platform/comms-file-log.d.ts.map +1 -1
- package/dist/platform/comms-file-log.js +2 -1
- package/dist/platform/comms-file-log.js.map +1 -1
- package/dist/platform/index.d.ts +2 -1
- package/dist/platform/index.d.ts.map +1 -1
- package/dist/platform/index.js +1 -0
- package/dist/platform/index.js.map +1 -1
- package/dist/ralph/capabilities.d.ts +67 -0
- package/dist/ralph/capabilities.d.ts.map +1 -0
- package/dist/ralph/capabilities.js +111 -0
- package/dist/ralph/capabilities.js.map +1 -0
- package/dist/ralph/index.d.ts +2 -0
- package/dist/ralph/index.d.ts.map +1 -1
- package/dist/ralph/index.js +6 -5
- package/dist/ralph/index.js.map +1 -1
- package/dist/ralph/rate-limiting.d.ts +99 -0
- package/dist/ralph/rate-limiting.d.ts.map +1 -0
- package/dist/ralph/rate-limiting.js +170 -0
- package/dist/ralph/rate-limiting.js.map +1 -0
- package/dist/resolution.d.ts +24 -2
- package/dist/resolution.d.ts.map +1 -1
- package/dist/resolution.js +106 -6
- package/dist/resolution.js.map +1 -1
- package/dist/roles/catalog-categories.d.ts +146 -0
- package/dist/roles/catalog-categories.d.ts.map +1 -0
- package/dist/roles/catalog-categories.js +374 -0
- package/dist/roles/catalog-categories.js.map +1 -0
- package/dist/roles/catalog-engineering.d.ts +212 -0
- package/dist/roles/catalog-engineering.d.ts.map +1 -0
- package/dist/roles/catalog-engineering.js +549 -0
- package/dist/roles/catalog-engineering.js.map +1 -0
- package/dist/roles/catalog.d.ts +24 -0
- package/dist/roles/catalog.d.ts.map +1 -0
- package/dist/roles/catalog.js +28 -0
- package/dist/roles/catalog.js.map +1 -0
- package/dist/roles/index.d.ts +69 -0
- package/dist/roles/index.d.ts.map +1 -0
- package/dist/roles/index.js +197 -0
- package/dist/roles/index.js.map +1 -0
- package/dist/roles/types.d.ts +87 -0
- package/dist/roles/types.d.ts.map +1 -0
- package/dist/roles/types.js +14 -0
- package/dist/roles/types.js.map +1 -0
- package/dist/runtime/benchmarks.js +5 -5
- package/dist/runtime/benchmarks.js.map +1 -1
- package/dist/runtime/constants.d.ts +2 -2
- package/dist/runtime/constants.d.ts.map +1 -1
- package/dist/runtime/constants.js +5 -3
- package/dist/runtime/constants.js.map +1 -1
- package/dist/runtime/cross-squad.d.ts +118 -0
- package/dist/runtime/cross-squad.d.ts.map +1 -0
- package/dist/runtime/cross-squad.js +234 -0
- package/dist/runtime/cross-squad.js.map +1 -0
- package/dist/runtime/otel-init.d.ts +24 -17
- package/dist/runtime/otel-init.d.ts.map +1 -1
- package/dist/runtime/otel-init.js +29 -20
- package/dist/runtime/otel-init.js.map +1 -1
- package/dist/runtime/otel-metrics.d.ts +5 -0
- package/dist/runtime/otel-metrics.d.ts.map +1 -1
- package/dist/runtime/otel-metrics.js +54 -0
- package/dist/runtime/otel-metrics.js.map +1 -1
- package/dist/runtime/rework.d.ts +71 -0
- package/dist/runtime/rework.d.ts.map +1 -0
- package/dist/runtime/rework.js +107 -0
- package/dist/runtime/rework.js.map +1 -0
- package/dist/runtime/scheduler.d.ts +128 -0
- package/dist/runtime/scheduler.d.ts.map +1 -0
- package/dist/runtime/scheduler.js +427 -0
- package/dist/runtime/scheduler.js.map +1 -0
- package/dist/runtime/squad-observer.d.ts.map +1 -1
- package/dist/runtime/squad-observer.js +4 -0
- package/dist/runtime/squad-observer.js.map +1 -1
- package/dist/runtime/streaming.d.ts +2 -0
- package/dist/runtime/streaming.d.ts.map +1 -1
- package/dist/runtime/streaming.js +6 -0
- package/dist/runtime/streaming.js.map +1 -1
- package/dist/runtime/telemetry.d.ts +2 -0
- package/dist/runtime/telemetry.d.ts.map +1 -1
- package/dist/runtime/telemetry.js +6 -0
- package/dist/runtime/telemetry.js.map +1 -1
- package/dist/sharing/consult.d.ts +2 -2
- package/dist/sharing/consult.js +6 -6
- package/dist/sharing/consult.js.map +1 -1
- package/dist/sharing/export.d.ts.map +1 -1
- package/dist/sharing/export.js +17 -4
- package/dist/sharing/export.js.map +1 -1
- package/dist/skills/handler-types.d.ts +271 -0
- package/dist/skills/handler-types.d.ts.map +1 -0
- package/dist/skills/handler-types.js +31 -0
- package/dist/skills/handler-types.js.map +1 -0
- package/dist/skills/index.d.ts +3 -0
- package/dist/skills/index.d.ts.map +1 -1
- package/dist/skills/index.js +3 -0
- package/dist/skills/index.js.map +1 -1
- package/dist/skills/skill-script-loader.d.ts +65 -0
- package/dist/skills/skill-script-loader.d.ts.map +1 -0
- package/dist/skills/skill-script-loader.js +227 -0
- package/dist/skills/skill-script-loader.js.map +1 -0
- package/dist/skills/skill-source.d.ts.map +1 -1
- package/dist/skills/skill-source.js +5 -1
- package/dist/skills/skill-source.js.map +1 -1
- package/dist/tools/index.d.ts +10 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +49 -8
- package/dist/tools/index.js.map +1 -1
- package/dist/upstream/resolver.d.ts.map +1 -1
- package/dist/upstream/resolver.js +14 -5
- package/dist/upstream/resolver.js.map +1 -1
- package/package.json +34 -3
- package/templates/casting/Futurama.json +10 -0
- package/templates/casting-policy.json +4 -2
- package/templates/casting-reference.md +104 -0
- package/templates/cooperative-rate-limiting.md +229 -0
- package/templates/issue-lifecycle.md +412 -0
- package/templates/keda-scaler.md +164 -0
- package/templates/machine-capabilities.md +75 -0
- package/templates/mcp-config.md +0 -8
- package/templates/orchestration-log.md +27 -27
- package/templates/package.json +3 -0
- package/templates/ralph-circuit-breaker.md +313 -0
- package/templates/ralph-triage.js +543 -0
- package/templates/routing.md +5 -20
- package/templates/schedule.json +19 -0
- package/templates/scribe-charter.md +1 -1
- package/templates/skills/agent-collaboration/SKILL.md +42 -0
- package/templates/skills/agent-conduct/SKILL.md +24 -0
- package/templates/skills/architectural-proposals/SKILL.md +151 -0
- package/templates/skills/ci-validation-gates/SKILL.md +84 -0
- package/templates/skills/cli-wiring/SKILL.md +47 -0
- package/templates/skills/client-compatibility/SKILL.md +89 -0
- package/templates/skills/cross-squad/SKILL.md +114 -0
- package/templates/skills/distributed-mesh/SKILL.md +287 -0
- package/templates/skills/distributed-mesh/mesh.json.example +30 -0
- package/templates/skills/distributed-mesh/sync-mesh.ps1 +111 -0
- package/templates/skills/distributed-mesh/sync-mesh.sh +104 -0
- package/templates/skills/docs-standards/SKILL.md +71 -0
- package/templates/skills/economy-mode/SKILL.md +114 -0
- package/templates/skills/external-comms/SKILL.md +329 -0
- package/templates/skills/gh-auth-isolation/SKILL.md +183 -0
- package/templates/skills/git-workflow/SKILL.md +204 -0
- package/templates/skills/github-multi-account/SKILL.md +95 -0
- package/templates/skills/history-hygiene/SKILL.md +36 -0
- package/templates/skills/humanizer/SKILL.md +105 -0
- package/templates/skills/init-mode/SKILL.md +102 -0
- package/templates/skills/model-selection/SKILL.md +117 -0
- package/templates/skills/nap/SKILL.md +24 -0
- package/templates/skills/personal-squad/SKILL.md +57 -0
- package/templates/skills/release-process/SKILL.md +423 -0
- package/templates/skills/reskill/SKILL.md +92 -0
- package/templates/skills/reviewer-protocol/SKILL.md +79 -0
- package/templates/skills/secret-handling/SKILL.md +200 -0
- package/templates/skills/session-recovery/SKILL.md +155 -0
- package/templates/skills/squad-conventions/SKILL.md +69 -0
- package/templates/skills/test-discipline/SKILL.md +37 -0
- package/templates/skills/windows-compatibility/SKILL.md +74 -0
- package/templates/squad.agent.md +1287 -1146
- package/templates/workflows/squad-docs.yml +8 -4
- package/templates/workflows/squad-heartbeat.yml +55 -200
- package/templates/workflows/squad-insider-release.yml +1 -1
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# KEDA External Scaler for GitHub Issue-Driven Agent Autoscaling
|
|
2
|
+
|
|
3
|
+
> Scale agent pods to zero when idle, up when work arrives — driven by GitHub Issues.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
When running Squad on Kubernetes, agent pods sit idle when no work exists. [KEDA](https://keda.sh) (Kubernetes Event-Driven Autoscaler) solves this for queue-based workloads, but GitHub Issues isn't a native KEDA trigger.
|
|
8
|
+
|
|
9
|
+
The `keda-copilot-scaler` is a KEDA External Scaler (gRPC) that bridges this gap:
|
|
10
|
+
1. Polls GitHub API for issues matching specific labels (e.g., `squad:copilot`)
|
|
11
|
+
2. Reports queue depth as a KEDA metric
|
|
12
|
+
3. Handles rate limits gracefully (Retry-After, exponential backoff)
|
|
13
|
+
4. Supports composite scaling decisions
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
### Prerequisites
|
|
18
|
+
- Kubernetes cluster with KEDA v2.x installed
|
|
19
|
+
- GitHub personal access token (PAT) with `repo` scope
|
|
20
|
+
- Helm 3.x
|
|
21
|
+
|
|
22
|
+
### 1. Install the Scaler
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
helm install keda-copilot-scaler oci://ghcr.io/tamirdresher/keda-copilot-scaler \
|
|
26
|
+
--namespace squad-scaler --create-namespace \
|
|
27
|
+
--set github.owner=YOUR_ORG \
|
|
28
|
+
--set github.repo=YOUR_REPO \
|
|
29
|
+
--set github.token=YOUR_TOKEN
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Or with Kustomize:
|
|
33
|
+
```bash
|
|
34
|
+
kubectl apply -k https://github.com/tamirdresher/keda-copilot-scaler/deploy/kustomize
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### 2. Create a ScaledObject
|
|
38
|
+
|
|
39
|
+
```yaml
|
|
40
|
+
apiVersion: keda.sh/v1alpha1
|
|
41
|
+
kind: ScaledObject
|
|
42
|
+
metadata:
|
|
43
|
+
name: picard-scaler
|
|
44
|
+
namespace: squad
|
|
45
|
+
spec:
|
|
46
|
+
scaleTargetRef:
|
|
47
|
+
name: picard-deployment
|
|
48
|
+
minReplicaCount: 0 # Scale to zero when idle
|
|
49
|
+
maxReplicaCount: 3
|
|
50
|
+
pollingInterval: 30 # Check every 30 seconds
|
|
51
|
+
cooldownPeriod: 300 # Wait 5 minutes before scaling down
|
|
52
|
+
triggers:
|
|
53
|
+
- type: external
|
|
54
|
+
metadata:
|
|
55
|
+
scalerAddress: keda-copilot-scaler.squad-scaler.svc.cluster.local:6000
|
|
56
|
+
owner: your-org
|
|
57
|
+
repo: your-repo
|
|
58
|
+
labels: squad:copilot # Only count issues with this label
|
|
59
|
+
threshold: "1" # Scale up when >= 1 issue exists
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### 3. Verify
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# Check the scaler is running
|
|
66
|
+
kubectl get pods -n squad-scaler
|
|
67
|
+
|
|
68
|
+
# Check ScaledObject status
|
|
69
|
+
kubectl get scaledobject picard-scaler -n squad
|
|
70
|
+
|
|
71
|
+
# Watch scaling events
|
|
72
|
+
kubectl get events -n squad --watch
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Scaling Behavior
|
|
76
|
+
|
|
77
|
+
| Open Issues | Target Replicas | Behavior |
|
|
78
|
+
|------------|----------------|----------|
|
|
79
|
+
| 0 | 0 | Scale to zero — save resources |
|
|
80
|
+
| 1–3 | 1 | Single agent handles work |
|
|
81
|
+
| 4–10 | 2 | Scale up for parallel processing |
|
|
82
|
+
| 11+ | 3 (max) | Maximum parallelism |
|
|
83
|
+
|
|
84
|
+
The threshold and max replicas are configurable per ScaledObject.
|
|
85
|
+
|
|
86
|
+
## Rate Limit Awareness
|
|
87
|
+
|
|
88
|
+
The scaler tracks GitHub API rate limits:
|
|
89
|
+
- Reads `X-RateLimit-Remaining` from API responses
|
|
90
|
+
- Backs off when quota is low (< 100 remaining)
|
|
91
|
+
- Reports rate limit metrics as secondary KEDA triggers
|
|
92
|
+
- Never exhausts API quota from polling
|
|
93
|
+
|
|
94
|
+
## Integration with Squad
|
|
95
|
+
|
|
96
|
+
### Machine Capabilities (#514)
|
|
97
|
+
|
|
98
|
+
Combine with machine capability labels for intelligent scheduling:
|
|
99
|
+
|
|
100
|
+
```yaml
|
|
101
|
+
# Only scale pods on GPU-capable nodes
|
|
102
|
+
spec:
|
|
103
|
+
template:
|
|
104
|
+
spec:
|
|
105
|
+
nodeSelector:
|
|
106
|
+
node.squad.dev/gpu: "true"
|
|
107
|
+
triggers:
|
|
108
|
+
- type: external
|
|
109
|
+
metadata:
|
|
110
|
+
labels: squad:copilot,needs:gpu
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Cooperative Rate Limiting (#515)
|
|
114
|
+
|
|
115
|
+
The scaler exposes rate limit metrics that feed into the cooperative rate limiting system:
|
|
116
|
+
- Current `X-RateLimit-Remaining` value
|
|
117
|
+
- Predicted time to exhaustion (from predictive circuit breaker)
|
|
118
|
+
- Can return 0 target replicas when rate limited → pods scale to zero
|
|
119
|
+
|
|
120
|
+
## Architecture
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
GitHub API KEDA Kubernetes
|
|
124
|
+
┌──────────┐ ┌──────────┐ ┌──────────────┐
|
|
125
|
+
│ Issues │◄── poll ──►│ Scaler │──metrics─►│ HPA / KEDA │
|
|
126
|
+
│ (REST) │ │ (gRPC) │ │ Controller │
|
|
127
|
+
└──────────┘ └──────────┘ └──────┬───────┘
|
|
128
|
+
│
|
|
129
|
+
scale up/down
|
|
130
|
+
│
|
|
131
|
+
┌──────▼───────┐
|
|
132
|
+
│ Agent Pods │
|
|
133
|
+
│ (0–N replicas)│
|
|
134
|
+
└──────────────┘
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Configuration Reference
|
|
138
|
+
|
|
139
|
+
| Parameter | Default | Description |
|
|
140
|
+
|-----------|---------|-------------|
|
|
141
|
+
| `github.owner` | — | Repository owner |
|
|
142
|
+
| `github.repo` | — | Repository name |
|
|
143
|
+
| `github.token` | — | GitHub PAT with `repo` scope |
|
|
144
|
+
| `github.labels` | `squad:copilot` | Comma-separated label filter |
|
|
145
|
+
| `scaler.port` | `6000` | gRPC server port |
|
|
146
|
+
| `scaler.pollInterval` | `30s` | GitHub API polling interval |
|
|
147
|
+
| `scaler.rateLimitThreshold` | `100` | Stop polling when the remaining GitHub API quota falls below this value |
|
|
148
|
+
|
|
149
|
+
## Source & Contributing
|
|
150
|
+
|
|
151
|
+
- **Repository:** [tamirdresher/keda-copilot-scaler](https://github.com/tamirdresher/keda-copilot-scaler)
|
|
152
|
+
- **License:** MIT
|
|
153
|
+
- **Language:** Go
|
|
154
|
+
- **Tests:** 51 passing (unit + integration)
|
|
155
|
+
- **CI:** GitHub Actions
|
|
156
|
+
|
|
157
|
+
The scaler is maintained as a standalone project. PRs and issues welcome.
|
|
158
|
+
|
|
159
|
+
## References
|
|
160
|
+
|
|
161
|
+
- [KEDA External Scalers](https://keda.sh/docs/latest/concepts/external-scalers/) — KEDA documentation
|
|
162
|
+
- [Squad on AKS](https://github.com/tamirdresher/squad-on-aks) — Full Kubernetes deployment example
|
|
163
|
+
- [Machine Capabilities](machine-capabilities.md) — Capability-based routing (#514)
|
|
164
|
+
- [Cooperative Rate Limiting](cooperative-rate-limiting.md) — Multi-agent rate management (#515)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Machine Capability Discovery & Label-Based Routing
|
|
2
|
+
|
|
3
|
+
> Enable Ralph to skip issues requiring capabilities the current machine lacks.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
When running Squad across multiple machines (laptops, DevBoxes, GPU servers, Kubernetes nodes), each machine has different tooling. The capability system lets you declare what each machine can do, and Ralph automatically routes work accordingly.
|
|
8
|
+
|
|
9
|
+
## Setup
|
|
10
|
+
|
|
11
|
+
### 1. Create a Capabilities Manifest
|
|
12
|
+
|
|
13
|
+
Create `~/.squad/machine-capabilities.json` (user-wide) or `.squad/machine-capabilities.json` (project-local):
|
|
14
|
+
|
|
15
|
+
```json
|
|
16
|
+
{
|
|
17
|
+
"machine": "MY-LAPTOP",
|
|
18
|
+
"capabilities": ["browser", "personal-gh", "onedrive"],
|
|
19
|
+
"missing": ["gpu", "docker", "azure-speech"],
|
|
20
|
+
"lastUpdated": "2026-03-22T00:00:00Z"
|
|
21
|
+
}
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### 2. Label Issues with Requirements
|
|
25
|
+
|
|
26
|
+
Add `needs:*` labels to issues that require specific capabilities:
|
|
27
|
+
|
|
28
|
+
| Label | Meaning |
|
|
29
|
+
|-------|---------|
|
|
30
|
+
| `needs:browser` | Requires Playwright / browser automation |
|
|
31
|
+
| `needs:gpu` | Requires NVIDIA GPU |
|
|
32
|
+
| `needs:personal-gh` | Requires personal GitHub account |
|
|
33
|
+
| `needs:emu-gh` | Requires Enterprise Managed User account |
|
|
34
|
+
| `needs:azure-cli` | Requires authenticated Azure CLI |
|
|
35
|
+
| `needs:docker` | Requires Docker daemon |
|
|
36
|
+
| `needs:onedrive` | Requires OneDrive sync |
|
|
37
|
+
| `needs:teams-mcp` | Requires Teams MCP tools |
|
|
38
|
+
|
|
39
|
+
Custom capabilities are supported — any `needs:X` label works if `X` is in the machine's `capabilities` array.
|
|
40
|
+
|
|
41
|
+
### 3. Run Ralph
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
squad watch --interval 5
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Ralph will log skipped issues:
|
|
48
|
+
```
|
|
49
|
+
⏭️ Skipping #42 "Train ML model" — missing: gpu
|
|
50
|
+
✓ Triaged #43 "Fix CSS layout" → Picard (routing-rule)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## How It Works
|
|
54
|
+
|
|
55
|
+
1. Ralph loads `machine-capabilities.json` at startup
|
|
56
|
+
2. For each open issue, Ralph extracts `needs:*` labels
|
|
57
|
+
3. If any required capability is missing, the issue is skipped
|
|
58
|
+
4. Issues without `needs:*` labels are always processed (opt-in system)
|
|
59
|
+
|
|
60
|
+
## Kubernetes Integration
|
|
61
|
+
|
|
62
|
+
On Kubernetes, machine capabilities map to node labels:
|
|
63
|
+
|
|
64
|
+
```yaml
|
|
65
|
+
# Node labels (set by capability DaemonSet or manually)
|
|
66
|
+
node.squad.dev/gpu: "true"
|
|
67
|
+
node.squad.dev/browser: "true"
|
|
68
|
+
|
|
69
|
+
# Pod spec uses nodeSelector
|
|
70
|
+
spec:
|
|
71
|
+
nodeSelector:
|
|
72
|
+
node.squad.dev/gpu: "true"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
A DaemonSet can run capability discovery on each node and maintain labels automatically. See the [squad-on-aks](https://github.com/tamirdresher/squad-on-aks) project for a complete Kubernetes deployment example.
|
package/templates/mcp-config.md
CHANGED
|
@@ -4,14 +4,6 @@ MCP (Model Context Protocol) servers extend Squad with tools for external servic
|
|
|
4
4
|
|
|
5
5
|
> **Full patterns:** Read `.squad/skills/mcp-tool-discovery/SKILL.md` for discovery patterns, domain-specific usage, and graceful degradation.
|
|
6
6
|
|
|
7
|
-
## Security Considerations
|
|
8
|
-
|
|
9
|
-
> ⚠️ **Important:** The sample configs below use `npx -y` to run MCP server packages without version pinning. For production use:
|
|
10
|
-
> - **Pin versions:** Use `npx -y @trello/mcp-server@1.2.3` instead of bare package names
|
|
11
|
-
> - **Audit packages:** Review MCP server source code before granting access to credentials
|
|
12
|
-
> - **Use least-privilege tokens:** Create tokens with minimal required scopes
|
|
13
|
-
> - **Consider local installs:** Install packages locally (`npm install`) rather than fetching on each run
|
|
14
|
-
|
|
15
7
|
## Config File Locations
|
|
16
8
|
|
|
17
9
|
Users configure MCP servers at these locations (checked in priority order):
|
|
@@ -1,27 +1,27 @@
|
|
|
1
|
-
# Orchestration Log Entry
|
|
2
|
-
|
|
3
|
-
> One file per agent spawn. Saved to `.squad/orchestration-log/{timestamp}-{agent-name}.md`
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
### {timestamp} — {task summary}
|
|
8
|
-
|
|
9
|
-
| Field | Value |
|
|
10
|
-
|-------|-------|
|
|
11
|
-
| **Agent routed** | {Name} ({Role}) |
|
|
12
|
-
| **Why chosen** | {Routing rationale — what in the request matched this agent} |
|
|
13
|
-
| **Mode** | {`background` / `sync`} |
|
|
14
|
-
| **Why this mode** | {Brief reason — e.g., "No hard data dependencies" or "User needs to approve architecture"} |
|
|
15
|
-
| **Files authorized to read** | {Exact file paths the agent was told to read} |
|
|
16
|
-
| **File(s) agent must produce** | {Exact file paths the agent is expected to create or modify} |
|
|
17
|
-
| **Outcome** | {Completed / Rejected by {Reviewer} / Escalated} |
|
|
18
|
-
|
|
19
|
-
---
|
|
20
|
-
|
|
21
|
-
## Rules
|
|
22
|
-
|
|
23
|
-
1. **One file per agent spawn.** Named `{timestamp}-{agent-name}.md`.
|
|
24
|
-
2. **Log BEFORE spawning.** The entry must exist before the agent runs.
|
|
25
|
-
3. **Update outcome AFTER the agent completes.** Fill in the Outcome field.
|
|
26
|
-
4. **Never delete or edit past entries.** Append-only.
|
|
27
|
-
5. **If a reviewer rejects work,** log the rejection as a new entry with the revision agent.
|
|
1
|
+
# Orchestration Log Entry
|
|
2
|
+
|
|
3
|
+
> One file per agent spawn. Saved to `.squad/orchestration-log/{timestamp}-{agent-name}.md`
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
### {timestamp} — {task summary}
|
|
8
|
+
|
|
9
|
+
| Field | Value |
|
|
10
|
+
|-------|-------|
|
|
11
|
+
| **Agent routed** | {Name} ({Role}) |
|
|
12
|
+
| **Why chosen** | {Routing rationale — what in the request matched this agent} |
|
|
13
|
+
| **Mode** | {`background` / `sync`} |
|
|
14
|
+
| **Why this mode** | {Brief reason — e.g., "No hard data dependencies" or "User needs to approve architecture"} |
|
|
15
|
+
| **Files authorized to read** | {Exact file paths the agent was told to read} |
|
|
16
|
+
| **File(s) agent must produce** | {Exact file paths the agent is expected to create or modify} |
|
|
17
|
+
| **Outcome** | {Completed / Rejected by {Reviewer} / Escalated} |
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Rules
|
|
22
|
+
|
|
23
|
+
1. **One file per agent spawn.** Named `{timestamp}-{agent-name}.md`.
|
|
24
|
+
2. **Log BEFORE spawning.** The entry must exist before the agent runs.
|
|
25
|
+
3. **Update outcome AFTER the agent completes.** Fill in the Outcome field.
|
|
26
|
+
4. **Never delete or edit past entries.** Append-only.
|
|
27
|
+
5. **If a reviewer rejects work,** log the rejection as a new entry with the revision agent.
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# Ralph Circuit Breaker — Model Rate Limit Fallback
|
|
2
|
+
|
|
3
|
+
> Classic circuit breaker pattern (Hystrix / Polly / Resilience4j) applied to Copilot model selection.
|
|
4
|
+
> When the preferred model hits rate limits, Ralph automatically degrades to free-tier models, then self-heals.
|
|
5
|
+
|
|
6
|
+
## Problem
|
|
7
|
+
|
|
8
|
+
When running multiple Ralph instances across repos, Copilot model rate limits cause cascading failures.
|
|
9
|
+
All Ralphs fail simultaneously when the preferred model (e.g., `claude-sonnet-4.6`) hits quota.
|
|
10
|
+
|
|
11
|
+
Premium models burn quota fast, while free-tier models (0x multiplier) do not consume it:
|
|
12
|
+
| Model | Multiplier | Risk |
|
|
13
|
+
|-------|-----------|------|
|
|
14
|
+
| `claude-sonnet-4.6` | 1x | Moderate with many Ralphs |
|
|
15
|
+
| `claude-opus-4.6` | 10x | High |
|
|
16
|
+
| `gpt-5.4` | 50x | Very high |
|
|
17
|
+
| `gpt-5.4-mini` | **0x** | **Free — unlimited** |
|
|
18
|
+
| `gpt-5-mini` | **0x** | **Free — unlimited** |
|
|
19
|
+
| `gpt-4.1` | **0x** | **Free — unlimited** |
|
|
20
|
+
|
|
21
|
+
## Circuit Breaker States
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
┌─────────┐ rate limit error ┌────────┐
|
|
25
|
+
│ CLOSED │ ───────────────────► │ OPEN │
|
|
26
|
+
│ (normal)│ │(fallback)│
|
|
27
|
+
└────┬────┘ ◄──────────────── └────┬────┘
|
|
28
|
+
│ 2 consecutive │
|
|
29
|
+
│ successes │ cooldown expires
|
|
30
|
+
│ ▼
|
|
31
|
+
│ ┌──────────┐
|
|
32
|
+
└───── success ◄──────── │HALF-OPEN │
|
|
33
|
+
(close) │ (testing) │
|
|
34
|
+
└──────────┘
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### CLOSED (normal operation)
|
|
38
|
+
- Use preferred model from config
|
|
39
|
+
- Every successful response confirms circuit stays closed
|
|
40
|
+
- On rate limit error → transition to OPEN
|
|
41
|
+
|
|
42
|
+
### OPEN (rate limited — fallback active)
|
|
43
|
+
- Fall back through the free-tier model chain:
|
|
44
|
+
1. `gpt-5.4-mini`
|
|
45
|
+
2. `gpt-5-mini`
|
|
46
|
+
3. `gpt-4.1`
|
|
47
|
+
- Start cooldown timer (default: 10 minutes)
|
|
48
|
+
- When cooldown expires → transition to HALF-OPEN
|
|
49
|
+
|
|
50
|
+
### HALF-OPEN (testing recovery)
|
|
51
|
+
- Try preferred model again
|
|
52
|
+
- If 2 consecutive successes → transition to CLOSED
|
|
53
|
+
- If rate limit error → back to OPEN, reset cooldown
|
|
54
|
+
|
|
55
|
+
## State File: `.squad/ralph-circuit-breaker.json`
|
|
56
|
+
|
|
57
|
+
```json
|
|
58
|
+
{
|
|
59
|
+
"state": "closed",
|
|
60
|
+
"preferredModel": "claude-sonnet-4.6",
|
|
61
|
+
"fallbackChain": ["gpt-5.4-mini", "gpt-5-mini", "gpt-4.1"],
|
|
62
|
+
"currentFallbackIndex": 0,
|
|
63
|
+
"cooldownMinutes": 10,
|
|
64
|
+
"openedAt": null,
|
|
65
|
+
"halfOpenSuccesses": 0,
|
|
66
|
+
"consecutiveFailures": 0,
|
|
67
|
+
"metrics": {
|
|
68
|
+
"totalFallbacks": 0,
|
|
69
|
+
"totalRecoveries": 0,
|
|
70
|
+
"lastFallbackAt": null,
|
|
71
|
+
"lastRecoveryAt": null
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## PowerShell Functions
|
|
77
|
+
|
|
78
|
+
Paste these into your `ralph-watch.ps1` or source them from a shared module.
|
|
79
|
+
|
|
80
|
+
### `Get-CircuitBreakerState`
|
|
81
|
+
|
|
82
|
+
```powershell
|
|
83
|
+
function Get-CircuitBreakerState {
|
|
84
|
+
param([string]$StateFile = ".squad/ralph-circuit-breaker.json")
|
|
85
|
+
|
|
86
|
+
if (-not (Test-Path $StateFile)) {
|
|
87
|
+
$default = @{
|
|
88
|
+
state = "closed"
|
|
89
|
+
preferredModel = "claude-sonnet-4.6"
|
|
90
|
+
fallbackChain = @("gpt-5.4-mini", "gpt-5-mini", "gpt-4.1")
|
|
91
|
+
currentFallbackIndex = 0
|
|
92
|
+
cooldownMinutes = 10
|
|
93
|
+
openedAt = $null
|
|
94
|
+
halfOpenSuccesses = 0
|
|
95
|
+
consecutiveFailures = 0
|
|
96
|
+
metrics = @{
|
|
97
|
+
totalFallbacks = 0
|
|
98
|
+
totalRecoveries = 0
|
|
99
|
+
lastFallbackAt = $null
|
|
100
|
+
lastRecoveryAt = $null
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
$default | ConvertTo-Json -Depth 3 | Set-Content $StateFile
|
|
104
|
+
return $default
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
return (Get-Content $StateFile -Raw | ConvertFrom-Json)
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### `Save-CircuitBreakerState`
|
|
112
|
+
|
|
113
|
+
```powershell
|
|
114
|
+
function Save-CircuitBreakerState {
|
|
115
|
+
param(
|
|
116
|
+
[object]$State,
|
|
117
|
+
[string]$StateFile = ".squad/ralph-circuit-breaker.json"
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
$State | ConvertTo-Json -Depth 3 | Set-Content $StateFile
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### `Get-CurrentModel`
|
|
125
|
+
|
|
126
|
+
Returns the model Ralph should use right now, based on circuit state.
|
|
127
|
+
|
|
128
|
+
```powershell
|
|
129
|
+
function Get-CurrentModel {
|
|
130
|
+
param([string]$StateFile = ".squad/ralph-circuit-breaker.json")
|
|
131
|
+
|
|
132
|
+
$cb = Get-CircuitBreakerState -StateFile $StateFile
|
|
133
|
+
|
|
134
|
+
switch ($cb.state) {
|
|
135
|
+
"closed" {
|
|
136
|
+
return $cb.preferredModel
|
|
137
|
+
}
|
|
138
|
+
"open" {
|
|
139
|
+
# Check if cooldown has expired
|
|
140
|
+
if ($cb.openedAt) {
|
|
141
|
+
$opened = [DateTime]::Parse($cb.openedAt)
|
|
142
|
+
$elapsed = (Get-Date) - $opened
|
|
143
|
+
if ($elapsed.TotalMinutes -ge $cb.cooldownMinutes) {
|
|
144
|
+
# Transition to half-open
|
|
145
|
+
$cb.state = "half-open"
|
|
146
|
+
$cb.halfOpenSuccesses = 0
|
|
147
|
+
Save-CircuitBreakerState -State $cb -StateFile $StateFile
|
|
148
|
+
Write-Host " [circuit-breaker] Cooldown expired. Testing preferred model..." -ForegroundColor Yellow
|
|
149
|
+
return $cb.preferredModel
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
# Still in cooldown — use fallback
|
|
153
|
+
$idx = [Math]::Min($cb.currentFallbackIndex, $cb.fallbackChain.Count - 1)
|
|
154
|
+
return $cb.fallbackChain[$idx]
|
|
155
|
+
}
|
|
156
|
+
"half-open" {
|
|
157
|
+
return $cb.preferredModel
|
|
158
|
+
}
|
|
159
|
+
default {
|
|
160
|
+
return $cb.preferredModel
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### `Update-CircuitBreakerOnSuccess`
|
|
167
|
+
|
|
168
|
+
Call after every successful model response.
|
|
169
|
+
|
|
170
|
+
```powershell
|
|
171
|
+
function Update-CircuitBreakerOnSuccess {
|
|
172
|
+
param([string]$StateFile = ".squad/ralph-circuit-breaker.json")
|
|
173
|
+
|
|
174
|
+
$cb = Get-CircuitBreakerState -StateFile $StateFile
|
|
175
|
+
$cb.consecutiveFailures = 0
|
|
176
|
+
|
|
177
|
+
if ($cb.state -eq "half-open") {
|
|
178
|
+
$cb.halfOpenSuccesses++
|
|
179
|
+
if ($cb.halfOpenSuccesses -ge 2) {
|
|
180
|
+
# Recovery! Close the circuit
|
|
181
|
+
$cb.state = "closed"
|
|
182
|
+
$cb.openedAt = $null
|
|
183
|
+
$cb.halfOpenSuccesses = 0
|
|
184
|
+
$cb.currentFallbackIndex = 0
|
|
185
|
+
$cb.metrics.totalRecoveries++
|
|
186
|
+
$cb.metrics.lastRecoveryAt = (Get-Date).ToString("o")
|
|
187
|
+
Save-CircuitBreakerState -State $cb -StateFile $StateFile
|
|
188
|
+
Write-Host " [circuit-breaker] RECOVERED — back to preferred model ($($cb.preferredModel))" -ForegroundColor Green
|
|
189
|
+
return
|
|
190
|
+
}
|
|
191
|
+
Save-CircuitBreakerState -State $cb -StateFile $StateFile
|
|
192
|
+
Write-Host " [circuit-breaker] Half-open success $($cb.halfOpenSuccesses)/2" -ForegroundColor Yellow
|
|
193
|
+
return
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
# closed state — nothing to do
|
|
197
|
+
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### `Update-CircuitBreakerOnRateLimit`
|
|
201
|
+
|
|
202
|
+
Call when a model response indicates rate limiting (HTTP 429 or error message containing "rate limit").
|
|
203
|
+
|
|
204
|
+
```powershell
|
|
205
|
+
function Update-CircuitBreakerOnRateLimit {
|
|
206
|
+
param([string]$StateFile = ".squad/ralph-circuit-breaker.json")
|
|
207
|
+
|
|
208
|
+
$cb = Get-CircuitBreakerState -StateFile $StateFile
|
|
209
|
+
$cb.consecutiveFailures++
|
|
210
|
+
|
|
211
|
+
if ($cb.state -eq "closed" -or $cb.state -eq "half-open") {
|
|
212
|
+
# Open the circuit
|
|
213
|
+
$cb.state = "open"
|
|
214
|
+
$cb.openedAt = (Get-Date).ToString("o")
|
|
215
|
+
$cb.halfOpenSuccesses = 0
|
|
216
|
+
$cb.currentFallbackIndex = 0
|
|
217
|
+
$cb.metrics.totalFallbacks++
|
|
218
|
+
$cb.metrics.lastFallbackAt = (Get-Date).ToString("o")
|
|
219
|
+
Save-CircuitBreakerState -State $cb -StateFile $StateFile
|
|
220
|
+
|
|
221
|
+
$fallbackModel = $cb.fallbackChain[0]
|
|
222
|
+
Write-Host " [circuit-breaker] RATE LIMITED — falling back to $fallbackModel (cooldown: $($cb.cooldownMinutes)m)" -ForegroundColor Red
|
|
223
|
+
return
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
if ($cb.state -eq "open") {
|
|
227
|
+
# Already open — try next fallback in chain if current one also fails
|
|
228
|
+
if ($cb.currentFallbackIndex -lt ($cb.fallbackChain.Count - 1)) {
|
|
229
|
+
$cb.currentFallbackIndex++
|
|
230
|
+
$nextModel = $cb.fallbackChain[$cb.currentFallbackIndex]
|
|
231
|
+
Write-Host " [circuit-breaker] Fallback also limited — trying $nextModel" -ForegroundColor Red
|
|
232
|
+
}
|
|
233
|
+
# Reset cooldown timer
|
|
234
|
+
$cb.openedAt = (Get-Date).ToString("o")
|
|
235
|
+
Save-CircuitBreakerState -State $cb -StateFile $StateFile
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Integration with ralph-watch.ps1
|
|
241
|
+
|
|
242
|
+
In your Ralph polling loop, wrap the model selection:
|
|
243
|
+
|
|
244
|
+
```powershell
|
|
245
|
+
# At the top of your polling loop
|
|
246
|
+
$model = Get-CurrentModel
|
|
247
|
+
|
|
248
|
+
# When invoking copilot CLI
|
|
249
|
+
$result = copilot-cli --model $model ...
|
|
250
|
+
|
|
251
|
+
# After the call
|
|
252
|
+
if ($result -match "rate.?limit" -or $LASTEXITCODE -eq 429) {
|
|
253
|
+
Update-CircuitBreakerOnRateLimit
|
|
254
|
+
} else {
|
|
255
|
+
Update-CircuitBreakerOnSuccess
|
|
256
|
+
}
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Full integration example
|
|
260
|
+
|
|
261
|
+
```powershell
|
|
262
|
+
# Source the circuit breaker functions
|
|
263
|
+
. .squad-templates/ralph-circuit-breaker-functions.ps1
|
|
264
|
+
|
|
265
|
+
while ($true) {
|
|
266
|
+
$model = Get-CurrentModel
|
|
267
|
+
Write-Host "Polling with model: $model"
|
|
268
|
+
|
|
269
|
+
try {
|
|
270
|
+
# Your existing Ralph logic here, but pass $model
|
|
271
|
+
$response = Invoke-RalphCycle -Model $model
|
|
272
|
+
|
|
273
|
+
# Success path
|
|
274
|
+
Update-CircuitBreakerOnSuccess
|
|
275
|
+
}
|
|
276
|
+
catch {
|
|
277
|
+
if ($_.Exception.Message -match "rate.?limit|429|quota|Too Many Requests") {
|
|
278
|
+
Update-CircuitBreakerOnRateLimit
|
|
279
|
+
# Retry immediately with fallback model
|
|
280
|
+
continue
|
|
281
|
+
}
|
|
282
|
+
# Other errors — handle normally
|
|
283
|
+
throw
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
Start-Sleep -Seconds $pollInterval
|
|
287
|
+
}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Configuration
|
|
291
|
+
|
|
292
|
+
Override defaults by editing `.squad/ralph-circuit-breaker.json`:
|
|
293
|
+
|
|
294
|
+
| Field | Default | Description |
|
|
295
|
+
|-------|---------|-------------|
|
|
296
|
+
| `preferredModel` | `claude-sonnet-4.6` | Model to use when circuit is closed |
|
|
297
|
+
| `fallbackChain` | `["gpt-5.4-mini", "gpt-5-mini", "gpt-4.1"]` | Ordered fallback models (all free-tier) |
|
|
298
|
+
| `cooldownMinutes` | `10` | How long to wait before testing recovery |
|
|
299
|
+
|
|
300
|
+
## Metrics
|
|
301
|
+
|
|
302
|
+
The state file tracks operational metrics:
|
|
303
|
+
|
|
304
|
+
- **totalFallbacks** — How many times the circuit opened
|
|
305
|
+
- **totalRecoveries** — How many times it recovered to preferred model
|
|
306
|
+
- **lastFallbackAt** — ISO timestamp of the last time the circuit opened (i.e., the last rate limit hit while closed or half-open)
|
|
307
|
+
- **lastRecoveryAt** — ISO timestamp of last successful recovery
|
|
308
|
+
|
|
309
|
+
Query metrics with:
|
|
310
|
+
```powershell
|
|
311
|
+
$cb = Get-Content .squad/ralph-circuit-breaker.json | ConvertFrom-Json
|
|
312
|
+
Write-Host "Fallbacks: $($cb.metrics.totalFallbacks) | Recoveries: $($cb.metrics.totalRecoveries)"
|
|
313
|
+
```
|