switchroom 0.13.2 → 0.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +2 -2
- package/dist/auth-broker/index.js +2 -2
- package/dist/cli/switchroom.js +21 -18
- package/dist/host-control/main.js +2 -2
- package/dist/vault/approvals/kernel-server.js +2 -2
- package/dist/vault/broker/server.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +42 -10
- package/telegram-plugin/gateway/boot-probes.ts +13 -6
- package/telegram-plugin/gateway/gateway.ts +44 -6
- package/telegram-plugin/hooks/silent-end-interrupt-stop.mjs +5 -1
- package/telegram-plugin/silent-end.ts +56 -0
- package/telegram-plugin/tests/boot-probes.test.ts +26 -2
- package/telegram-plugin/tests/silent-end.test.ts +69 -0
- package/skills/buildkite-agent-infrastructure/SKILL.md +0 -321
- package/skills/buildkite-agent-infrastructure/agents/openai.yaml +0 -6
- package/skills/buildkite-agent-infrastructure/assets/buildkite-icon-large.png +0 -0
- package/skills/buildkite-agent-infrastructure/assets/buildkite-icon-small.png +0 -0
- package/skills/buildkite-agent-infrastructure/references/audit-logging.md +0 -87
- package/skills/buildkite-agent-infrastructure/references/graphql-mutations.md +0 -690
- package/skills/buildkite-agent-infrastructure/references/instance-shapes.md +0 -38
- package/skills/buildkite-agent-infrastructure/references/pipeline-templates.md +0 -73
- package/skills/buildkite-agent-infrastructure/references/self-hosted-agents.md +0 -137
- package/skills/buildkite-agent-infrastructure/references/sso-saml.md +0 -92
- package/skills/buildkite-agent-runtime/SKILL.md +0 -509
- package/skills/buildkite-agent-runtime/agents/openai.yaml +0 -6
- package/skills/buildkite-agent-runtime/assets/buildkite-icon-large.png +0 -0
- package/skills/buildkite-agent-runtime/assets/buildkite-icon-small.png +0 -0
- package/skills/buildkite-agent-runtime/references/flag-reference.md +0 -417
- package/skills/buildkite-agent-runtime/references/patterns-and-recipes.md +0 -555
- package/skills/buildkite-api/SKILL.md +0 -308
- package/skills/buildkite-api/agents/openai.yaml +0 -6
- package/skills/buildkite-api/assets/buildkite-icon-large.png +0 -0
- package/skills/buildkite-api/assets/buildkite-icon-small.png +0 -0
- package/skills/buildkite-api/references/graphql-reference.md +0 -195
- package/skills/buildkite-api/references/patterns.md +0 -44
- package/skills/buildkite-api/references/webhooks.md +0 -161
- package/skills/buildkite-cli/SKILL.md +0 -397
- package/skills/buildkite-cli/agents/openai.yaml +0 -6
- package/skills/buildkite-cli/assets/buildkite-icon-large.png +0 -0
- package/skills/buildkite-cli/assets/buildkite-icon-small.png +0 -0
- package/skills/buildkite-cli/references/command-reference.md +0 -181
- package/skills/buildkite-migration/SKILL.md +0 -195
- package/skills/buildkite-pipelines/SKILL.md +0 -481
- package/skills/buildkite-pipelines/agents/openai.yaml +0 -6
- package/skills/buildkite-pipelines/assets/buildkite-icon-large.png +0 -0
- package/skills/buildkite-pipelines/assets/buildkite-icon-small.png +0 -0
- package/skills/buildkite-pipelines/examples/basic-pipeline.yml +0 -24
- package/skills/buildkite-pipelines/examples/optimized-pipeline.yml +0 -100
- package/skills/buildkite-pipelines/references/advanced-patterns.md +0 -286
- package/skills/buildkite-pipelines/references/retry-and-error-codes.md +0 -131
- package/skills/buildkite-pipelines/references/step-types-reference.md +0 -225
- package/skills/buildkite-secure-delivery/SKILL.md +0 -182
- package/skills/buildkite-secure-delivery/agents/openai.yaml +0 -6
- package/skills/buildkite-secure-delivery/assets/buildkite-icon-large.png +0 -0
- package/skills/buildkite-secure-delivery/assets/buildkite-icon-small.png +0 -0
- package/skills/buildkite-secure-delivery/references/oidc-cloud-providers.md +0 -83
- package/skills/buildkite-secure-delivery/references/package-publishing.md +0 -100
- package/skills/buildkite-test-engine/SKILL.md +0 -256
- package/skills/buildkite-test-engine/agents/openai.yaml +0 -6
- package/skills/buildkite-test-engine/assets/buildkite-icon-large.png +0 -0
- package/skills/buildkite-test-engine/assets/buildkite-icon-small.png +0 -0
- package/skills/buildkite-test-engine/examples/bktec-splitting.yml +0 -16
- package/skills/buildkite-test-engine/examples/collector-pipeline.yml +0 -11
- package/skills/buildkite-test-engine/references/collectors.md +0 -198
- package/skills/buildkite-test-engine/references/splitting-examples.md +0 -93
|
@@ -7,6 +7,8 @@ import {
|
|
|
7
7
|
writeSilentEndState,
|
|
8
8
|
clearSilentEndState,
|
|
9
9
|
readSilentEndState,
|
|
10
|
+
recordSilentTurnEnd,
|
|
11
|
+
SILENT_END_MAX_RETRIES,
|
|
10
12
|
} from '../silent-end.js'
|
|
11
13
|
|
|
12
14
|
let stateDir: string
|
|
@@ -118,6 +120,73 @@ describe('silent-end.ts — gateway state writer', () => {
|
|
|
118
120
|
})
|
|
119
121
|
})
|
|
120
122
|
|
|
123
|
+
describe('recordSilentTurnEnd — #1161 exhaustion detection', () => {
|
|
124
|
+
it('first silent-end of a turn writes state and reports exhausted:false', () => {
|
|
125
|
+
const r = recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
126
|
+
expect(r.exhausted).toBe(false)
|
|
127
|
+
expect(readSilentEndState()).toMatchObject({ turnKey: 'c:_', retryCount: 0 })
|
|
128
|
+
})
|
|
129
|
+
|
|
130
|
+
it('reports exhausted:false while prior retryCount is still below the cap', () => {
|
|
131
|
+
// The Stop hook has not yet been able to push retryCount to the cap.
|
|
132
|
+
const path = join(stateDir, 'silent-end-pending.json')
|
|
133
|
+
writeFileSync(path, JSON.stringify({
|
|
134
|
+
chatId: 'c', threadId: null, turnKey: 'c:_',
|
|
135
|
+
retryCount: SILENT_END_MAX_RETRIES - 1, timestamp: 0,
|
|
136
|
+
}))
|
|
137
|
+
const r = recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
138
|
+
expect(r.exhausted).toBe(false)
|
|
139
|
+
// State is (re)written, inheriting the prior counter for the same turn.
|
|
140
|
+
expect(readSilentEndState()!.retryCount).toBe(SILENT_END_MAX_RETRIES - 1)
|
|
141
|
+
})
|
|
142
|
+
|
|
143
|
+
it('reports exhausted:true and clears state once the re-prompt cap is reached', () => {
|
|
144
|
+
// The Stop hook already blocked once and pushed retryCount to the cap;
|
|
145
|
+
// the agent is STILL silent on this re-prompted turn.
|
|
146
|
+
const path = join(stateDir, 'silent-end-pending.json')
|
|
147
|
+
writeFileSync(path, JSON.stringify({
|
|
148
|
+
chatId: 'c', threadId: null, turnKey: 'c:_',
|
|
149
|
+
retryCount: SILENT_END_MAX_RETRIES, timestamp: 0,
|
|
150
|
+
}))
|
|
151
|
+
const r = recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
152
|
+
expect(r.exhausted).toBe(true)
|
|
153
|
+
// State cleared so the Stop hook on this final turn allows the stop.
|
|
154
|
+
expect(readSilentEndState()).toBeNull()
|
|
155
|
+
})
|
|
156
|
+
|
|
157
|
+
it('treats a capped prior state for a DIFFERENT turn as a fresh silent-end', () => {
|
|
158
|
+
const path = join(stateDir, 'silent-end-pending.json')
|
|
159
|
+
writeFileSync(path, JSON.stringify({
|
|
160
|
+
chatId: 'old', threadId: null, turnKey: 'old:_',
|
|
161
|
+
retryCount: SILENT_END_MAX_RETRIES, timestamp: 0,
|
|
162
|
+
}))
|
|
163
|
+
const r = recordSilentTurnEnd({ chatId: 'new', threadId: 9, turnKey: 'new:9' })
|
|
164
|
+
expect(r.exhausted).toBe(false)
|
|
165
|
+
expect(readSilentEndState()).toMatchObject({ turnKey: 'new:9', retryCount: 0 })
|
|
166
|
+
})
|
|
167
|
+
|
|
168
|
+
it('full lifecycle: silent → re-prompt → still silent → exhausted', () => {
|
|
169
|
+
// 1. Turn ends silent — first record.
|
|
170
|
+
expect(recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' }).exhausted).toBe(false)
|
|
171
|
+
// 2. Stop hook blocks and increments retryCount (simulated).
|
|
172
|
+
const path = join(stateDir, 'silent-end-pending.json')
|
|
173
|
+
const s = readSilentEndState()!
|
|
174
|
+
writeFileSync(path, JSON.stringify({ ...s, retryCount: s.retryCount + 1 }))
|
|
175
|
+
// 3. Re-prompted turn ends silent again — recovery exhausted.
|
|
176
|
+
expect(recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' }).exhausted).toBe(true)
|
|
177
|
+
expect(readSilentEndState()).toBeNull()
|
|
178
|
+
})
|
|
179
|
+
|
|
180
|
+
it('SILENT_END_MAX_RETRIES matches MAX_RETRIES in the Stop hook', () => {
|
|
181
|
+
// The hook is a standalone .mjs and hardcodes its own copy — this
|
|
182
|
+
// guards the two from drifting apart.
|
|
183
|
+
const hookSrc = readFileSync(join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs'), 'utf8')
|
|
184
|
+
const m = hookSrc.match(/const MAX_RETRIES = (\d+)/)
|
|
185
|
+
expect(m).not.toBeNull()
|
|
186
|
+
expect(Number(m![1])).toBe(SILENT_END_MAX_RETRIES)
|
|
187
|
+
})
|
|
188
|
+
})
|
|
189
|
+
|
|
121
190
|
describe('silent-end-interrupt-stop hook — integration', () => {
|
|
122
191
|
const hookPath = join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs')
|
|
123
192
|
|
|
@@ -1,321 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: buildkite-agent-infrastructure
|
|
3
|
-
description: >
|
|
4
|
-
Buildkite cluster / organization / platform administration. Whenever
|
|
5
|
-
the user's message starts with the phrase "In Buildkite cluster
|
|
6
|
-
admin," — regardless of what follows — use this skill; that prefix
|
|
7
|
-
is a hard trigger that wins over `buildkite-api`, `buildkite-cli`,
|
|
8
|
-
and `buildkite-agent-runtime`. Provision and govern Buildkite CI
|
|
9
|
-
infrastructure: creating clusters, creating queues, scaling queues,
|
|
10
|
-
setting up hosted agents, right-sizing instance shapes, optimizing
|
|
11
|
-
CI costs, managing agent tokens, managing cluster secrets,
|
|
12
|
-
configuring SSO, setting up SAML, setting up audit logging, creating
|
|
13
|
-
pipeline templates, and standardizing pipelines across teams. Use
|
|
14
|
-
when the user says, verbatim: "set up SAML", "manage agent tokens",
|
|
15
|
-
"configure SSO", "set up audit logging", "Let's configure SSO.",
|
|
16
|
-
"I need to configure SSO.", "Could you scale queues for me?",
|
|
17
|
-
"Scale queues, please.", "scale queues", "Create a queue, please.",
|
|
18
|
-
"Create a cluster, please.", "set up hosted agents", "manage
|
|
19
|
-
cluster secrets", "right-size instance shapes", "optimize CI
|
|
20
|
-
costs", "standardize pipelines across teams", "create a pipeline
|
|
21
|
-
template", "configure agents", and typo'd variants like "manage
|
|
22
|
-
clusetr secrets", "configuree agents", "set up hostted agents".
|
|
23
|
-
Anything about buildkite-agent.cfg, agent tags, agent tokens, cluster
|
|
24
|
-
queues, hosted agent instance shapes, pipeline templates, audit
|
|
25
|
-
events, SSO/SAML providers, queue wait time, agent lifecycle hooks,
|
|
26
|
-
or Buildkite platform governance fires this skill — even when the
|
|
27
|
-
request mentions GraphQL or API calls (the rival `buildkite-api` is
|
|
28
|
-
for generic webhook/pagination/scripting, NOT for SSO/queue/cluster
|
|
29
|
-
admin which always belongs here).
|
|
30
|
-
Do NOT use when the user is calling `buildkite-agent <subcommand>` from
|
|
31
|
-
inside a running step (token use, artifact upload, annotate) — that's
|
|
32
|
-
`buildkite-agent-runtime`; or when the user just wants cluster CLI
|
|
33
|
-
shortcuts like `bk cluster ...` — that's `buildkite-cli`.
|
|
34
|
-
---
|
|
35
|
-
|
|
36
|
-
# Buildkite Platform Engineering
|
|
37
|
-
|
|
38
|
-
Provision and govern Buildkite CI infrastructure at scale: clusters, queues, hosted agent sizing, secrets, agent tokens, self-hosted configuration, lifecycle hooks, pipeline templates, audit logging, SSO/SAML, and cost optimization.
|
|
39
|
-
|
|
40
|
-
## Quick Start
|
|
41
|
-
|
|
42
|
-
Create a cluster with a hosted queue to get builds running immediately. **Start with hosted agents unless there is a specific reason to self-host** (GPU workloads, on-prem, custom hardware). Self-hosted queues require provisioning your own agents; builds stay in the "scheduled" state until agents connect.
|
|
43
|
-
|
|
44
|
-
All GraphQL mutations go to `https://graphql.buildkite.com/v1` with a Bearer token:
|
|
45
|
-
|
|
46
|
-
```bash
|
|
47
|
-
curl -sS -X POST "https://graphql.buildkite.com/v1" \
|
|
48
|
-
-H "Authorization: Bearer $BUILDKITE_API_TOKEN" \
|
|
49
|
-
-H "Content-Type: application/json" \
|
|
50
|
-
-d '{"query": "<GRAPHQL_QUERY_OR_MUTATION>", "variables": { ... }}'
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
**Step 1:** Get the organization ID: `query { organization(slug: "my-org") { id } }`
|
|
54
|
-
|
|
55
|
-
**Step 2:** Create a cluster:
|
|
56
|
-
|
|
57
|
-
```graphql
|
|
58
|
-
mutation {
|
|
59
|
-
clusterCreate(input: {
|
|
60
|
-
organizationId: "org-id"
|
|
61
|
-
name: "Production"
|
|
62
|
-
description: "Production CI cluster"
|
|
63
|
-
emoji: ":rocket:"
|
|
64
|
-
color: "#14CC80"
|
|
65
|
-
}) { cluster { id uuid name } }
|
|
66
|
-
}
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
**Step 3:** Create a hosted queue with a specific instance shape:
|
|
70
|
-
|
|
71
|
-
```graphql
|
|
72
|
-
mutation {
|
|
73
|
-
clusterQueueCreate(input: {
|
|
74
|
-
organizationId: "org-id"
|
|
75
|
-
clusterId: "cluster-id"
|
|
76
|
-
key: "linux-large"
|
|
77
|
-
description: "Linux 8 vCPU / 32 GB for heavy compilation"
|
|
78
|
-
hostedAgents: { instanceShape: LINUX_AMD64_8X32 }
|
|
79
|
-
}) { clusterQueue { id key } }
|
|
80
|
-
}
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
**Step 4:** Create a pipeline in the cluster via GraphQL `pipelineCreate` or the REST API, then trigger a build.
|
|
84
|
-
|
|
85
|
-
> For pipeline creation via REST and GraphQL, see the **buildkite-api** skill.
|
|
86
|
-
|
|
87
|
-
> For pipeline YAML syntax including `agents:` routing and `secrets:` access, see the **buildkite-pipelines** skill.
|
|
88
|
-
> For `bk cluster` CLI commands, see the **buildkite-cli** skill.
|
|
89
|
-
|
|
90
|
-
## Clusters
|
|
91
|
-
|
|
92
|
-
A cluster is the top-level container for queues, agent tokens, and secrets. Every organization starts with one default cluster; create additional clusters to isolate workloads (e.g., production vs. staging, team-specific).
|
|
93
|
-
|
|
94
|
-
### Create a cluster
|
|
95
|
-
|
|
96
|
-
```bash
|
|
97
|
-
curl -s -X POST "https://api.buildkite.com/v2/organizations/my-org/clusters" \
|
|
98
|
-
-H "Authorization: Bearer $BUILDKITE_API_TOKEN" \
|
|
99
|
-
-H "Content-Type: application/json" \
|
|
100
|
-
-d '{
|
|
101
|
-
"name": "Backend",
|
|
102
|
-
"description": "Backend team CI cluster",
|
|
103
|
-
"emoji": ":gear:",
|
|
104
|
-
"color": "#0B79CE"
|
|
105
|
-
}'
|
|
106
|
-
```
|
|
107
|
-
|
|
108
|
-
Fields: `name` (required); `description`, `emoji`, `color`, and `default_queue_id` (all optional).
|
|
109
|
-
|
|
110
|
-
> For full REST and GraphQL API reference, see the **buildkite-api** skill.
|
|
111
|
-
|
|
112
|
-
## Queues and Hosted Agents
|
|
113
|
-
|
|
114
|
-
Queues route builds to agents. **Hosted queues** (Buildkite-managed compute) are the recommended starting point — builds run immediately. **Self-hosted queues** require connecting your own agents; builds remain "scheduled" until agents connect.
|
|
115
|
-
|
|
116
|
-
Create queues with the `clusterQueueCreate` GraphQL mutation (shown in Quick Start above). To create a **self-hosted queue**, omit `hostedAgents`. Self-hosted agents connect by targeting the queue key in their configuration.
|
|
117
|
-
|
|
118
|
-
### Instance shapes and sizing guide
|
|
119
|
-
|
|
120
|
-
Full list: `references/instance-shapes.md`. Quick sizing:
|
|
121
|
-
|
|
122
|
-
| Workload | Shape |
|
|
123
|
-
|----------|-------|
|
|
124
|
-
| Linting, unit tests | `LINUX_AMD64_2X4` |
|
|
125
|
-
| Monorepos, multi-service | `LINUX_AMD64_4X16` |
|
|
126
|
-
| Heavy compilation (C++, Rust) | `LINUX_AMD64_8X32` |
|
|
127
|
-
| Docker builds, ML prep | `LINUX_AMD64_16X64` |
|
|
128
|
-
| iOS / macOS | `MACOS_M4_6X28` or `MACOS_M4_12X56` |
|
|
129
|
-
|
|
130
|
-
Start with the smallest shape that keeps builds under target time. Scale up if queue wait exceeds 2 minutes.
|
|
131
|
-
|
|
132
|
-
### Queue design patterns
|
|
133
|
-
|
|
134
|
-
- **Keep 1-2 static instances in the default queue** — avoids cold-start latency on pipeline uploads
|
|
135
|
-
- **Retire oldest agents first during scale-down** — preserves warm caches
|
|
136
|
-
- **Trial pattern** — test new shapes/architectures on a separate queue before migrating
|
|
137
|
-
- **Tag builds with metadata** for cost attribution by queue or team
|
|
138
|
-
|
|
139
|
-
Temporarily pause dispatch to a queue for maintenance or cost control using `clusterQueuePauseDispatch` / `clusterQueueResumeDispatch` GraphQL mutations. See `references/graphql-mutations.md` for examples.
|
|
140
|
-
|
|
141
|
-
## Cluster Secrets
|
|
142
|
-
|
|
143
|
-
Cluster secrets are encrypted, cluster-scoped values accessible from pipeline steps. They replace hardcoded credentials and environment-hook-based secret injection. Create, update, and rotate secrets via the REST API at `/v2/organizations/{org}/clusters/{cluster_id}/secrets`.
|
|
144
|
-
|
|
145
|
-
### Secret key constraints
|
|
146
|
-
|
|
147
|
-
| Rule | Detail |
|
|
148
|
-
|------|--------|
|
|
149
|
-
| Must start with | A letter (A-Z, a-z) |
|
|
150
|
-
| Allowed characters | Letters, numbers, underscores only |
|
|
151
|
-
| Prohibited prefixes | `buildkite`, `bk` (reserved) |
|
|
152
|
-
| Max key length | 255 characters |
|
|
153
|
-
| Max value size | 8 KB |
|
|
154
|
-
|
|
155
|
-
### Access policies
|
|
156
|
-
|
|
157
|
-
Restrict which pipelines and branches can access a secret by adding a `policy` object with `claims`. Available claim types: `pipeline_slug`, `build_branch`, `build_creator`, `build_source`, `build_creator_team`, `cluster_queue_key`. Claims support `*` wildcards. See [Buildkite Secrets docs](https://buildkite.com/docs/pipelines/security/secrets/buildkite-secrets.md) for policy examples.
|
|
158
|
-
|
|
159
|
-
Value rotation uses a separate endpoint (`PUT .../secrets/{id}/value`) from description/policy updates (`PUT .../secrets/{id}`).
|
|
160
|
-
|
|
161
|
-
> For `secrets:` YAML syntax, see the **buildkite-pipelines** skill. For `buildkite-agent secret get`, see the **buildkite-agent-runtime** skill.
|
|
162
|
-
|
|
163
|
-
## Agent Tokens
|
|
164
|
-
|
|
165
|
-
Agent tokens authenticate agents connecting to a cluster. Each token is scoped to a single cluster.
|
|
166
|
-
|
|
167
|
-
### Create a token
|
|
168
|
-
|
|
169
|
-
```bash
|
|
170
|
-
curl -s -X POST "https://api.buildkite.com/v2/organizations/my-org/clusters/$CLUSTER_ID/tokens" \
|
|
171
|
-
-H "Authorization: Bearer $BUILDKITE_API_TOKEN" \
|
|
172
|
-
-H "Content-Type: application/json" \
|
|
173
|
-
-d '{
|
|
174
|
-
"description": "Backend CI agents - production",
|
|
175
|
-
"allowed_ip_addresses": "10.0.0.0/8"
|
|
176
|
-
}'
|
|
177
|
-
```
|
|
178
|
-
|
|
179
|
-
| Field | Required | Description |
|
|
180
|
-
|-------|----------|-------------|
|
|
181
|
-
| `description` | Yes | Human-readable token description |
|
|
182
|
-
| `allowed_ip_addresses` | No | Comma-separated CIDR ranges restricting agent connections |
|
|
183
|
-
| `expires_at` | No | ISO 8601 expiry timestamp |
|
|
184
|
-
|
|
185
|
-
The token value is only returned at creation time. Store it in a secrets manager immediately.
|
|
186
|
-
|
|
187
|
-
## Self-Hosted Agents and Lifecycle Hooks
|
|
188
|
-
|
|
189
|
-
Self-hosted agents run on your own infrastructure, configured via `buildkite-agent.cfg`. Prefer clustered agents for new deployments — they provide secret scoping, queue isolation, and better organizational control. For full configuration reference, `buildkite-agent.cfg` examples, and clustered vs. unclustered agent details, see `references/self-hosted-agents.md`.
|
|
190
|
-
|
|
191
|
-
Agent lifecycle hooks execute at specific points during job execution: `environment` → `pre-checkout` → `checkout` → `post-checkout` → `pre-command` → `command` → `post-command` → `pre-artifact` → `pre-exit`. Agent-level hooks run first, then repository hooks, then plugin hooks. For hook details and examples, see `references/self-hosted-agents.md`.
|
|
192
|
-
|
|
193
|
-
### Hosted agent caching behavior
|
|
194
|
-
|
|
195
|
-
**Cache volumes on hosted agents are non-deterministic** — jobs may or may not get a warm cache. Treat cache volumes as performance accelerators, not guarantees. Cache volumes are **pipeline-scoped** (not shared across pipelines). For deterministic caching, use Docker images with pre-built dependencies instead. Git mirrors can be enabled via cache volumes to accelerate checkout; mount `.git/lfs/objects` in cache volumes and pre-install `git-lfs` in the agent image.
|
|
196
|
-
|
|
197
|
-
### Hosted agent checkout performance
|
|
198
|
-
|
|
199
|
-
Buildkite's default checkout **prioritizes completeness over speed** — it may be noticeably slower than GitHub Actions for the same repo. Optimize with the Sparse Checkout plugin (monorepos), Git mirrors (frequent builds), or the Git Shallow Clone plugin (repos where full history is unnecessary).
|
|
200
|
-
|
|
201
|
-
### Hosted agent custom hooks
|
|
202
|
-
|
|
203
|
-
Hosted agents support custom hooks via a custom agent image. Add hooks in a Dockerfile:
|
|
204
|
-
|
|
205
|
-
```dockerfile
|
|
206
|
-
FROM buildkite/agent:latest
|
|
207
|
-
|
|
208
|
-
ENV BUILDKITE_ADDITIONAL_HOOKS_PATHS=/custom/hooks
|
|
209
|
-
COPY ./hooks/*.sh /custom/hooks/
|
|
210
|
-
RUN chmod +x /custom/hooks/*.sh
|
|
211
|
-
```
|
|
212
|
-
|
|
213
|
-
### Hosted agent pre-installed tools
|
|
214
|
-
|
|
215
|
-
Linux hosted agents include: `bash`, `curl`, `wget`, `git`, `docker`, `python3`, `jq`.
|
|
216
|
-
|
|
217
|
-
**`nvm` is NOT pre-installed.** Do not source `~/.nvm/nvm.sh` — it will fail silently or exit 127. Use `fnm` instead:
|
|
218
|
-
|
|
219
|
-
```bash
|
|
220
|
-
curl -fsSL https://fnm.vercel.app/install | bash -s -- --install-dir "$HOME/.fnm" --skip-shell
|
|
221
|
-
export PATH="$HOME/.fnm:$PATH" && eval "$(fnm env --use-on-cd)"
|
|
222
|
-
fnm install 20 && fnm use 20
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
`fnm` downloads from `nodejs.org` directly and works for all versions including EOL.
|
|
226
|
-
|
|
227
|
-
**GitHub release asset downloads may be blocked.** `release-assets.githubusercontent.com` is unreachable from hosted agents. Pre-install tools distributed as GitHub release binaries (CodeQL, Scorecard, Trivy) in a custom agent image using `agentImageRef`.
|
|
228
|
-
|
|
229
|
-
**Always verify queue creation** after a GraphQL mutation by listing queues via `GET /v2/organizations/{org}/clusters/{cluster_id}/queues`. Silent GraphQL errors can leave the cluster without a hosted queue — if the list is empty, retry via the REST API.
|
|
230
|
-
|
|
231
|
-
## Plugin Security Controls
|
|
232
|
-
|
|
233
|
-
Restrict which plugins agents can run:
|
|
234
|
-
|
|
235
|
-
- **Agent-level allowlisting** — use `allowed-plugins` in `buildkite-agent.cfg` to restrict to approved plugins
|
|
236
|
-
- **`no-plugins=true`** — disable all plugins on sensitive agents
|
|
237
|
-
- **Cluster-based policies** — apply different plugin restrictions per cluster based on security requirements
|
|
238
|
-
|
|
239
|
-
Audit plugin repositories proactively — Buildkite does not automatically alert to plugin vulnerabilities.
|
|
240
|
-
|
|
241
|
-
## Pipeline Templates
|
|
242
|
-
|
|
243
|
-
Pipeline templates (Enterprise-only) standardize pipeline YAML across the organization. See `references/pipeline-templates.md`.
|
|
244
|
-
|
|
245
|
-
## Audit Logging
|
|
246
|
-
|
|
247
|
-
Audit logging (Enterprise-only) tracks organization-level events for compliance. Query via GraphQL or stream to a SIEM via Amazon EventBridge. See `references/audit-logging.md`.
|
|
248
|
-
|
|
249
|
-
## SSO/SAML
|
|
250
|
-
|
|
251
|
-
Buildkite supports SAML 2.0 (Okta, Azure AD, Google Workspace, OneLogin). See `references/sso-saml.md` for setup flow.
|
|
252
|
-
|
|
253
|
-
## Cost Optimization
|
|
254
|
-
|
|
255
|
-
### Cost reduction patterns
|
|
256
|
-
|
|
257
|
-
| Pattern | Savings | How |
|
|
258
|
-
|---------|---------|-----|
|
|
259
|
-
| Right-size instance shapes | 20-40% | Match shape to actual resource needs |
|
|
260
|
-
| Use `disconnect-after-job` for self-hosted | 10-20% | Ephemeral agents don't idle between jobs |
|
|
261
|
-
| Pause queues during off-hours | 10-30% | `clusterQueuePauseDispatch` on nights/weekends |
|
|
262
|
-
| Skip unnecessary work with `if_changed` | 10-30% | Only run tests for changed code paths |
|
|
263
|
-
| Use `priority` to run critical jobs first | Indirect | Reduces developer wait time for important builds |
|
|
264
|
-
|
|
265
|
-
> For `if_changed` and pipeline optimization patterns, see the **buildkite-pipelines** skill.
|
|
266
|
-
|
|
267
|
-
## Observability and Queue Monitoring
|
|
268
|
-
|
|
269
|
-
| Tool | Purpose | How it works |
|
|
270
|
-
|------|---------|-------------|
|
|
271
|
-
| `buildkite-agent-metrics` | Fleet-level queue and job metrics | Polls the Buildkite API; emits to CloudWatch, Datadog, StatsD |
|
|
272
|
-
| Agent health check service | Per-agent process health | Exposes Prometheus endpoint; scrape from each agent host |
|
|
273
|
-
|
|
274
|
-
**Start with queue profiling** — wait time and checkout time are the biggest, cheapest wins. Target: queue wait time under 2 minutes.
|
|
275
|
-
|
|
276
|
-
### Scaling decision flow
|
|
277
|
-
|
|
278
|
-
```
|
|
279
|
-
Queue wait > 2 min?
|
|
280
|
-
├── Yes → Check agent count
|
|
281
|
-
│ ├── Agents maxed out → Scale up (add agents or increase shape)
|
|
282
|
-
│ ├── Agents idle → Check for job distribution issues (tags, queue routing)
|
|
283
|
-
│ └── No agents → Check token, connectivity, agent health
|
|
284
|
-
└── No → Queue is healthy
|
|
285
|
-
```
|
|
286
|
-
|
|
287
|
-
## Common Mistakes
|
|
288
|
-
|
|
289
|
-
| Mistake | Fix |
|
|
290
|
-
|---------|-----|
|
|
291
|
-
| Secret key starting with `buildkite` or `bk` | Use a different prefix — these are reserved |
|
|
292
|
-
| Secret key with dashes or dots | Only letters, numbers, underscores allowed: `MY_SECRET_KEY` not `my-secret-key` |
|
|
293
|
-
| Not storing agent token at creation time | Token is only shown once — store in secrets manager immediately |
|
|
294
|
-
| Org-level tokens for clustered agents | Use cluster-scoped tokens (`clusterAgentTokenCreate`) |
|
|
295
|
-
| Over-provisioning instance shapes | Start small, monitor, scale up only when builds are slow |
|
|
296
|
-
| No `disconnect-after-job` on autoscaled agents | Set `disconnect-after-job=true` for ephemeral pools |
|
|
297
|
-
| One large queue for all workloads | Create specialized queues per workload type |
|
|
298
|
-
| Cluster creation returns HTTP 500 | List existing clusters first; rename the Default cluster via PATCH as a workaround |
|
|
299
|
-
| "Upgrade to Platform Pro" on hosted queue creation | Fall back: create self-hosted queue, install `buildkite-agent` locally with `--spawn 3` |
|
|
300
|
-
| Expecting cache volumes to always be warm | Design builds to work without cache — volumes are non-deterministic |
|
|
301
|
-
| One IAM role for all queues | Assign different IAM roles per queue; scope secrets by `cluster_queue_key` |
|
|
302
|
-
| Scaling down newest agents first | Retire oldest agents first to preserve warm caches |
|
|
303
|
-
| Jobs stay "scheduled" even though agents are connected | Check that `default_queue_id` matches the agent's queue tag; update via PATCH |
|
|
304
|
-
|
|
305
|
-
## Additional Resources
|
|
306
|
-
|
|
307
|
-
- **`references/graphql-mutations.md`** — GraphQL mutations for clusters, queues, tokens, templates, SSO, audit
|
|
308
|
-
- **`references/instance-shapes.md`** — All hosted agent instance shapes
|
|
309
|
-
- **`references/self-hosted-agents.md`** — Agent config, clustered vs. unclustered, lifecycle hooks
|
|
310
|
-
- **`references/pipeline-templates.md`** — Template mutations and strategy (Enterprise)
|
|
311
|
-
- **`references/audit-logging.md`** — Audit queries, SIEM/EventBridge integration (Enterprise)
|
|
312
|
-
- **`references/sso-saml.md`** — SSO/SAML provider setup
|
|
313
|
-
|
|
314
|
-
## Further Reading
|
|
315
|
-
|
|
316
|
-
- [Buildkite Docs for LLMs](https://buildkite.com/docs/llms.txt)
|
|
317
|
-
- [Manage clusters](https://buildkite.com/docs/clusters/manage-clusters.md)
|
|
318
|
-
- [Manage cluster queues](https://buildkite.com/docs/clusters/manage-queues.md)
|
|
319
|
-
- [Manage cluster secrets](https://buildkite.com/docs/pipelines/security/secrets/buildkite-secrets.md)
|
|
320
|
-
- [Agent configuration](https://buildkite.com/docs/agent/v3/configuration.md)
|
|
321
|
-
- [Agent hooks](https://buildkite.com/docs/agent/v3/hooks.md)
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
interface:
|
|
2
|
-
display_name: "Buildkite Agent Infrastructure"
|
|
3
|
-
short_description: "Clusters, queues, hosted agents, secrets, SSO, and cost optimization"
|
|
4
|
-
icon_small: "./assets/buildkite-icon-small.png"
|
|
5
|
-
icon_large: "./assets/buildkite-icon-large.png"
|
|
6
|
-
brand_color: "#00D974"
|
|
Binary file
|
|
Binary file
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
# Audit Logging and SIEM Integration
|
|
2
|
-
|
|
3
|
-
Audit logging (Enterprise-only) tracks organization-level events for compliance and security monitoring.
|
|
4
|
-
|
|
5
|
-
## Query audit events
|
|
6
|
-
|
|
7
|
-
```graphql
|
|
8
|
-
query {
|
|
9
|
-
organization(slug: "my-org") {
|
|
10
|
-
auditEvents(
|
|
11
|
-
first: 50
|
|
12
|
-
occurredAtFrom: "2026-03-01T00:00:00Z"
|
|
13
|
-
occurredAtTo: "2026-03-26T23:59:59Z"
|
|
14
|
-
) {
|
|
15
|
-
edges {
|
|
16
|
-
node {
|
|
17
|
-
type
|
|
18
|
-
occurredAt
|
|
19
|
-
actor { name type uuid }
|
|
20
|
-
subject { name type uuid }
|
|
21
|
-
data
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
| Filter | Description |
|
|
30
|
-
|--------|-------------|
|
|
31
|
-
| `occurredAtFrom` / `occurredAtTo` | ISO 8601 time range |
|
|
32
|
-
| `type` | Specific audit event type (e.g., `ORGANIZATION_UPDATED`) |
|
|
33
|
-
| `subjectType` | Filter by subject type (e.g., `PIPELINE`, `AGENT_TOKEN`) |
|
|
34
|
-
| `subjectUUID` | Filter by specific subject |
|
|
35
|
-
| `order` | `RECENTLY_OCCURRED` (default) or `OLDEST_OCCURRED` |
|
|
36
|
-
|
|
37
|
-
## High-severity events to monitor
|
|
38
|
-
|
|
39
|
-
| Event type | Why it matters |
|
|
40
|
-
|------------|---------------|
|
|
41
|
-
| `agent_token.created` / `.deleted` | Agent authentication changes |
|
|
42
|
-
| `member.invited` / `.removed` | Team membership changes |
|
|
43
|
-
| `sso_provider.created` / `.updated` | SSO configuration changes |
|
|
44
|
-
| `pipeline_schedule.created` | New automated triggers |
|
|
45
|
-
| `cluster_secret.created` / `.deleted` | Secret management changes |
|
|
46
|
-
| `organization.updated` | Org-level setting changes |
|
|
47
|
-
|
|
48
|
-
## SIEM integration via Amazon EventBridge
|
|
49
|
-
|
|
50
|
-
Stream audit events to a SIEM in real time using EventBridge:
|
|
51
|
-
|
|
52
|
-
- **Source:** `aws.partner/buildkite.com/buildkite/<partner-event-source-id>`
|
|
53
|
-
- **Detail type:** `"Audit Event Logged"`
|
|
54
|
-
|
|
55
|
-
Event payload structure:
|
|
56
|
-
|
|
57
|
-
```json
|
|
58
|
-
{
|
|
59
|
-
"organization": {
|
|
60
|
-
"uuid": "org-uuid",
|
|
61
|
-
"graphql_id": "T3JnYW5pemF0aW9u...",
|
|
62
|
-
"slug": "my-org"
|
|
63
|
-
},
|
|
64
|
-
"event": {
|
|
65
|
-
"uuid": "event-uuid",
|
|
66
|
-
"occurred_at": "2026-03-26T14:30:00Z",
|
|
67
|
-
"type": "agent_token.created",
|
|
68
|
-
"data": { },
|
|
69
|
-
"subject_type": "AgentToken",
|
|
70
|
-
"subject_uuid": "token-uuid",
|
|
71
|
-
"subject_name": "Production agents",
|
|
72
|
-
"context": {
|
|
73
|
-
"request_id": "req-uuid",
|
|
74
|
-
"request_ip": "203.0.113.42",
|
|
75
|
-
"session_user_uuid": "user-uuid",
|
|
76
|
-
"request_user_agent": "Mozilla/5.0..."
|
|
77
|
-
}
|
|
78
|
-
},
|
|
79
|
-
"actor": {
|
|
80
|
-
"name": "Jane Engineer",
|
|
81
|
-
"type": "USER",
|
|
82
|
-
"uuid": "user-uuid"
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
```
|
|
86
|
-
|
|
87
|
-
Route high-severity events to PagerDuty, Splunk, or Datadog via EventBridge rules matching on `detail.event.type`.
|