@techwavedev/agi-agent-kit 1.1.7 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @techwavedev/agi-agent-kit might be problematic. Click here for more details.
- package/CHANGELOG.md +82 -1
- package/README.md +190 -12
- package/bin/init.js +30 -2
- package/package.json +6 -3
- package/templates/base/AGENTS.md +54 -23
- package/templates/base/README.md +325 -0
- package/templates/base/directives/memory_integration.md +95 -0
- package/templates/base/execution/memory_manager.py +309 -0
- package/templates/base/execution/session_boot.py +218 -0
- package/templates/base/execution/session_init.py +320 -0
- package/templates/base/skill-creator/SKILL_skillcreator.md +23 -36
- package/templates/base/skill-creator/scripts/init_skill.py +18 -135
- package/templates/skills/ec/README.md +31 -0
- package/templates/skills/ec/aws/SKILL.md +1020 -0
- package/templates/skills/ec/aws/defaults.yaml +13 -0
- package/templates/skills/ec/aws/references/common_patterns.md +80 -0
- package/templates/skills/ec/aws/references/mcp_servers.md +98 -0
- package/templates/skills/ec/aws-terraform/SKILL.md +349 -0
- package/templates/skills/ec/aws-terraform/references/best_practices.md +394 -0
- package/templates/skills/ec/aws-terraform/references/checkov_reference.md +337 -0
- package/templates/skills/ec/aws-terraform/scripts/configure_mcp.py +150 -0
- package/templates/skills/ec/confluent-kafka/SKILL.md +655 -0
- package/templates/skills/ec/confluent-kafka/references/ansible_playbooks.md +792 -0
- package/templates/skills/ec/confluent-kafka/references/ec_deployment.md +579 -0
- package/templates/skills/ec/confluent-kafka/references/kraft_migration.md +490 -0
- package/templates/skills/ec/confluent-kafka/references/troubleshooting.md +778 -0
- package/templates/skills/ec/confluent-kafka/references/upgrade_7x_to_8x.md +488 -0
- package/templates/skills/ec/confluent-kafka/scripts/kafka_health_check.py +435 -0
- package/templates/skills/ec/confluent-kafka/scripts/upgrade_preflight.py +568 -0
- package/templates/skills/ec/confluent-kafka/scripts/validate_config.py +455 -0
- package/templates/skills/ec/consul/SKILL.md +427 -0
- package/templates/skills/ec/consul/references/acl_setup.md +168 -0
- package/templates/skills/ec/consul/references/ha_config.md +196 -0
- package/templates/skills/ec/consul/references/troubleshooting.md +267 -0
- package/templates/skills/ec/consul/references/upgrades.md +213 -0
- package/templates/skills/ec/consul/scripts/consul_health_report.py +530 -0
- package/templates/skills/ec/consul/scripts/consul_status.py +264 -0
- package/templates/skills/ec/consul/scripts/generate_values.py +170 -0
- package/templates/skills/ec/documentation/SKILL.md +351 -0
- package/templates/skills/ec/documentation/references/best_practices.md +201 -0
- package/templates/skills/ec/documentation/scripts/analyze_code.py +307 -0
- package/templates/skills/ec/documentation/scripts/detect_changes.py +460 -0
- package/templates/skills/ec/documentation/scripts/generate_changelog.py +312 -0
- package/templates/skills/ec/documentation/scripts/sync_docs.py +272 -0
- package/templates/skills/ec/documentation/scripts/update_skill_docs.py +366 -0
- package/templates/skills/ec/gitlab/SKILL.md +529 -0
- package/templates/skills/ec/gitlab/references/agent_installation.md +416 -0
- package/templates/skills/ec/gitlab/references/api_reference.md +508 -0
- package/templates/skills/ec/gitlab/references/gitops_flux.md +465 -0
- package/templates/skills/ec/gitlab/references/troubleshooting.md +518 -0
- package/templates/skills/ec/gitlab/scripts/generate_agent_values.py +329 -0
- package/templates/skills/ec/gitlab/scripts/gitlab_agent_status.py +414 -0
- package/templates/skills/ec/jira/SKILL.md +484 -0
- package/templates/skills/ec/jira/references/jql_reference.md +148 -0
- package/templates/skills/ec/jira/scripts/add_comment.py +91 -0
- package/templates/skills/ec/jira/scripts/bulk_log_work.py +124 -0
- package/templates/skills/ec/jira/scripts/create_ticket.py +162 -0
- package/templates/skills/ec/jira/scripts/get_ticket.py +191 -0
- package/templates/skills/ec/jira/scripts/jira_client.py +383 -0
- package/templates/skills/ec/jira/scripts/log_work.py +154 -0
- package/templates/skills/ec/jira/scripts/search_tickets.py +104 -0
- package/templates/skills/ec/jira/scripts/update_comment.py +67 -0
- package/templates/skills/ec/jira/scripts/update_ticket.py +161 -0
- package/templates/skills/ec/karpenter/SKILL.md +301 -0
- package/templates/skills/ec/karpenter/references/ec2nodeclasses.md +421 -0
- package/templates/skills/ec/karpenter/references/migration.md +396 -0
- package/templates/skills/ec/karpenter/references/nodepools.md +400 -0
- package/templates/skills/ec/karpenter/references/troubleshooting.md +359 -0
- package/templates/skills/ec/karpenter/scripts/generate_ec2nodeclass.py +187 -0
- package/templates/skills/ec/karpenter/scripts/generate_nodepool.py +245 -0
- package/templates/skills/ec/karpenter/scripts/karpenter_status.py +359 -0
- package/templates/skills/ec/opensearch/SKILL.md +720 -0
- package/templates/skills/ec/opensearch/references/ml_neural_search.md +576 -0
- package/templates/skills/ec/opensearch/references/operator.md +532 -0
- package/templates/skills/ec/opensearch/references/query_dsl.md +532 -0
- package/templates/skills/ec/opensearch/scripts/configure_mcp.py +148 -0
- package/templates/skills/ec/victoriametrics/SKILL.md +598 -0
- package/templates/skills/ec/victoriametrics/references/kubernetes.md +531 -0
- package/templates/skills/ec/victoriametrics/references/prometheus_migration.md +333 -0
- package/templates/skills/ec/victoriametrics/references/troubleshooting.md +442 -0
- package/templates/skills/knowledge/SKILLS_CATALOG.md +274 -4
- package/templates/skills/knowledge/intelligent-routing/SKILL.md +237 -164
- package/templates/skills/knowledge/parallel-agents/SKILL.md +345 -73
- package/templates/skills/knowledge/plugin-discovery/SKILL.md +582 -0
- package/templates/skills/knowledge/plugin-discovery/scripts/platform_setup.py +1083 -0
- package/templates/skills/knowledge/design-md/README.md +0 -34
- package/templates/skills/knowledge/design-md/SKILL.md +0 -193
- package/templates/skills/knowledge/design-md/examples/DESIGN.md +0 -154
- package/templates/skills/knowledge/notebooklm-mcp/SKILL.md +0 -71
- package/templates/skills/knowledge/notebooklm-mcp/assets/example_asset.txt +0 -24
- package/templates/skills/knowledge/notebooklm-mcp/references/api_reference.md +0 -34
- package/templates/skills/knowledge/notebooklm-mcp/scripts/example.py +0 -19
- package/templates/skills/knowledge/react-components/README.md +0 -36
- package/templates/skills/knowledge/react-components/SKILL.md +0 -53
- package/templates/skills/knowledge/react-components/examples/gold-standard-card.tsx +0 -80
- package/templates/skills/knowledge/react-components/package-lock.json +0 -231
- package/templates/skills/knowledge/react-components/package.json +0 -16
- package/templates/skills/knowledge/react-components/resources/architecture-checklist.md +0 -15
- package/templates/skills/knowledge/react-components/resources/component-template.tsx +0 -37
- package/templates/skills/knowledge/react-components/resources/stitch-api-reference.md +0 -14
- package/templates/skills/knowledge/react-components/resources/style-guide.json +0 -27
- package/templates/skills/knowledge/react-components/scripts/fetch-stitch.sh +0 -30
- package/templates/skills/knowledge/react-components/scripts/validate.js +0 -68
- package/templates/skills/knowledge/self-update/SKILL.md +0 -60
- package/templates/skills/knowledge/self-update/scripts/update_kit.py +0 -103
- package/templates/skills/knowledge/stitch-loop/README.md +0 -54
- package/templates/skills/knowledge/stitch-loop/SKILL.md +0 -235
- package/templates/skills/knowledge/stitch-loop/examples/SITE.md +0 -73
- package/templates/skills/knowledge/stitch-loop/examples/next-prompt.md +0 -25
- package/templates/skills/knowledge/stitch-loop/resources/baton-schema.md +0 -61
- package/templates/skills/knowledge/stitch-loop/resources/site-template.md +0 -104
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
# GitLab Agent Troubleshooting
|
|
2
|
+
|
|
3
|
+
Troubleshooting guide for GitLab Agent for Kubernetes from a **project owner/operator perspective** — focusing on cluster-side diagnostics and fixes you can perform without GitLab server admin access.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Quick Diagnostics
|
|
8
|
+
|
|
9
|
+
### First Steps
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# 1. Check agent pod status
|
|
13
|
+
kubectl get pods -n gitlab-agent
|
|
14
|
+
|
|
15
|
+
# 2. View agent logs (most issues show here)
|
|
16
|
+
kubectl logs -f -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent
|
|
17
|
+
|
|
18
|
+
# 3. Check events for errors
|
|
19
|
+
kubectl get events -n gitlab-agent --sort-by='.lastTimestamp'
|
|
20
|
+
|
|
21
|
+
# 4. Describe pod for detailed status
|
|
22
|
+
kubectl describe pod -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### Agent Status in GitLab UI
|
|
26
|
+
|
|
27
|
+
Navigate to: **Project → Operate → Kubernetes clusters**
|
|
28
|
+
|
|
29
|
+
- ✅ **Connected** — Agent is healthy
|
|
30
|
+
- ⚠️ **Never connected** — Token or network issue
|
|
31
|
+
- ❌ **Not connected** — Agent was connected but lost connection
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Connection Issues
|
|
36
|
+
|
|
37
|
+
### Error: "failed to WebSocket dial"
|
|
38
|
+
|
|
39
|
+
**Symptom in logs:**
|
|
40
|
+
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"level": "warn",
|
|
44
|
+
"msg": "GetConfiguration failed",
|
|
45
|
+
"error": "transport: Error while dialing failed to WebSocket dial: ... no such host"
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Causes & Solutions:**
|
|
50
|
+
|
|
51
|
+
| Cause | Solution |
|
|
52
|
+
| ----------------------- | -------------------------------------------------- |
|
|
53
|
+
| Wrong KAS address | Verify `config.kasAddress` in Helm values |
|
|
54
|
+
| DNS resolution failure | Check cluster DNS, verify GitLab hostname resolves |
|
|
55
|
+
| Network policy blocking | Allow egress to GitLab on port 443 |
|
|
56
|
+
| Proxy/Firewall blocking | Whitelist GitLab host, allow WebSocket upgrade |
|
|
57
|
+
|
|
58
|
+
**Fix:**
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Verify DNS from cluster
|
|
62
|
+
kubectl run dns-test --rm -it --restart=Never --image=busybox -- \
|
|
63
|
+
nslookup gitlab.example.com
|
|
64
|
+
|
|
65
|
+
# Test connectivity
|
|
66
|
+
kubectl run curl-test --rm -it --restart=Never --image=curlimages/curl -- \
|
|
67
|
+
curl -v "https://gitlab.example.com/-/kubernetes-agent/"
|
|
68
|
+
|
|
69
|
+
# Check/update Helm values
|
|
70
|
+
helm get values gitlab-agent -n gitlab-agent
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
### Error: "HTTP 301" on Handshake
|
|
76
|
+
|
|
77
|
+
**Symptom in logs:**
|
|
78
|
+
|
|
79
|
+
```json
|
|
80
|
+
{
|
|
81
|
+
"error": "expected handshake response status code 101 but got 301"
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
**Cause:** Missing trailing slash in KAS address.
|
|
86
|
+
|
|
87
|
+
**Fix:**
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
# WRONG
|
|
91
|
+
--set config.kasAddress="wss://gitlab.example.com/-/kubernetes-agent"
|
|
92
|
+
|
|
93
|
+
# CORRECT (with trailing slash)
|
|
94
|
+
--set config.kasAddress="wss://gitlab.example.com/-/kubernetes-agent/"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
helm upgrade gitlab-agent gitlab/gitlab-agent \
|
|
99
|
+
--namespace gitlab-agent \
|
|
100
|
+
--reuse-values \
|
|
101
|
+
--set config.kasAddress="wss://gitlab.example.com/-/kubernetes-agent/"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
### Error: "certificate signed by unknown authority"
|
|
107
|
+
|
|
108
|
+
**Symptom in logs:**
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"error": "x509: certificate signed by unknown authority"
|
|
113
|
+
}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Cause:** GitLab uses a self-signed certificate or internal CA that the agent doesn't trust.
|
|
117
|
+
|
|
118
|
+
**Fix:**
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
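# 1. Fetch the certificate GitLab presents (this keeps only the first certificate in the chain, i.e. the server certificate; for an internal CA, prefer the CA certificate from your PKI team)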
|
|
122
|
+
openssl s_client -connect gitlab.example.com:443 -showcerts </dev/null 2>/dev/null | \
|
|
123
|
+
openssl x509 -outform PEM > gitlab-ca.pem
|
|
124
|
+
|
|
125
|
+
# 2. Verify it's the right cert
|
|
126
|
+
openssl x509 -in gitlab-ca.pem -text -noout | head -20
|
|
127
|
+
|
|
128
|
+
# 3. Reinstall agent with CA
|
|
129
|
+
helm upgrade --install gitlab-agent gitlab/gitlab-agent \
|
|
130
|
+
--namespace gitlab-agent \
|
|
131
|
+
--set config.token="${AGENT_TOKEN}" \
|
|
132
|
+
--set config.kasAddress="wss://gitlab.example.com/-/kubernetes-agent/" \
|
|
133
|
+
--set-file config.kasCaCert=./gitlab-ca.pem
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
**Verify CA is mounted:**
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
kubectl get configmap -l=app=gitlab-agent -n gitlab-agent -o yaml | grep -A20 "ca.crt"
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
### Error: "Decompressor not installed for grpc-encoding"
|
|
145
|
+
|
|
146
|
+
**Symptom in logs:**
|
|
147
|
+
|
|
148
|
+
```json
|
|
149
|
+
{
|
|
150
|
+
"error": "grpc: Decompressor is not installed for grpc-encoding \"gzip\""
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Cause:** Agent version is newer than KAS server version.
|
|
155
|
+
|
|
156
|
+
**Fix:** Downgrade agent to match GitLab version:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
# Check current agent version
|
|
160
|
+
kubectl get deployment gitlab-agent -n gitlab-agent -o jsonpath='{.spec.template.spec.containers[0].image}'
|
|
161
|
+
|
|
162
|
+
# Ask GitLab admin for GitLab version, or check in GitLab UI (Help → GitLab version)
|
|
163
|
+
# Agent should match major.minor version
|
|
164
|
+
|
|
165
|
+
helm upgrade gitlab-agent gitlab/gitlab-agent \
|
|
166
|
+
--namespace gitlab-agent \
|
|
167
|
+
--reuse-values \
|
|
168
|
+
--set image.tag=v17.6.0 # Match your GitLab version
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## Authentication Issues
|
|
174
|
+
|
|
175
|
+
### Error: "Failed to register agent pod"
|
|
176
|
+
|
|
177
|
+
**Symptom in logs:**
|
|
178
|
+
|
|
179
|
+
```json
|
|
180
|
+
{
|
|
181
|
+
"msg": "Failed to register agent pod. Please make sure the agent version matches the server version"
|
|
182
|
+
}
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Causes:**
|
|
186
|
+
|
|
187
|
+
1. Version mismatch between agent and GitLab
|
|
188
|
+
2. Invalid or expired token
|
|
189
|
+
3. KAS service not running (GitLab server issue)
|
|
190
|
+
|
|
191
|
+
**Fixes you can try:**
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
# 1. Verify token is active via API
|
|
195
|
+
curl --header "PRIVATE-TOKEN: ${GITLAB_TOKEN}" \
|
|
196
|
+
"https://${GITLAB_HOST}/api/v4/projects/${PROJECT_ID}/cluster_agents/${AGENT_ID}/tokens" | jq
|
|
197
|
+
|
|
198
|
+
# 2. Create new token if needed
|
|
199
|
+
NEW_TOKEN=$(curl --silent --request POST \
|
|
200
|
+
--header "PRIVATE-TOKEN: ${GITLAB_TOKEN}" \
|
|
201
|
+
--header "Content-Type: application/json" \
|
|
202
|
+
--url "https://${GITLAB_HOST}/api/v4/projects/${PROJECT_ID}/cluster_agents/${AGENT_ID}/tokens" \
|
|
203
|
+
--data '{"name":"refresh-token"}' | jq -r '.token')
|
|
204
|
+
|
|
205
|
+
# 3. Update agent with new token
|
|
206
|
+
helm upgrade gitlab-agent gitlab/gitlab-agent \
|
|
207
|
+
--namespace gitlab-agent \
|
|
208
|
+
--reuse-values \
|
|
209
|
+
--set config.token="${NEW_TOKEN}"
|
|
210
|
+
|
|
211
|
+
# 4. Match agent version to GitLab
|
|
212
|
+
helm upgrade gitlab-agent gitlab/gitlab-agent \
|
|
213
|
+
--namespace gitlab-agent \
|
|
214
|
+
--reuse-values \
|
|
215
|
+
--set image.tag=v17.6.0
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
**If still failing:** Contact GitLab admin to verify KAS is running:
|
|
219
|
+
|
|
220
|
+
```bash
|
|
221
|
+
# GitLab admin command (not accessible to project owners)
|
|
222
|
+
gitlab-ctl status gitlab-kas
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
### Agent Shows "Never Connected" in UI
|
|
228
|
+
|
|
229
|
+
**Checklist:**
|
|
230
|
+
|
|
231
|
+
- [ ] Token was created and saved correctly
|
|
232
|
+
- [ ] Token is active (not revoked): Check via API
|
|
233
|
+
- [ ] Helm install completed successfully
|
|
234
|
+
- [ ] Pod is Running: `kubectl get pods -n gitlab-agent`
|
|
235
|
+
- [ ] No errors in logs: `kubectl logs -f -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent`
|
|
236
|
+
- [ ] Network allows outbound to GitLab on 443
|
|
237
|
+
|
|
238
|
+
**Debug:**
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
# Check pod status
|
|
242
|
+
kubectl get pods -n gitlab-agent -o wide
|
|
243
|
+
|
|
244
|
+
# Check for crashloops
|
|
245
|
+
kubectl describe pod -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent | grep -A5 "State:"
|
|
246
|
+
|
|
247
|
+
# Verify token in secret
|
|
248
|
+
kubectl get secret -n gitlab-agent gitlab-agent-token -o jsonpath='{.data.token}' | base64 -d
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## CI/CD Issues
|
|
254
|
+
|
|
255
|
+
### Error: "kubectl config use-context: context not found"
|
|
256
|
+
|
|
257
|
+
**Symptom in pipeline:**
|
|
258
|
+
|
|
259
|
+
```
|
|
260
|
+
error: no context exists with the name: "my-project/my-agent:agent-name"
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
**Causes:**
|
|
264
|
+
|
|
265
|
+
1. Agent name mismatch
|
|
266
|
+
2. Project path format incorrect
|
|
267
|
+
3. CI/CD access not configured
|
|
268
|
+
|
|
269
|
+
**Fix:**
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
# 1. List available contexts in pipeline
|
|
273
|
+
kubectl config get-contexts
|
|
274
|
+
|
|
275
|
+
# 2. Use correct format: <path/with/namespace>:<agent-name>
|
|
276
|
+
kubectl config use-context "group/project:eks-nonprod-agent"
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
**Configure CI/CD access in agent config:**
|
|
280
|
+
|
|
281
|
+
```yaml
|
|
282
|
+
# .gitlab/agents/eks-nonprod-agent/config.yaml
|
|
283
|
+
ci_access:
|
|
284
|
+
projects:
|
|
285
|
+
- id: path/to/your/project
|
|
286
|
+
groups:
|
|
287
|
+
- id: path/to/your/group
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
---
|
|
291
|
+
|
|
292
|
+
### Error: "x509: certificate signed by unknown authority" in Pipeline
|
|
293
|
+
|
|
294
|
+
**Symptom:** kubectl commands fail in CI/CD with certificate errors.
|
|
295
|
+
|
|
296
|
+
**Fix:** Configure the KAS CA in `.gitlab-ci.yml`:
|
|
297
|
+
|
|
298
|
+
```yaml
|
|
299
|
+
deploy:
|
|
300
|
+
image: bitnami/kubectl:latest
|
|
301
|
+
before_script:
|
|
302
|
+
# Trust the GitLab CA
|
|
303
|
+
- echo "$KUBE_CA_CERT" > /tmp/gitlab-ca.crt
|
|
304
|
+
- export SSL_CERT_FILE=/tmp/gitlab-ca.crt
|
|
305
|
+
script:
|
|
306
|
+
- kubectl config use-context path/to/project:agent-name
|
|
307
|
+
- kubectl get pods
|
|
308
|
+
variables:
|
|
309
|
+
KUBE_CA_CERT: |
|
|
310
|
+
-----BEGIN CERTIFICATE-----
|
|
311
|
+
... your CA certificate ...
|
|
312
|
+
-----END CERTIFICATE-----
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
Or use a CI/CD variable for the certificate:
|
|
316
|
+
|
|
317
|
+
1. Go to **Project → Settings → CI/CD → Variables**
|
|
318
|
+
2. Add `KUBE_CA_CERT` with the CA certificate content
|
|
319
|
+
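3. Set as "File" type — `$KUBE_CA_CERT` then holds the path to a temporary file containing the certificate, so use `export SSL_CERT_FILE="$KUBE_CA_CERT"` instead of the `echo` step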
|
|
320
|
+
|
|
321
|
+
---
|
|
322
|
+
|
|
323
|
+
### Error: "permission denied" in kubectl Commands
|
|
324
|
+
|
|
325
|
+
**Cause:** Agent service account lacks permissions.
|
|
326
|
+
|
|
327
|
+
**Fix:** Expand RBAC for agent (you control this in EKS):
|
|
328
|
+
|
|
329
|
+
```yaml
|
|
330
|
+
# cluster-role-expansion.yaml
|
|
331
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
332
|
+
kind: ClusterRole
|
|
333
|
+
metadata:
|
|
334
|
+
name: gitlab-agent-role
|
|
335
|
+
rules:
|
|
336
|
+
- apiGroups: [""]
|
|
337
|
+
resources: ["pods", "services", "configmaps", "secrets"]
|
|
338
|
+
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
|
|
339
|
+
- apiGroups: ["apps"]
|
|
340
|
+
resources: ["deployments", "statefulsets", "daemonsets"]
|
|
341
|
+
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
|
|
342
|
+
# Add more as needed
|
|
343
|
+
---
|
|
344
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
345
|
+
kind: ClusterRoleBinding
|
|
346
|
+
metadata:
|
|
347
|
+
name: gitlab-agent-binding
|
|
348
|
+
roleRef:
|
|
349
|
+
apiGroup: rbac.authorization.k8s.io
|
|
350
|
+
kind: ClusterRole
|
|
351
|
+
name: gitlab-agent-role
|
|
352
|
+
subjects:
|
|
353
|
+
- kind: ServiceAccount
|
|
354
|
+
name: gitlab-agent
|
|
355
|
+
namespace: gitlab-agent
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
```bash
|
|
359
|
+
kubectl apply -f cluster-role-expansion.yaml
|
|
360
|
+
|
|
361
|
+
# Update Helm to use custom role
|
|
362
|
+
helm upgrade gitlab-agent gitlab/gitlab-agent \
|
|
363
|
+
--namespace gitlab-agent \
|
|
364
|
+
--reuse-values \
|
|
365
|
+
--set rbac.useExistingRole=gitlab-agent-role
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
---
|
|
369
|
+
|
|
370
|
+
## Agent Version Issues
|
|
371
|
+
|
|
372
|
+
### "Agent version mismatch" Warning in UI
|
|
373
|
+
|
|
374
|
+
**Meaning:** Multiple agent pods with different versions, or version cache issue.
|
|
375
|
+
|
|
376
|
+
**Fixes:**
|
|
377
|
+
|
|
378
|
+
```bash
|
|
379
|
+
# 1. Check all agent pods have same version
|
|
380
|
+
kubectl get pods -n gitlab-agent -o jsonpath='{.items[*].spec.containers[*].image}'
|
|
381
|
+
|
|
382
|
+
# 2. Force single replica during upgrade
|
|
383
|
+
helm upgrade gitlab-agent gitlab/gitlab-agent \
|
|
384
|
+
--namespace gitlab-agent \
|
|
385
|
+
--reuse-values \
|
|
386
|
+
--set replicaCount=1 \
|
|
387
|
+
--set image.tag=v17.6.0
|
|
388
|
+
|
|
389
|
+
# 3. Wait for rollout
|
|
390
|
+
kubectl rollout status deployment/gitlab-agent -n gitlab-agent
|
|
391
|
+
|
|
392
|
+
# 4. Scale back up (if needed)
|
|
393
|
+
helm upgrade gitlab-agent gitlab/gitlab-agent \
|
|
394
|
+
--namespace gitlab-agent \
|
|
395
|
+
--reuse-values \
|
|
396
|
+
--set replicaCount=2
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
**Note:** Wait ~20 minutes for GitLab's agent version cache to update.
|
|
400
|
+
|
|
401
|
+
---
|
|
402
|
+
|
|
403
|
+
## Pod Issues
|
|
404
|
+
|
|
405
|
+
### CrashLoopBackOff
|
|
406
|
+
|
|
407
|
+
```bash
|
|
408
|
+
# Check events
|
|
409
|
+
kubectl describe pod -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent
|
|
410
|
+
|
|
411
|
+
# Check logs from crashed container
|
|
412
|
+
kubectl logs -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent --previous
|
|
413
|
+
|
|
414
|
+
# Common causes:
|
|
415
|
+
# - Invalid token (check token is correctly set)
|
|
416
|
+
# - Invalid KAS address
|
|
417
|
+
# - OOM (increase memory limits)
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
### OOMKilled
|
|
421
|
+
|
|
422
|
+
```bash
|
|
423
|
+
# Check current limits
|
|
424
|
+
kubectl get deployment gitlab-agent -n gitlab-agent -o jsonpath='{.spec.template.spec.containers[0].resources}'
|
|
425
|
+
|
|
426
|
+
# Increase limits
|
|
427
|
+
helm upgrade gitlab-agent gitlab/gitlab-agent \
|
|
428
|
+
--namespace gitlab-agent \
|
|
429
|
+
--reuse-values \
|
|
430
|
+
--set resources.limits.memory=512Mi \
|
|
431
|
+
--set resources.requests.memory=256Mi
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
---
|
|
435
|
+
|
|
436
|
+
## Flux/GitOps Issues
|
|
437
|
+
|
|
438
|
+
### Flux Not Syncing
|
|
439
|
+
|
|
440
|
+
```bash
|
|
441
|
+
# Check Flux sources
|
|
442
|
+
flux get sources git
|
|
443
|
+
|
|
444
|
+
# Check Flux kustomizations
|
|
445
|
+
flux get kustomization
|
|
446
|
+
|
|
447
|
+
# Force reconciliation
|
|
448
|
+
flux reconcile kustomization my-app --with-source
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
### "Unable to clone repository" in Flux
|
|
452
|
+
|
|
453
|
+
**Check GitLab secret:**
|
|
454
|
+
|
|
455
|
+
```bash
|
|
456
|
+
kubectl get secret gitlab-token -n flux-system -o yaml
|
|
457
|
+
```
|
|
458
|
+
|
|
459
|
+
**Recreate if needed:**
|
|
460
|
+
|
|
461
|
+
```bash
|
|
462
|
+
kubectl delete secret gitlab-token -n flux-system
|
|
463
|
+
kubectl create secret generic gitlab-token \
|
|
464
|
+
--namespace=flux-system \
|
|
465
|
+
--from-literal=username=git \
|
|
466
|
+
--from-literal=password=${GITLAB_TOKEN}
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
---
|
|
470
|
+
|
|
471
|
+
## Diagnostic Commands Summary
|
|
472
|
+
|
|
473
|
+
```bash
|
|
474
|
+
# Agent status
|
|
475
|
+
kubectl get pods -n gitlab-agent
|
|
476
|
+
kubectl logs -f -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent
|
|
477
|
+
kubectl describe pod -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent
|
|
478
|
+
|
|
479
|
+
# Helm status
|
|
480
|
+
helm list -n gitlab-agent
|
|
481
|
+
helm get values gitlab-agent -n gitlab-agent
|
|
482
|
+
helm history gitlab-agent -n gitlab-agent
|
|
483
|
+
|
|
484
|
+
# API checks
|
|
485
|
+
curl --header "PRIVATE-TOKEN: ${GITLAB_TOKEN}" \
|
|
486
|
+
"https://${GITLAB_HOST}/api/v4/projects/${PROJECT_ID}/cluster_agents" | jq
|
|
487
|
+
curl --header "PRIVATE-TOKEN: ${GITLAB_TOKEN}" \
|
|
488
|
+
"https://${GITLAB_HOST}/api/v4/projects/${PROJECT_ID}/cluster_agents/${AGENT_ID}/tokens" | jq
|
|
489
|
+
|
|
490
|
+
# Network tests
|
|
491
|
+
kubectl run curl-test --rm -it --restart=Never --image=curlimages/curl -- \
|
|
492
|
+
curl -v "https://${GITLAB_HOST}/-/kubernetes-agent/"
|
|
493
|
+
|
|
494
|
+
# Config verification
|
|
495
|
+
kubectl get configmap -l=app=gitlab-agent -n gitlab-agent -o yaml
|
|
496
|
+
kubectl get secret -n gitlab-agent -o yaml
|
|
497
|
+
```
|
|
498
|
+
|
|
499
|
+
---
|
|
500
|
+
|
|
501
|
+
## When to Escalate to GitLab Admin
|
|
502
|
+
|
|
503
|
+
Escalate these issues — they require GitLab server access:
|
|
504
|
+
|
|
505
|
+
| Issue | Why Admin Needed |
|
|
506
|
+
| ----------------------------------- | ---------------------------- |
|
|
507
|
+
| KAS not responding at all | Server-side service issue |
|
|
508
|
+
| "KAS internal error" in agent logs | Server-side configuration |
|
|
509
|
+
| GitLab upgrade broke agents | Server version change |
|
|
510
|
+
| Need higher token limits | License/configuration change |
|
|
511
|
+
| Certificate issues at GitLab server | Server TLS configuration |
|
|
512
|
+
|
|
513
|
+
**Information to provide:**
|
|
514
|
+
|
|
515
|
+
1. Agent logs: `kubectl logs -l=app.kubernetes.io/name=gitlab-agent -n gitlab-agent --tail=100`
|
|
516
|
+
2. Agent version: `kubectl get deployment gitlab-agent -n gitlab-agent -o jsonpath='{.spec.template.spec.containers[0].image}'`
|
|
517
|
+
3. Error messages (exact JSON from logs)
|
|
518
|
+
4. Project path and agent name
|