zoe-agent 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +154 -0
- package/LICENSE +96 -0
- package/README.md +568 -0
- package/dist/adapters/cli/agent.d.ts +59 -0
- package/dist/adapters/cli/agent.js +232 -0
- package/dist/adapters/cli/bootstrap.d.ts +25 -0
- package/dist/adapters/cli/bootstrap.js +204 -0
- package/dist/adapters/cli/commands/build-registry.d.ts +14 -0
- package/dist/adapters/cli/commands/build-registry.js +88 -0
- package/dist/adapters/cli/commands/clear.d.ts +7 -0
- package/dist/adapters/cli/commands/clear.js +10 -0
- package/dist/adapters/cli/commands/compact.d.ts +13 -0
- package/dist/adapters/cli/commands/compact.js +96 -0
- package/dist/adapters/cli/commands/exit.d.ts +7 -0
- package/dist/adapters/cli/commands/exit.js +9 -0
- package/dist/adapters/cli/commands/gateway.d.ts +7 -0
- package/dist/adapters/cli/commands/gateway.js +152 -0
- package/dist/adapters/cli/commands/help.d.ts +9 -0
- package/dist/adapters/cli/commands/help.js +12 -0
- package/dist/adapters/cli/commands/models.d.ts +10 -0
- package/dist/adapters/cli/commands/models.js +32 -0
- package/dist/adapters/cli/commands/registry.d.ts +70 -0
- package/dist/adapters/cli/commands/registry.js +111 -0
- package/dist/adapters/cli/commands/settings-utils.d.ts +38 -0
- package/dist/adapters/cli/commands/settings-utils.js +182 -0
- package/dist/adapters/cli/commands/settings.d.ts +9 -0
- package/dist/adapters/cli/commands/settings.js +395 -0
- package/dist/adapters/cli/commands/skills.d.ts +7 -0
- package/dist/adapters/cli/commands/skills.js +21 -0
- package/dist/adapters/cli/config-loader.d.ts +27 -0
- package/dist/adapters/cli/config-loader.js +48 -0
- package/dist/adapters/cli/docker-utils.d.ts +37 -0
- package/dist/adapters/cli/docker-utils.js +90 -0
- package/dist/adapters/cli/index.d.ts +2 -0
- package/dist/adapters/cli/index.js +88 -0
- package/dist/adapters/cli/repl.d.ts +22 -0
- package/dist/adapters/cli/repl.js +256 -0
- package/dist/adapters/cli/setup.d.ts +19 -0
- package/dist/adapters/cli/setup.js +613 -0
- package/dist/adapters/cli/system-prompts.d.ts +56 -0
- package/dist/adapters/cli/system-prompts.js +131 -0
- package/dist/adapters/cli/tui/app.d.ts +58 -0
- package/dist/adapters/cli/tui/app.js +314 -0
- package/dist/adapters/cli/tui/components/assistant-message.d.ts +5 -0
- package/dist/adapters/cli/tui/components/assistant-message.js +9 -0
- package/dist/adapters/cli/tui/components/autocomplete.d.ts +19 -0
- package/dist/adapters/cli/tui/components/autocomplete.js +75 -0
- package/dist/adapters/cli/tui/components/command-palette.d.ts +15 -0
- package/dist/adapters/cli/tui/components/command-palette.js +50 -0
- package/dist/adapters/cli/tui/components/diff-viewer.d.ts +5 -0
- package/dist/adapters/cli/tui/components/diff-viewer.js +109 -0
- package/dist/adapters/cli/tui/components/error-message.d.ts +5 -0
- package/dist/adapters/cli/tui/components/error-message.js +8 -0
- package/dist/adapters/cli/tui/components/footer.d.ts +20 -0
- package/dist/adapters/cli/tui/components/footer.js +19 -0
- package/dist/adapters/cli/tui/components/goal-status.d.ts +12 -0
- package/dist/adapters/cli/tui/components/goal-status.js +22 -0
- package/dist/adapters/cli/tui/components/info-message.d.ts +5 -0
- package/dist/adapters/cli/tui/components/info-message.js +8 -0
- package/dist/adapters/cli/tui/components/logo-banner.d.ts +7 -0
- package/dist/adapters/cli/tui/components/logo-banner.js +33 -0
- package/dist/adapters/cli/tui/components/markdown.d.ts +9 -0
- package/dist/adapters/cli/tui/components/markdown.js +92 -0
- package/dist/adapters/cli/tui/components/message-area.d.ts +19 -0
- package/dist/adapters/cli/tui/components/message-area.js +55 -0
- package/dist/adapters/cli/tui/components/permission-prompt.d.ts +13 -0
- package/dist/adapters/cli/tui/components/permission-prompt.js +32 -0
- package/dist/adapters/cli/tui/components/prompt-area.d.ts +22 -0
- package/dist/adapters/cli/tui/components/prompt-area.js +68 -0
- package/dist/adapters/cli/tui/components/text-input.d.ts +27 -0
- package/dist/adapters/cli/tui/components/text-input.js +142 -0
- package/dist/adapters/cli/tui/components/tool-call-block.d.ts +11 -0
- package/dist/adapters/cli/tui/components/tool-call-block.js +68 -0
- package/dist/adapters/cli/tui/components/user-message.d.ts +5 -0
- package/dist/adapters/cli/tui/components/user-message.js +8 -0
- package/dist/adapters/cli/tui/diff/file-write-meta.d.ts +11 -0
- package/dist/adapters/cli/tui/diff/file-write-meta.js +11 -0
- package/dist/adapters/cli/tui/diff/line-diff.d.ts +17 -0
- package/dist/adapters/cli/tui/diff/line-diff.js +44 -0
- package/dist/adapters/cli/tui/feed-serializer.d.ts +29 -0
- package/dist/adapters/cli/tui/feed-serializer.js +70 -0
- package/dist/adapters/cli/tui/file-index.d.ts +8 -0
- package/dist/adapters/cli/tui/file-index.js +41 -0
- package/dist/adapters/cli/tui/hooks/use-agent.d.ts +54 -0
- package/dist/adapters/cli/tui/hooks/use-agent.js +177 -0
- package/dist/adapters/cli/tui/hooks/use-feed.d.ts +16 -0
- package/dist/adapters/cli/tui/hooks/use-feed.js +25 -0
- package/dist/adapters/cli/tui/hooks/use-file-watcher.d.ts +10 -0
- package/dist/adapters/cli/tui/hooks/use-file-watcher.js +43 -0
- package/dist/adapters/cli/tui/hooks/use-keybindings.d.ts +16 -0
- package/dist/adapters/cli/tui/hooks/use-keybindings.js +25 -0
- package/dist/adapters/cli/tui/hooks/use-theme.d.ts +8 -0
- package/dist/adapters/cli/tui/hooks/use-theme.js +12 -0
- package/dist/adapters/cli/tui/index.d.ts +19 -0
- package/dist/adapters/cli/tui/index.js +206 -0
- package/dist/adapters/cli/tui/ink-reset.d.ts +29 -0
- package/dist/adapters/cli/tui/ink-reset.js +57 -0
- package/dist/adapters/cli/tui/layout.d.ts +15 -0
- package/dist/adapters/cli/tui/layout.js +15 -0
- package/dist/adapters/cli/tui/logo/gradient.d.ts +11 -0
- package/dist/adapters/cli/tui/logo/gradient.js +31 -0
- package/dist/adapters/cli/tui/overlays/help-dialog.d.ts +4 -0
- package/dist/adapters/cli/tui/overlays/help-dialog.js +26 -0
- package/dist/adapters/cli/tui/overlays/model-selector.d.ts +14 -0
- package/dist/adapters/cli/tui/overlays/model-selector.js +43 -0
- package/dist/adapters/cli/tui/overlays/session-selector.d.ts +35 -0
- package/dist/adapters/cli/tui/overlays/session-selector.js +162 -0
- package/dist/adapters/cli/tui/overlays/settings-overlay.d.ts +24 -0
- package/dist/adapters/cli/tui/overlays/settings-overlay.js +126 -0
- package/dist/adapters/cli/tui/session-export.d.ts +21 -0
- package/dist/adapters/cli/tui/session-export.js +63 -0
- package/dist/adapters/cli/tui/theme.d.ts +23 -0
- package/dist/adapters/cli/tui/theme.js +22 -0
- package/dist/adapters/cli/tui/types.d.ts +52 -0
- package/dist/adapters/cli/tui/types.js +12 -0
- package/dist/adapters/sdk/agent.d.ts +20 -0
- package/dist/adapters/sdk/agent.js +356 -0
- package/dist/adapters/sdk/http.d.ts +43 -0
- package/dist/adapters/sdk/http.js +61 -0
- package/dist/adapters/sdk/index.d.ts +58 -0
- package/dist/adapters/sdk/index.js +209 -0
- package/dist/adapters/sdk/settings.d.ts +18 -0
- package/dist/adapters/sdk/settings.js +57 -0
- package/dist/adapters/sdk/tools.d.ts +7 -0
- package/dist/adapters/sdk/tools.js +13 -0
- package/dist/adapters/server/auth.d.ts +53 -0
- package/dist/adapters/server/auth.js +168 -0
- package/dist/adapters/server/index.d.ts +40 -0
- package/dist/adapters/server/index.js +255 -0
- package/dist/adapters/server/rest-gateway.d.ts +13 -0
- package/dist/adapters/server/rest-gateway.js +218 -0
- package/dist/adapters/server/rest.d.ts +37 -0
- package/dist/adapters/server/rest.js +341 -0
- package/dist/adapters/server/server-core.d.ts +55 -0
- package/dist/adapters/server/server-core.js +121 -0
- package/dist/adapters/server/session-store.d.ts +81 -0
- package/dist/adapters/server/session-store.js +272 -0
- package/dist/adapters/server/settings-handlers.d.ts +24 -0
- package/dist/adapters/server/settings-handlers.js +360 -0
- package/dist/adapters/server/standalone.d.ts +19 -0
- package/dist/adapters/server/standalone.js +113 -0
- package/dist/adapters/server/websocket.d.ts +26 -0
- package/dist/adapters/server/websocket.js +68 -0
- package/dist/adapters/server/ws-handlers.d.ts +32 -0
- package/dist/adapters/server/ws-handlers.js +523 -0
- package/dist/adapters/server/ws-types.d.ts +304 -0
- package/dist/adapters/server/ws-types.js +7 -0
- package/dist/core/agent-loop.d.ts +68 -0
- package/dist/core/agent-loop.js +423 -0
- package/dist/core/config.d.ts +115 -0
- package/dist/core/config.js +189 -0
- package/dist/core/errors.d.ts +58 -0
- package/dist/core/errors.js +88 -0
- package/dist/core/hooks.d.ts +35 -0
- package/dist/core/hooks.js +49 -0
- package/dist/core/index.d.ts +23 -0
- package/dist/core/index.js +29 -0
- package/dist/core/message-convert.d.ts +41 -0
- package/dist/core/message-convert.js +94 -0
- package/dist/core/middleware/auth.d.ts +24 -0
- package/dist/core/middleware/auth.js +28 -0
- package/dist/core/middleware/logging.d.ts +23 -0
- package/dist/core/middleware/logging.js +28 -0
- package/dist/core/middleware/rate-limit.d.ts +27 -0
- package/dist/core/middleware/rate-limit.js +38 -0
- package/dist/core/middleware/semantic-tools.d.ts +10 -0
- package/dist/core/middleware/semantic-tools.js +43 -0
- package/dist/core/middleware.d.ts +48 -0
- package/dist/core/middleware.js +38 -0
- package/dist/core/permission.d.ts +25 -0
- package/dist/core/permission.js +50 -0
- package/dist/core/provider-config.d.ts +129 -0
- package/dist/core/provider-config.js +273 -0
- package/dist/core/provider-env.d.ts +39 -0
- package/dist/core/provider-env.js +142 -0
- package/dist/core/provider-resolver.d.ts +12 -0
- package/dist/core/provider-resolver.js +12 -0
- package/dist/core/session-store.d.ts +75 -0
- package/dist/core/session-store.js +245 -0
- package/dist/core/settings-manager.d.ts +57 -0
- package/dist/core/settings-manager.js +359 -0
- package/dist/core/settings-schema.d.ts +38 -0
- package/dist/core/settings-schema.js +171 -0
- package/dist/core/skill-catalog.d.ts +6 -0
- package/dist/core/skill-catalog.js +17 -0
- package/dist/core/skill-invoker.d.ts +127 -0
- package/dist/core/skill-invoker.js +182 -0
- package/dist/core/stream-accumulator.d.ts +21 -0
- package/dist/core/stream-accumulator.js +51 -0
- package/dist/core/stream-manager.d.ts +58 -0
- package/dist/core/stream-manager.js +212 -0
- package/dist/core/tool-executor.d.ts +84 -0
- package/dist/core/tool-executor.js +256 -0
- package/dist/core/types.d.ts +259 -0
- package/dist/core/types.js +11 -0
- package/dist/gateway/gateway.d.ts +52 -0
- package/dist/gateway/gateway.js +537 -0
- package/dist/gateway/index.d.ts +21 -0
- package/dist/gateway/index.js +31 -0
- package/dist/gateway/openapi-importer.d.ts +15 -0
- package/dist/gateway/openapi-importer.js +66 -0
- package/dist/gateway/semantic-scorer.d.ts +7 -0
- package/dist/gateway/semantic-scorer.js +24 -0
- package/dist/gateway/settings-adapter.d.ts +49 -0
- package/dist/gateway/settings-adapter.js +137 -0
- package/dist/gateway/tool-factory.d.ts +9 -0
- package/dist/gateway/tool-factory.js +414 -0
- package/dist/gateway/types.d.ts +68 -0
- package/dist/gateway/types.js +7 -0
- package/dist/models-catalog.js +46 -0
- package/dist/providers/anthropic.d.ts +22 -0
- package/dist/providers/anthropic.js +148 -0
- package/dist/providers/factory.d.ts +10 -0
- package/dist/providers/factory.js +25 -0
- package/dist/providers/openai.d.ts +15 -0
- package/dist/providers/openai.js +71 -0
- package/dist/providers/types.d.ts +48 -0
- package/dist/providers/types.js +1 -0
- package/dist/skills/args.d.ts +37 -0
- package/dist/skills/args.js +99 -0
- package/dist/skills/index.d.ts +11 -0
- package/dist/skills/index.js +23 -0
- package/dist/skills/loader.d.ts +3 -0
- package/dist/skills/loader.js +59 -0
- package/dist/skills/parser.d.ts +7 -0
- package/dist/skills/parser.js +152 -0
- package/dist/skills/registry.d.ts +13 -0
- package/dist/skills/registry.js +74 -0
- package/dist/skills/resolver.d.ts +19 -0
- package/dist/skills/resolver.js +116 -0
- package/dist/skills/types.d.ts +74 -0
- package/dist/skills/types.js +50 -0
- package/dist/tools/browser.d.ts +2 -0
- package/dist/tools/browser.js +68 -0
- package/dist/tools/core.d.ts +20 -0
- package/dist/tools/core.js +244 -0
- package/dist/tools/email.d.ts +2 -0
- package/dist/tools/email.js +61 -0
- package/dist/tools/image.d.ts +2 -0
- package/dist/tools/image.js +257 -0
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +88 -0
- package/dist/tools/interface.d.ts +22 -0
- package/dist/tools/interface.js +1 -0
- package/dist/tools/notify.d.ts +2 -0
- package/dist/tools/notify.js +100 -0
- package/dist/tools/prompt-optimizer.d.ts +2 -0
- package/dist/tools/prompt-optimizer.js +65 -0
- package/dist/tools/screenshot.d.ts +2 -0
- package/dist/tools/screenshot.js +184 -0
- package/dist/tools/search.d.ts +2 -0
- package/dist/tools/search.js +78 -0
- package/dist/tools/todos.d.ts +10 -0
- package/dist/tools/todos.js +50 -0
- package/package.json +119 -0
- package/skills/docker-ops/SKILL.md +329 -0
- package/skills/k8s-deploy/SKILL.md +397 -0
- package/skills/log-analyzer/SKILL.md +331 -0
- package/skills/speckit-analyze/SKILL.md +260 -0
- package/skills/speckit-checklist/SKILL.md +374 -0
- package/skills/speckit-clarify/SKILL.md +286 -0
- package/skills/speckit-constitution/SKILL.md +157 -0
- package/skills/speckit-implement/SKILL.md +224 -0
- package/skills/speckit-plan/SKILL.md +171 -0
- package/skills/speckit-specify/SKILL.md +346 -0
- package/skills/speckit-tasks/SKILL.md +215 -0
- package/skills/speckit-taskstoissues/SKILL.md +107 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: docker-ops
|
|
3
|
+
description: >
|
|
4
|
+
Docker container operations for production and development environments.
|
|
5
|
+
Activates when the user asks to build, run, stop, or remove Docker containers;
|
|
6
|
+
manage Docker images; write or debug Dockerfiles or docker-compose.yml files;
|
|
7
|
+
troubleshoot container failures, networking, or volumes; inspect container logs
|
|
8
|
+
or resource usage; push or pull images from registries; prune unused Docker
|
|
9
|
+
resources; set up multi-stage builds; configure container health checks;
|
|
10
|
+
manage Docker networks and persistent storage; run docker-compose up, down,
|
|
11
|
+
or ps; debug "container won't start", "image build fails", "port already in
|
|
12
|
+
use", "volume mount not working", or "container keeps restarting" issues.
|
|
13
|
+
Also triggers on mentions of Docker Swarm, Docker secret management, or
|
|
14
|
+
container orchestration at the single-host level.
|
|
15
|
+
version: 1.0.0
|
|
16
|
+
tags:
|
|
17
|
+
- docker
|
|
18
|
+
- containers
|
|
19
|
+
- devops
|
|
20
|
+
- docker-compose
|
|
21
|
+
- dockerfile
|
|
22
|
+
- containerization
|
|
23
|
+
- images
|
|
24
|
+
- volumes
|
|
25
|
+
- networking
|
|
26
|
+
allowedTools:
|
|
27
|
+
- execute_shell_command
|
|
28
|
+
- read_file
|
|
29
|
+
- write_file
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
# Docker Operations Skill
|
|
33
|
+
|
|
34
|
+
This skill provides procedures for building, running, managing, and troubleshooting
|
|
35
|
+
Docker containers and Docker Compose stacks. It assumes Docker is available in the
|
|
36
|
+
execution environment and the agent has shell access.
|
|
37
|
+
|
|
38
|
+
## Constraints
|
|
39
|
+
|
|
40
|
+
- **Never use `:latest` tag in production** -- always pin a specific version or digest.
|
|
41
|
+
- **Never run containers as root in production** -- add `USER` directive to Dockerfiles.
|
|
42
|
+
- **Never store secrets in Dockerfiles or compose files** -- use Docker secrets, environment
|
|
43
|
+
variables passed at runtime, or external secret managers.
|
|
44
|
+
- **Never use `docker system prune -f` without user confirmation** -- it deletes all unused
|
|
45
|
+
data indiscriminately.
|
|
46
|
+
- **Always use `--rm` for ephemeral containers** to prevent stopped containers from accumulating.
|
|
47
|
+
- **Never expose the Docker daemon socket (`/var/run/docker.sock`) to containers** unless
|
|
48
|
+
explicitly requested by the user.
|
|
49
|
+
|
|
50
|
+
## Building Images
|
|
51
|
+
|
|
52
|
+
### Standard Dockerfile Structure
|
|
53
|
+
|
|
54
|
+
Follow this order for cache efficiency:
|
|
55
|
+
|
|
56
|
+
```dockerfile
|
|
57
|
+
# 1. Base image (pinned version)
|
|
58
|
+
FROM node:20.11-alpine AS base
|
|
59
|
+
|
|
60
|
+
# 2. System dependencies (rarely change)
|
|
61
|
+
RUN apk add --no-cache curl
|
|
62
|
+
|
|
63
|
+
# 3. Workdir
|
|
64
|
+
WORKDIR /app
|
|
65
|
+
|
|
66
|
+
# 4. Copy dependency manifests first (cache layer)
|
|
67
|
+
COPY package.json pnpm-lock.yaml ./
|
|
68
|
+
|
|
69
|
+
# 5. Install dependencies
|
|
70
|
+
RUN corepack enable && pnpm install --frozen-lockfile --prod
|
|
71
|
+
|
|
72
|
+
# 6. Copy application code
|
|
73
|
+
COPY . .
|
|
74
|
+
|
|
75
|
+
# 7. Non-root user
|
|
76
|
+
USER node
|
|
77
|
+
|
|
78
|
+
# 8. Health check
|
|
79
|
+
HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
|
|
80
|
+
CMD curl -f http://localhost:3000/health || exit 1
|
|
81
|
+
|
|
82
|
+
# 9. Expose port and entrypoint
|
|
83
|
+
EXPOSE 3000
|
|
84
|
+
CMD ["node", "dist/index.js"]
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Multi-Stage Builds
|
|
88
|
+
|
|
89
|
+
Use multi-stage builds to keep final images small. A typical pattern:
|
|
90
|
+
|
|
91
|
+
1. **Build stage**: Full SDK, compile source, run tests.
|
|
92
|
+
2. **Runtime stage**: Minimal base image, copy only compiled output and production deps.
|
|
93
|
+
|
|
94
|
+
```dockerfile
|
|
95
|
+
FROM node:20.11-alpine AS builder
|
|
96
|
+
WORKDIR /app
|
|
97
|
+
COPY package.json pnpm-lock.yaml ./
|
|
98
|
+
RUN corepack enable && pnpm install --frozen-lockfile
|
|
99
|
+
COPY . .
|
|
100
|
+
RUN pnpm run build
|
|
101
|
+
|
|
102
|
+
FROM node:20.11-alpine AS runtime
|
|
103
|
+
WORKDIR /app
|
|
104
|
+
COPY package.json pnpm-lock.yaml ./
|
|
105
|
+
RUN corepack enable && pnpm install --frozen-lockfile --prod
|
|
106
|
+
COPY --from=builder /app/dist ./dist
|
|
107
|
+
USER node
|
|
108
|
+
CMD ["node", "dist/index.js"]
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Build Commands
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
# Build with explicit tag and no cache
|
|
115
|
+
docker build --no-cache -t myapp:1.2.0 .
|
|
116
|
+
|
|
117
|
+
# Build with build arguments
|
|
118
|
+
docker build --build-arg NODE_ENV=production -t myapp:1.2.0 .
|
|
119
|
+
|
|
120
|
+
# Build for multiple platforms
|
|
121
|
+
docker buildx build --platform linux/amd64,linux/arm64 -t myapp:1.2.0 .
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Running Containers
|
|
125
|
+
|
|
126
|
+
### Common Run Patterns
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Run detached with auto-restart and resource limits
|
|
130
|
+
docker run -d \
|
|
131
|
+
--name myapp \
|
|
132
|
+
--restart unless-stopped \
|
|
133
|
+
--memory 512m --cpus 1.0 \
|
|
134
|
+
-p 3000:3000 \
|
|
135
|
+
-e NODE_ENV=production \
|
|
136
|
+
-v myapp-data:/app/data \
|
|
137
|
+
myapp:1.2.0
|
|
138
|
+
|
|
139
|
+
# Run interactively for debugging
|
|
140
|
+
docker run -it --rm \
|
|
141
|
+
-v $(pwd):/app \
|
|
142
|
+
--entrypoint /bin/sh \
|
|
143
|
+
myapp:1.2.0
|
|
144
|
+
|
|
145
|
+
# Run with health check override
|
|
146
|
+
docker run -d \
|
|
147
|
+
--name myapp \
|
|
148
|
+
--health-cmd "curl -f http://localhost:3000/health" \
|
|
149
|
+
--health-interval 10s \
|
|
150
|
+
--health-timeout 3s \
|
|
151
|
+
myapp:1.2.0
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Inspecting Running Containers
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Check resource usage (all running containers)
|
|
158
|
+
docker stats --no-stream
|
|
159
|
+
|
|
160
|
+
# Container IP address (omit --format for the full JSON: mounts, env vars, etc.)
|
|
161
|
+
docker inspect myapp --format '{{ .NetworkSettings.IPAddress }}'
|
|
162
|
+
|
|
163
|
+
# Follow logs with timestamps
|
|
164
|
+
docker logs -f --since 1h --timestamps myapp
|
|
165
|
+
|
|
166
|
+
# List containers with size
|
|
167
|
+
docker ps -s
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Docker Compose
|
|
171
|
+
|
|
172
|
+
### Compose File Best Practices
|
|
173
|
+
|
|
174
|
+
```yaml
|
|
175
|
+
services:
|
|
176
|
+
app:
|
|
177
|
+
build:
|
|
178
|
+
context: .
|
|
179
|
+
dockerfile: Dockerfile
|
|
180
|
+
args:
|
|
181
|
+
NODE_ENV: production
|
|
182
|
+
image: myapp:1.2.0
|
|
183
|
+
restart: unless-stopped
|
|
184
|
+
ports:
|
|
185
|
+
- "3000:3000"
|
|
186
|
+
environment:
|
|
187
|
+
NODE_ENV: production
|
|
188
|
+
DATABASE_URL: ${DATABASE_URL}
|
|
189
|
+
volumes:
|
|
190
|
+
- app-data:/app/data
|
|
191
|
+
depends_on:
|
|
192
|
+
db:
|
|
193
|
+
condition: service_healthy
|
|
194
|
+
deploy:
|
|
195
|
+
resources:
|
|
196
|
+
limits:
|
|
197
|
+
memory: 512M
|
|
198
|
+
cpus: "1.0"
|
|
199
|
+
healthcheck:
|
|
200
|
+
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
|
|
201
|
+
interval: 30s
|
|
202
|
+
timeout: 5s
|
|
203
|
+
retries: 3
|
|
204
|
+
|
|
205
|
+
db:
|
|
206
|
+
image: postgres:16.2-alpine
|
|
207
|
+
restart: unless-stopped
|
|
208
|
+
environment:
|
|
209
|
+
POSTGRES_DB: myapp
|
|
210
|
+
POSTGRES_USER: ${DB_USER}
|
|
211
|
+
POSTGRES_PASSWORD: ${DB_PASSWORD}
|
|
212
|
+
volumes:
|
|
213
|
+
- db-data:/var/lib/postgresql/data
|
|
214
|
+
healthcheck:
|
|
215
|
+
test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER"]
|
|
216
|
+
interval: 10s
|
|
217
|
+
timeout: 5s
|
|
218
|
+
retries: 5
|
|
219
|
+
|
|
220
|
+
volumes:
|
|
221
|
+
app-data:
|
|
222
|
+
db-data:
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Compose Commands
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
# Start stack in detached mode
|
|
229
|
+
docker compose up -d
|
|
230
|
+
|
|
231
|
+
# Rebuild and restart a single service
|
|
232
|
+
docker compose up -d --build app
|
|
233
|
+
|
|
234
|
+
# View logs for a specific service
|
|
235
|
+
docker compose logs -f app
|
|
236
|
+
|
|
237
|
+
# Scale a service
|
|
238
|
+
docker compose up -d --scale worker=3
|
|
239
|
+
|
|
240
|
+
# Tear down and delete named volumes (CAUTION: data loss)
|
|
241
|
+
docker compose down -v
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
## Troubleshooting Decision Tree
|
|
245
|
+
|
|
246
|
+
### Container exits immediately
|
|
247
|
+
|
|
248
|
+
1. Check exit code: `docker inspect myapp --format '{{ .State.ExitCode }}'`
|
|
249
|
+
2. Exit code 0: Process completed -- likely a missing foreground process or CMD issue.
|
|
250
|
+
3. Exit code 1: Application error -- check logs with `docker logs myapp`.
|
|
251
|
+
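4. Exit code 137: Killed by SIGKILL (128 + 9), usually the OOM killer -- confirm with `docker inspect myapp --format '{{ .State.OOMKilled }}'`, then increase memory limit or investigate memory leaks.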
|
|
252
|
+
5. Exit code 139: Segfault -- check base image compatibility (glibc vs musl).
|
|
253
|
+
|
|
254
|
+
### Container won't start / keeps restarting
|
|
255
|
+
|
|
256
|
+
1. `docker logs --tail 50 myapp` -- look for error messages.
|
|
257
|
+
2. `docker inspect myapp --format '{{ json .State }}'` -- check restart count and status.
|
|
258
|
+
3. Verify port is not already in use: `ss -tlnp | grep <port>`.
|
|
259
|
+
4. Verify volume mounts exist and have correct permissions.
|
|
260
|
+
5. Check if health check is failing too aggressively -- widen intervals.
|
|
261
|
+
|
|
262
|
+
### Networking issues
|
|
263
|
+
|
|
264
|
+
1. List networks: `docker network ls`
|
|
265
|
+
2. Inspect a network: `docker network inspect bridge`
|
|
266
|
+
3. Test connectivity from inside a container: `docker exec myapp ping db`
|
|
267
|
+
4. DNS resolution in Docker: containers on the same custom network resolve by
|
|
268
|
+
service name automatically (the default `bridge` network does not provide this).
|
|
269
|
+
5. Port mapping issues: use `docker port myapp` to verify published ports.
|
|
270
|
+
|
|
271
|
+
### Volume mount problems
|
|
272
|
+
|
|
273
|
+
1. Check mount points: `docker inspect myapp --format '{{ json .Mounts }}'`
|
|
274
|
+
2. File permissions: the host directory's UID/GID must match what the container
|
|
275
|
+
process expects. Mismatches are common with Alpine images running as non-root users.
|
|
276
|
+
3. SELinux (on RHEL/CentOS): append `:z` (label shared between containers) or `:Z` (private, unshared label) to volume mounts.
|
|
277
|
+
4. Named volumes vs bind mounts: named volumes are managed by Docker, bind mounts
|
|
278
|
+
map directly to host paths.
|
|
279
|
+
|
|
280
|
+
## Image Management
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
# List images with dangling filter
|
|
284
|
+
docker images -f dangling=true
|
|
285
|
+
|
|
286
|
+
# Remove dangling images
|
|
287
|
+
docker image prune
|
|
288
|
+
|
|
289
|
+
# Tag and push to registry
|
|
290
|
+
docker tag myapp:1.2.0 registry.example.com/myapp:1.2.0
|
|
291
|
+
docker push registry.example.com/myapp:1.2.0
|
|
292
|
+
|
|
293
|
+
# Inspect image layers
|
|
294
|
+
docker history myapp:1.2.0 --no-trunc
|
|
295
|
+
|
|
296
|
+
# Save/load image for air-gapped transfer
|
|
297
|
+
docker save myapp:1.2.0 | gzip > myapp-1.2.0.tar.gz
|
|
298
|
+
docker load < myapp-1.2.0.tar.gz
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## Cleanup Procedures
|
|
302
|
+
|
|
303
|
+
```bash
|
|
304
|
+
# Remove stopped containers
|
|
305
|
+
docker container prune
|
|
306
|
+
|
|
307
|
+
# Remove unused images
|
|
308
|
+
docker image prune -a --filter "until=168h"
|
|
309
|
+
|
|
310
|
+
# Remove unused volumes (CAUTION: data loss)
|
|
311
|
+
docker volume prune
|
|
312
|
+
|
|
313
|
+
# Full cleanup (ask user first)
|
|
314
|
+
docker system prune --volumes
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
## Registry Operations
|
|
318
|
+
|
|
319
|
+
```bash
|
|
320
|
+
# Login to a private registry
|
|
321
|
+
docker login registry.example.com
|
|
322
|
+
|
|
323
|
+
# List tags in a registry (via API)
|
|
324
|
+
curl -s -u "$USER:$PASS" https://registry.example.com/v2/myapp/tags/list | jq .
|
|
325
|
+
|
|
326
|
+
# Remove a manifest by digest (garbage collection required on registry)
|
|
327
|
+
curl -s -u "$USER:$PASS" -X DELETE \
|
|
328
|
+
https://registry.example.com/v2/myapp/manifests/$DIGEST
|
|
329
|
+
```
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: k8s-deploy
|
|
3
|
+
description: >
|
|
4
|
+
Kubernetes deployment operations using kubectl and helm. Activates when the user
|
|
5
|
+
asks to deploy applications to Kubernetes clusters; create or update Deployments,
|
|
6
|
+
Services, ConfigMaps, Secrets, Ingress resources, or PersistentVolumeClaims;
|
|
7
|
+
manage rollouts, rollbacks, and scaling; troubleshoot pod failures, CrashLoopBackOff,
|
|
8
|
+
ImagePullBackOff, or pending pods; inspect cluster resources and events; work with
|
|
9
|
+
Namespaces, Labels, and Selectors; configure resource limits, requests, and
|
|
10
|
+
horizontal pod autoscalers (HPA); manage helm charts and releases; perform canary
|
|
11
|
+
or blue-green deployments; debug service discovery, DNS resolution, or network
|
|
12
|
+
policies; review or generate Kubernetes manifests; manage ServiceAccounts, RBAC,
|
|
13
|
+
and PodSecurityPolicies; handle node maintenance, cordoning, and draining; run
|
|
14
|
+
kubectl exec, logs, port-forward, or describe commands.
|
|
15
|
+
version: 1.0.0
|
|
16
|
+
tags:
|
|
17
|
+
- kubernetes
|
|
18
|
+
- k8s
|
|
19
|
+
- kubectl
|
|
20
|
+
- helm
|
|
21
|
+
- deployment
|
|
22
|
+
- pods
|
|
23
|
+
- services
|
|
24
|
+
- devops
|
|
25
|
+
- orchestration
|
|
26
|
+
- rollout
|
|
27
|
+
- rollback
|
|
28
|
+
allowedTools:
|
|
29
|
+
- execute_shell_command
|
|
30
|
+
- read_file
|
|
31
|
+
- write_file
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
# Kubernetes Deployment Skill
|
|
35
|
+
|
|
36
|
+
This skill covers deploying, managing, and troubleshooting applications on
|
|
37
|
+
Kubernetes clusters using kubectl and helm. It assumes kubectl is configured
|
|
38
|
+
with appropriate kubeconfig access and the agent has shell access.
|
|
39
|
+
|
|
40
|
+
## Constraints
|
|
41
|
+
|
|
42
|
+
- **Never use `:latest` tag in production Deployments** -- always pin a specific
|
|
43
|
+
container image tag or digest.
|
|
44
|
+
- **Never run pods as root in production** -- set `securityContext.runAsNonRoot: true`
|
|
45
|
+
and a non-zero `runAsUser` (e.g. `1000` or higher).
|
|
46
|
+
- **Always set resource requests and limits** -- unbounded pods can starve other
|
|
47
|
+
workloads and trigger node-level OOM.
|
|
48
|
+
- **Never expose the Kubernetes API server to the public internet** without
|
|
49
|
+
authentication.
|
|
50
|
+
- **Never delete Namespaces without user confirmation** -- cascading deletes are
|
|
51
|
+
irreversible.
|
|
52
|
+
- **Always use `--dry-run=client -o yaml` before applying** to preview manifests.
|
|
53
|
+
- **Prefer `kubectl apply` over `kubectl create`** for idempotent operations.
|
|
54
|
+
- **Never modify resources in `kube-system` namespace** unless explicitly asked.
|
|
55
|
+
|
|
56
|
+
## Pre-Flight Checks
|
|
57
|
+
|
|
58
|
+
Before any deployment, verify cluster access and context:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Verify connectivity
|
|
62
|
+
kubectl cluster-info
|
|
63
|
+
|
|
64
|
+
# Check current context (never deploy to the wrong cluster)
|
|
65
|
+
kubectl config current-context
|
|
66
|
+
|
|
67
|
+
# List available namespaces
|
|
68
|
+
kubectl get namespaces
|
|
69
|
+
|
|
70
|
+
# Check node health
|
|
71
|
+
kubectl get nodes -o wide
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
If the context points to a production cluster, **warn the user and confirm
|
|
75
|
+
before proceeding**.
|
|
76
|
+
|
|
77
|
+
## Deployment Manifests
|
|
78
|
+
|
|
79
|
+
### Standard Deployment Template
|
|
80
|
+
|
|
81
|
+
```yaml
|
|
82
|
+
apiVersion: apps/v1
|
|
83
|
+
kind: Deployment
|
|
84
|
+
metadata:
|
|
85
|
+
name: myapp
|
|
86
|
+
namespace: production
|
|
87
|
+
labels:
|
|
88
|
+
app: myapp
|
|
89
|
+
version: "1.2.0"
|
|
90
|
+
spec:
|
|
91
|
+
replicas: 3
|
|
92
|
+
strategy:
|
|
93
|
+
type: RollingUpdate
|
|
94
|
+
rollingUpdate:
|
|
95
|
+
maxSurge: 1
|
|
96
|
+
maxUnavailable: 0
|
|
97
|
+
selector:
|
|
98
|
+
matchLabels:
|
|
99
|
+
app: myapp
|
|
100
|
+
template:
|
|
101
|
+
metadata:
|
|
102
|
+
labels:
|
|
103
|
+
app: myapp
|
|
104
|
+
version: "1.2.0"
|
|
105
|
+
spec:
|
|
106
|
+
serviceAccountName: myapp
|
|
107
|
+
securityContext:
|
|
108
|
+
runAsNonRoot: true
|
|
109
|
+
runAsUser: 1000
|
|
110
|
+
fsGroup: 1000
|
|
111
|
+
containers:
|
|
112
|
+
- name: myapp
|
|
113
|
+
image: registry.example.com/myapp:1.2.0
|
|
114
|
+
ports:
|
|
115
|
+
- containerPort: 3000
|
|
116
|
+
env:
|
|
117
|
+
- name: NODE_ENV
|
|
118
|
+
value: "production"
|
|
119
|
+
- name: DATABASE_URL
|
|
120
|
+
valueFrom:
|
|
121
|
+
secretKeyRef:
|
|
122
|
+
name: myapp-secrets
|
|
123
|
+
key: database-url
|
|
124
|
+
resources:
|
|
125
|
+
requests:
|
|
126
|
+
memory: "128Mi"
|
|
127
|
+
cpu: "100m"
|
|
128
|
+
limits:
|
|
129
|
+
memory: "512Mi"
|
|
130
|
+
cpu: "500m"
|
|
131
|
+
readinessProbe:
|
|
132
|
+
httpGet:
|
|
133
|
+
path: /health
|
|
134
|
+
port: 3000
|
|
135
|
+
initialDelaySeconds: 5
|
|
136
|
+
periodSeconds: 10
|
|
137
|
+
livenessProbe:
|
|
138
|
+
httpGet:
|
|
139
|
+
path: /health
|
|
140
|
+
port: 3000
|
|
141
|
+
initialDelaySeconds: 15
|
|
142
|
+
periodSeconds: 20
|
|
143
|
+
volumeMounts:
|
|
144
|
+
- name: config
|
|
145
|
+
mountPath: /app/config
|
|
146
|
+
readOnly: true
|
|
147
|
+
volumes:
|
|
148
|
+
- name: config
|
|
149
|
+
configMap:
|
|
150
|
+
name: myapp-config
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Service Template
|
|
154
|
+
|
|
155
|
+
```yaml
|
|
156
|
+
apiVersion: v1
|
|
157
|
+
kind: Service
|
|
158
|
+
metadata:
|
|
159
|
+
name: myapp
|
|
160
|
+
namespace: production
|
|
161
|
+
spec:
|
|
162
|
+
selector:
|
|
163
|
+
app: myapp
|
|
164
|
+
ports:
|
|
165
|
+
- port: 80
|
|
166
|
+
targetPort: 3000
|
|
167
|
+
protocol: TCP
|
|
168
|
+
type: ClusterIP
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Ingress Template
|
|
172
|
+
|
|
173
|
+
```yaml
|
|
174
|
+
apiVersion: networking.k8s.io/v1
|
|
175
|
+
kind: Ingress
|
|
176
|
+
metadata:
|
|
177
|
+
name: myapp
|
|
178
|
+
namespace: production
|
|
179
|
+
annotations:
|
|
180
|
+
cert-manager.io/cluster-issuer: letsencrypt-prod
|
|
181
|
+
spec:
|
|
182
|
+
ingressClassName: nginx
|
|
183
|
+
tls:
|
|
184
|
+
- hosts:
|
|
185
|
+
- myapp.example.com
|
|
186
|
+
secretName: myapp-tls
|
|
187
|
+
rules:
|
|
188
|
+
- host: myapp.example.com
|
|
189
|
+
http:
|
|
190
|
+
paths:
|
|
191
|
+
- path: /
|
|
192
|
+
pathType: Prefix
|
|
193
|
+
backend:
|
|
194
|
+
service:
|
|
195
|
+
name: myapp
|
|
196
|
+
port:
|
|
197
|
+
number: 80
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Deployment Workflow
|
|
201
|
+
|
|
202
|
+
### 1. Deploy or Update an Application
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
# Apply manifests
|
|
206
|
+
kubectl apply -f deployment.yaml -f service.yaml -f ingress.yaml
|
|
207
|
+
|
|
208
|
+
# Check rollout status
|
|
209
|
+
kubectl rollout status deployment/myapp -n production
|
|
210
|
+
|
|
211
|
+
# Verify pods are running
|
|
212
|
+
kubectl get pods -n production -l app=myapp
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### 2. Rolling Update (Image Change)
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# Update the image
|
|
219
|
+
kubectl set image deployment/myapp myapp=registry.example.com/myapp:1.3.0 \
|
|
220
|
+
-n production
|
|
221
|
+
|
|
222
|
+
# Monitor rollout
|
|
223
|
+
kubectl rollout status deployment/myapp -n production
|
|
224
|
+
|
|
225
|
+
# Check revision history
|
|
226
|
+
kubectl rollout history deployment/myapp -n production
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### 3. Rollback
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
# Check rollout history
|
|
233
|
+
kubectl rollout history deployment/myapp -n production
|
|
234
|
+
|
|
235
|
+
# Rollback to previous revision
|
|
236
|
+
kubectl rollout undo deployment/myapp -n production
|
|
237
|
+
|
|
238
|
+
# Rollback to a specific revision
|
|
239
|
+
kubectl rollout undo deployment/myapp --to-revision=2 -n production
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### 4. Scale
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
# Scale manually
|
|
246
|
+
kubectl scale deployment/myapp --replicas=5 -n production
|
|
247
|
+
|
|
248
|
+
# Check HPA status
|
|
249
|
+
kubectl get hpa -n production
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## Helm Operations
|
|
253
|
+
|
|
254
|
+
### Install and Upgrade
|
|
255
|
+
|
|
256
|
+
```bash
|
|
257
|
+
# Install a chart
|
|
258
|
+
helm install myapp ./charts/myapp -n production -f values-production.yaml
|
|
259
|
+
|
|
260
|
+
# Upgrade with new values or image tag
|
|
261
|
+
helm upgrade myapp ./charts/myapp -n production \
|
|
262
|
+
-f values-production.yaml \
|
|
263
|
+
--set image.tag=1.3.0
|
|
264
|
+
|
|
265
|
+
# Dry-run to preview changes
|
|
266
|
+
helm upgrade myapp ./charts/myapp -n production \
|
|
267
|
+
-f values-production.yaml \
|
|
268
|
+
--dry-run --debug
|
|
269
|
+
|
|
270
|
+
# Rollback a helm release
|
|
271
|
+
helm rollback myapp 2 -n production
|
|
272
|
+
|
|
273
|
+
# List releases
|
|
274
|
+
helm list -n production
|
|
275
|
+
|
|
276
|
+
# Show rendered templates without applying
|
|
277
|
+
helm template myapp ./charts/myapp -f values-production.yaml
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Values File Best Practices
|
|
281
|
+
|
|
282
|
+
```yaml
|
|
283
|
+
# values-production.yaml
|
|
284
|
+
replicaCount: 3
|
|
285
|
+
|
|
286
|
+
image:
|
|
287
|
+
repository: registry.example.com/myapp
|
|
288
|
+
tag: "1.2.0" # never "latest"
|
|
289
|
+
pullPolicy: IfNotPresent
|
|
290
|
+
|
|
291
|
+
resources:
|
|
292
|
+
requests:
|
|
293
|
+
memory: "128Mi"
|
|
294
|
+
cpu: "100m"
|
|
295
|
+
limits:
|
|
296
|
+
memory: "512Mi"
|
|
297
|
+
cpu: "500m"
|
|
298
|
+
|
|
299
|
+
autoscaling:
|
|
300
|
+
enabled: true
|
|
301
|
+
minReplicas: 3
|
|
302
|
+
maxReplicas: 10
|
|
303
|
+
targetCPUUtilizationPercentage: 70
|
|
304
|
+
targetMemoryUtilizationPercentage: 80
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## Troubleshooting Decision Tree
|
|
308
|
+
|
|
309
|
+
### Pod is Pending
|
|
310
|
+
|
|
311
|
+
1. `kubectl describe pod <pod> -n <ns>` -- check the Events section at the bottom.
|
|
312
|
+
2. Common causes:
|
|
313
|
+
- Insufficient resources: check `kubectl describe node <node>` for allocatable
|
|
314
|
+
capacity and existing requests.
|
|
315
|
+
- PersistentVolumeClaim not bound: `kubectl get pvc -n <ns>`.
|
|
316
|
+
- NodeSelector or affinity rules too restrictive: verify matching node labels.
|
|
317
|
+
- Taints preventing scheduling: `kubectl describe node <node> | grep Taints`.
|
|
318
|
+
|
|
319
|
+
### Pod is CrashLoopBackOff
|
|
320
|
+
|
|
321
|
+
1. `kubectl logs <pod> -n <ns> --previous` -- logs from the last failed container.
|
|
322
|
+
2. `kubectl describe pod <pod> -n <ns>` -- check last state and exit code.
|
|
323
|
+
3. Common causes:
|
|
324
|
+
- Exit code 1: Application error -- check config, env vars, missing secrets.
|
|
325
|
+
- Exit code 137: killed by SIGKILL, most often OOMKilled -- confirm the `OOMKilled` reason in `kubectl describe pod`, then increase the memory limit or investigate memory usage.
|
|
326
|
+
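- Exit code 139: Segfault (SIGSEGV) -- crash in native code or incompatible shared libraries; an architecture mismatch (e.g., ARM image on AMD64 node) more often fails with `exec format error`.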
|
|
327
|
+
- Application crashes on startup: probe too aggressive, missing init dependencies.
|
|
328
|
+
|
|
329
|
+
### Pod is ImagePullBackOff
|
|
330
|
+
|
|
331
|
+
1. `kubectl describe pod <pod> -n <ns>` -- check the exact error message.
|
|
332
|
+
2. Common causes:
|
|
333
|
+
- Image does not exist: verify tag name and registry path.
|
|
334
|
+
- Registry requires authentication: create a `docker-registry` secret and
|
|
335
|
+
reference it as `imagePullSecrets`.
|
|
336
|
+
- Private registry unreachable from cluster: check network policies and firewall.
|
|
337
|
+
|
|
338
|
+
### Service Discovery Issues
|
|
339
|
+
|
|
340
|
+
1. Verify the Service selector matches pod labels:
|
|
341
|
+
`kubectl get pods -n <ns> --show-labels`
|
|
342
|
+
2. Test DNS resolution from inside the cluster:
|
|
343
|
+
`kubectl run tmp --rm -it --image=busybox --restart=Never -n <ns> -- nslookup myapp.<ns>.svc.cluster.local`
|
|
344
|
+
3. Check endpoints: `kubectl get endpoints myapp -n <ns>`
|
|
345
|
+
4. If endpoints are empty, the selector is wrong or pods are not ready.
|
|
346
|
+
|
|
347
|
+
### High Restart Count
|
|
348
|
+
|
|
349
|
+
```bash
|
|
350
|
+
# Check restart counts
|
|
351
|
+
kubectl get pods -n <ns> -o wide
|
|
352
|
+
|
|
353
|
+
# Get detailed pod metrics
|
|
354
|
+
kubectl top pods -n <ns>
|
|
355
|
+
|
|
356
|
+
# Check events sorted by time
|
|
357
|
+
kubectl get events -n <ns> --sort-by='.lastTimestamp'
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
## Useful Diagnostic Commands
|
|
361
|
+
|
|
362
|
+
```bash
|
|
363
|
+
# Exec into a running pod
|
|
364
|
+
kubectl exec -it <pod> -n <ns> -- /bin/sh
|
|
365
|
+
|
|
366
|
+
# Port-forward for local debugging
|
|
367
|
+
kubectl port-forward svc/myapp 8080:80 -n <ns>
|
|
368
|
+
|
|
369
|
+
# Copy files from/to a pod
|
|
370
|
+
kubectl cp <pod>:/app/logs/app.log ./app.log -n <ns>
|
|
371
|
+
|
|
372
|
+
# Watch resources in real-time
|
|
373
|
+
kubectl get pods -n <ns> -w
|
|
374
|
+
|
|
375
|
+
# Get resource usage
|
|
376
|
+
kubectl top nodes
|
|
377
|
+
kubectl top pods -n <ns>
|
|
378
|
+
|
|
379
|
+
# Extract a secret (base64 decoded)
|
|
380
|
+
kubectl get secret myapp-secrets -n <ns> -o jsonpath='{.data.database-url}' | base64 -d
|
|
381
|
+
|
|
382
|
+
# Check cluster-level events
|
|
383
|
+
kubectl get events -A --sort-by='.lastTimestamp' --field-selector type=Warning
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
## Node Maintenance
|
|
387
|
+
|
|
388
|
+
```bash
|
|
389
|
+
# Cordon a node (prevent new pods)
|
|
390
|
+
kubectl cordon <node>
|
|
391
|
+
|
|
392
|
+
# Drain a node (evict pods gracefully)
|
|
393
|
+
kubectl drain <node> --ignore-daemonsets --delete-emptydir-data
|
|
394
|
+
|
|
395
|
+
# Uncordon after maintenance
|
|
396
|
+
kubectl uncordon <node>
|
|
397
|
+
```
|