kubectl-mcp-server 1.15.0__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {kubectl_mcp_server-1.15.0.dist-info → kubectl_mcp_server-1.17.0.dist-info}/METADATA +34 -13
  2. kubectl_mcp_server-1.17.0.dist-info/RECORD +75 -0
  3. kubectl_mcp_tool/__init__.py +1 -1
  4. kubectl_mcp_tool/cli/cli.py +83 -9
  5. kubectl_mcp_tool/cli/output.py +14 -0
  6. kubectl_mcp_tool/config/__init__.py +46 -0
  7. kubectl_mcp_tool/config/loader.py +386 -0
  8. kubectl_mcp_tool/config/schema.py +184 -0
  9. kubectl_mcp_tool/crd_detector.py +247 -0
  10. kubectl_mcp_tool/k8s_config.py +19 -0
  11. kubectl_mcp_tool/mcp_server.py +246 -8
  12. kubectl_mcp_tool/observability/__init__.py +59 -0
  13. kubectl_mcp_tool/observability/metrics.py +223 -0
  14. kubectl_mcp_tool/observability/stats.py +255 -0
  15. kubectl_mcp_tool/observability/tracing.py +335 -0
  16. kubectl_mcp_tool/prompts/__init__.py +43 -0
  17. kubectl_mcp_tool/prompts/builtin.py +695 -0
  18. kubectl_mcp_tool/prompts/custom.py +298 -0
  19. kubectl_mcp_tool/prompts/prompts.py +180 -4
  20. kubectl_mcp_tool/safety.py +155 -0
  21. kubectl_mcp_tool/tools/__init__.py +20 -0
  22. kubectl_mcp_tool/tools/backup.py +881 -0
  23. kubectl_mcp_tool/tools/capi.py +727 -0
  24. kubectl_mcp_tool/tools/certs.py +709 -0
  25. kubectl_mcp_tool/tools/cilium.py +582 -0
  26. kubectl_mcp_tool/tools/cluster.py +384 -0
  27. kubectl_mcp_tool/tools/gitops.py +552 -0
  28. kubectl_mcp_tool/tools/keda.py +464 -0
  29. kubectl_mcp_tool/tools/kiali.py +652 -0
  30. kubectl_mcp_tool/tools/kubevirt.py +803 -0
  31. kubectl_mcp_tool/tools/policy.py +554 -0
  32. kubectl_mcp_tool/tools/rollouts.py +790 -0
  33. tests/test_browser.py +2 -2
  34. tests/test_config.py +386 -0
  35. tests/test_ecosystem.py +331 -0
  36. tests/test_mcp_integration.py +251 -0
  37. tests/test_observability.py +521 -0
  38. tests/test_prompts.py +716 -0
  39. tests/test_safety.py +218 -0
  40. tests/test_tools.py +70 -8
  41. kubectl_mcp_server-1.15.0.dist-info/RECORD +0 -49
  42. {kubectl_mcp_server-1.15.0.dist-info → kubectl_mcp_server-1.17.0.dist-info}/WHEEL +0 -0
  43. {kubectl_mcp_server-1.15.0.dist-info → kubectl_mcp_server-1.17.0.dist-info}/entry_points.txt +0 -0
  44. {kubectl_mcp_server-1.15.0.dist-info → kubectl_mcp_server-1.17.0.dist-info}/licenses/LICENSE +0 -0
  45. {kubectl_mcp_server-1.15.0.dist-info → kubectl_mcp_server-1.17.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,695 @@
1
+ """
2
+ Built-in prompts for kubectl-mcp-server.
3
+
4
+ These prompts provide comprehensive workflows for common Kubernetes tasks.
5
+ Users can override any of these by defining a prompt with the same name
6
+ in their custom configuration.
7
+ """
8
+
9
+ from .custom import CustomPrompt, PromptArgument, PromptMessage
10
+
11
+
12
# Built-in "cluster-health-check" prompt: one user message requesting a health
# assessment of the whole cluster, or of a single namespace when one is given.
#
# Arguments (all optional): `namespace` (no default), `check_events` and
# `check_metrics` (both default to the string "true").
#
# Template markers used below: `{{name}}` is a value placeholder and
# `{{#name}}...{{/name}}` brackets text tied to that argument. The renderer is
# not defined in this module.
# NOTE(review): presumably a bracketed section is emitted only when its
# argument is set/truthy -- confirm in `.custom`, in particular whether passing
# the string "false" really suppresses the check_events / check_metrics parts.
# NOTE(review): any indentation of nested list items inside the template should
# be confirmed against the packaged file.
CLUSTER_HEALTH_CHECK = CustomPrompt(
    name="cluster-health-check",
    title="Cluster Health Check",
    description="Comprehensive health assessment of your Kubernetes cluster",
    arguments=[
        PromptArgument(
            name="namespace",
            description="Limit to specific namespace (optional)",
            required=False
        ),
        PromptArgument(
            name="check_events",
            description="Include recent events in the check (true/false)",
            required=False,
            default="true"
        ),
        PromptArgument(
            name="check_metrics",
            description="Include resource metrics (true/false)",
            required=False,
            default="true"
        ),
    ],
    messages=[
        PromptMessage(
            role="user",
            content="""Perform a comprehensive health check of the Kubernetes cluster{{#namespace}} in namespace {{namespace}}{{/namespace}}.

## Check Categories

### 1. Node Health
- Check node status and conditions (Ready, MemoryPressure, DiskPressure, PIDPressure)
- Verify all nodes are reporting Ready
- Check node resource capacity and allocatable resources
{{#check_metrics}}- Review node CPU and memory utilization{{/check_metrics}}

### 2. Pod Health{{#namespace}} in {{namespace}}{{/namespace}}
- Identify pods in problematic states:
- CrashLoopBackOff: Application crash on start
- ImagePullBackOff: Image not found or registry auth issues
- Pending: Scheduling issues or resource constraints
- OOMKilled: Memory limit exceeded
- Error: Container exited with error
- Check restart counts for running pods
{{#check_metrics}}- Review pod resource usage vs limits{{/check_metrics}}

### 3. Workload Status{{#namespace}} in {{namespace}}{{/namespace}}
- Deployment replica status (desired vs ready)
- StatefulSet replica status
- DaemonSet desired vs scheduled vs ready
- Job completion status
- CronJob schedule health

### 4. Storage Health
- PVC binding status (Bound, Pending, Lost)
- PV availability and reclaim status
- StorageClass availability

### 5. Networking Health
- Service endpoint health (services with no endpoints)
- Ingress configuration validity
- NetworkPolicy presence and coverage

{{#check_events}}### 6. Recent Events{{#namespace}} in {{namespace}}{{/namespace}}
- Warning events in the last hour
- Error events indicating failures
- Event patterns suggesting issues{{/check_events}}

## Output Required

Provide:
1. **Overall Health Status**: Healthy, Warning, or Critical
2. **Summary Statistics**:
- Total nodes and healthy count
- Total pods and healthy count
- Workloads with issues
3. **Critical Issues** (require immediate attention)
4. **Warnings** (should be addressed soon)
5. **Recommendations** for improvements

Start the health check now using the appropriate kubectl tools."""
        ),
    ]
)
97
+
98
+
99
# Built-in "debug-workload" prompt: one user message that walks through a
# six-step investigation of a single workload and asks for a per-issue report.
#
# Arguments: `workload_name` is required; `namespace` (default "default"),
# `workload_type` (default "deployment") and `include_related` (default "true")
# are optional.
#
# `{{name}}` markers are value placeholders; the `{{#include_related}}` ...
# `{{/include_related}}` pair brackets Step 6. The renderer lives outside this
# module.
# NOTE(review): presumably Step 6 is dropped when include_related is
# unset/falsy -- confirm how the renderer in `.custom` treats the string
# "false".
# NOTE(review): any indentation of nested list items inside the template should
# be confirmed against the packaged file.
DEBUG_WORKLOAD = CustomPrompt(
    name="debug-workload",
    title="Debug Workload Issues",
    description="Diagnose and troubleshoot Kubernetes workload problems",
    arguments=[
        PromptArgument(
            name="workload_name",
            description="Name of the workload to debug",
            required=True
        ),
        PromptArgument(
            name="namespace",
            description="Namespace of the workload",
            required=False,
            default="default"
        ),
        PromptArgument(
            name="workload_type",
            description="Type of workload (deployment, statefulset, daemonset, pod)",
            required=False,
            default="deployment"
        ),
        PromptArgument(
            name="include_related",
            description="Check related resources (services, configmaps, secrets)",
            required=False,
            default="true"
        ),
    ],
    messages=[
        PromptMessage(
            role="user",
            content="""Debug the {{workload_type}} '{{workload_name}}' in namespace '{{namespace}}'.

## Debugging Steps

### Step 1: Identify the Workload
- Get the {{workload_type}} details
- List all pods belonging to this workload
- Note the current status and any error conditions

### Step 2: Pod Status Analysis
For each pod:
1. Check phase (Running, Pending, Failed, Unknown)
2. Check container readiness
3. Count restarts
4. Identify the problematic state if any

### Step 3: Event Investigation
- Get events for the {{workload_type}}
- Get events for each pod
- Look for:
- FailedScheduling
- FailedCreate
- FailedMount
- Unhealthy (probe failures)
- BackOff (crash loops)

### Step 4: Log Analysis
For pods not in Running/Ready state:
- Get current logs (last 100 lines)
- If crashed, get previous container logs
- Look for:
- Stack traces / exceptions
- Connection errors
- Configuration errors
- Permission issues

### Step 5: Resource Analysis
- Check resource requests vs limits
- Verify if pods are OOMKilled
- Check if resources are available on nodes

{{#include_related}}### Step 6: Related Resources
Check dependencies:
- Services pointing to this workload
- ConfigMaps mounted by pods
- Secrets referenced by pods
- PVCs used by pods
- ServiceAccount permissions{{/include_related}}

## Output Required

For each issue found, provide:
1. **Issue Description**: What is wrong
2. **Evidence**: Specific log lines, events, or status
3. **Root Cause**: Likely reason for the issue
4. **Resolution**: Steps to fix the problem
5. **Verification**: How to confirm the fix worked

Start debugging now."""
        ),
    ]
)
194
+
195
+
196
# Built-in "resource-usage" prompt: one user message asking for a resource
# consumption analysis (nodes, pods, requests vs usage, autoscaling candidates)
# and a prioritized list of optimization actions.
#
# Arguments (all optional): `namespace` (no default), `threshold_cpu` and
# `threshold_memory` (both default to the string "80"; the template appends
# "%" itself), and `include_recommendations` (default "true").
#
# `{{name}}` markers are value placeholders; `{{#name}}...{{/name}}` pairs
# bracket text tied to that argument. The renderer lives outside this module.
# NOTE(review): presumably section 5 is dropped when include_recommendations is
# unset/falsy -- confirm how the renderer in `.custom` treats the string
# "false".
# NOTE(review): any indentation of nested list items inside the template should
# be confirmed against the packaged file.
RESOURCE_USAGE = CustomPrompt(
    name="resource-usage",
    title="Resource Usage Analysis",
    description="Analyze resource consumption and identify optimization opportunities",
    arguments=[
        PromptArgument(
            name="namespace",
            description="Namespace to analyze (optional, defaults to all)",
            required=False
        ),
        PromptArgument(
            name="threshold_cpu",
            description="CPU utilization threshold percentage for alerts",
            required=False,
            default="80"
        ),
        PromptArgument(
            name="threshold_memory",
            description="Memory utilization threshold percentage for alerts",
            required=False,
            default="80"
        ),
        PromptArgument(
            name="include_recommendations",
            description="Include right-sizing recommendations",
            required=False,
            default="true"
        ),
    ],
    messages=[
        PromptMessage(
            role="user",
            content="""Analyze resource usage{{#namespace}} in namespace {{namespace}}{{/namespace}} and identify optimization opportunities.

## Analysis Areas

### 1. Node Resource Utilization
- Current CPU and memory usage per node
- Identify nodes exceeding {{threshold_cpu}}% CPU
- Identify nodes exceeding {{threshold_memory}}% memory
- Node capacity vs allocatable resources

### 2. Pod Resource Consumption{{#namespace}} in {{namespace}}{{/namespace}}
- Top CPU consumers
- Top memory consumers
- Pods near resource limits
- Pods with no resource limits (risk)

### 3. Request vs Usage Analysis
Compare for each workload:
- CPU requested vs actual usage
- Memory requested vs actual usage
- Identify over-provisioned resources (waste)
- Identify under-provisioned resources (risk)

### 4. Resource Efficiency
Calculate:
- Overall cluster utilization
- Namespace-level utilization{{#namespace}} for {{namespace}}{{/namespace}}
- Cost of unused/reserved resources

{{#include_recommendations}}### 5. Right-sizing Recommendations
For each over/under-provisioned workload:
- Current requests/limits
- Recommended requests/limits
- Estimated savings/risk reduction
- Priority (high/medium/low){{/include_recommendations}}

### 6. Autoscaling Candidates
Identify workloads that would benefit from:
- Horizontal Pod Autoscaler (HPA)
- Vertical Pod Autoscaler (VPA)
- Cluster Autoscaler configuration

## Output Required

Provide:
1. **Summary Dashboard**:
- Cluster utilization overview
- Resource efficiency score
- Potential savings estimate

2. **Alerts** (immediate attention):
- Workloads exceeding thresholds
- Workloads without limits
- Nodes at capacity

3. **Optimization Actions** (prioritized list):
- Quick wins (immediate impact)
- Medium-term improvements
- Long-term architecture changes

Start the resource analysis now."""
        ),
    ]
)
293
+
294
+
295
# Built-in "security-posture" prompt: one user message requesting a security
# review (RBAC, pod security, images, network, secrets, supply chain) with
# findings classified Critical/High/Medium/Low and an A-F overall grade.
#
# Arguments (all optional): `namespace` (no default), and the three switches
# `check_rbac`, `check_network`, `check_secrets` (each defaults to the string
# "true"), which bracket sections 1, 4 and 5 of the template respectively.
#
# `{{name}}` markers are value placeholders; `{{#name}}...{{/name}}` pairs
# bracket text tied to that argument. The renderer lives outside this module.
# NOTE(review): presumably a bracketed section is dropped when its switch is
# unset/falsy -- confirm how the renderer in `.custom` treats the string
# "false". Section numbers are literal text, so a dropped section would leave a
# gap in the numbering.
# NOTE(review): any indentation of nested list items inside the template should
# be confirmed against the packaged file.
SECURITY_POSTURE = CustomPrompt(
    name="security-posture",
    title="Security Posture Review",
    description="Comprehensive security assessment of your Kubernetes cluster",
    arguments=[
        PromptArgument(
            name="namespace",
            description="Namespace to analyze (optional, defaults to all)",
            required=False
        ),
        PromptArgument(
            name="check_rbac",
            description="Include RBAC analysis",
            required=False,
            default="true"
        ),
        PromptArgument(
            name="check_network",
            description="Include network policy analysis",
            required=False,
            default="true"
        ),
        PromptArgument(
            name="check_secrets",
            description="Include secrets management analysis",
            required=False,
            default="true"
        ),
    ],
    messages=[
        PromptMessage(
            role="user",
            content="""Perform a security posture review{{#namespace}} for namespace {{namespace}}{{/namespace}}.

## Security Assessment Areas

{{#check_rbac}}### 1. RBAC Analysis
- Review ClusterRoles with elevated privileges
- Identify ClusterRoleBindings to cluster-admin
- Check for overly permissive roles (wildcards)
- ServiceAccount analysis:
- Default SA usage
- Token automounting
- Unnecessary privileges{{/check_rbac}}

### 2. Pod Security Standards{{#namespace}} in {{namespace}}{{/namespace}}
Check for security anti-patterns:
- [ ] Privileged containers
- [ ] Running as root
- [ ] Host namespace usage (hostNetwork, hostPID, hostIPC)
- [ ] Host path mounts
- [ ] Privilege escalation allowed
- [ ] Missing security contexts
- [ ] Writable root filesystem

### 3. Image Security
- Images using 'latest' tag
- Images from untrusted registries
- Missing imagePullPolicy
- Missing imagePullSecrets

{{#check_network}}### 4. Network Security
- Namespaces without NetworkPolicies
- Missing default deny policies
- Overly permissive ingress rules
- Unprotected egress traffic{{/check_network}}

{{#check_secrets}}### 5. Secrets Management
- Secrets mounted as environment variables (less secure)
- Secrets in pod specs (should use references)
- Unused secrets
- Secrets without encryption at rest{{/check_secrets}}

### 6. Supply Chain Security
- Image vulnerability scanning status
- Admission controller configuration
- Pod Security Standards enforcement

## Risk Categories

For each finding, classify as:
- **Critical**: Immediate exploitation risk
- **High**: Significant security gap
- **Medium**: Best practice violation
- **Low**: Hardening opportunity

## Output Required

Provide:
1. **Security Score**: Overall assessment (A-F grade)

2. **Critical Findings** (fix immediately):
- Issue description
- Affected resources
- Remediation steps

3. **High Priority Issues**:
- Issue description
- Risk explanation
- Fix recommendation

4. **Compliance Checklist**:
- Pod Security Standards
- CIS Kubernetes Benchmark highlights
- Network segmentation

5. **Remediation Plan** (prioritized):
- Quick wins
- Medium-term hardening
- Long-term architecture

Start the security review now."""
        ),
    ]
)
411
+
412
+
413
# Built-in "deployment-checklist" prompt: one user message containing a
# ten-part pre-deployment checklist plus pre- and post-deployment verification
# commands for a single application.
#
# Arguments: `app_name`, `namespace` and `image` are required; `replicas` is
# optional and defaults to the string "2".
#
# The template uses only `{{name}}` value placeholders (no bracketed
# sections). The back-quoted calls such as `get_namespaces()` are literal text
# sent to the model; nothing in this module invokes them.
DEPLOYMENT_CHECKLIST = CustomPrompt(
    name="deployment-checklist",
    title="Production Deployment Checklist",
    description="Pre-deployment verification checklist for production workloads",
    arguments=[
        PromptArgument(
            name="app_name",
            description="Application name being deployed",
            required=True
        ),
        PromptArgument(
            name="namespace",
            description="Target namespace for deployment",
            required=True
        ),
        PromptArgument(
            name="image",
            description="Container image being deployed",
            required=True
        ),
        PromptArgument(
            name="replicas",
            description="Number of replicas",
            required=False,
            default="2"
        ),
    ],
    messages=[
        PromptMessage(
            role="user",
            content="""Verify deployment readiness for '{{app_name}}' ({{image}}) in namespace '{{namespace}}' with {{replicas}} replicas.

## Pre-Deployment Checklist

### 1. Namespace Preparation
- [ ] Namespace '{{namespace}}' exists
- [ ] Resource quotas are set
- [ ] LimitRanges are configured
- [ ] Network policies exist

### 2. Image Verification
- [ ] Image '{{image}}' uses specific tag (not :latest)
- [ ] Image exists and is pullable
- [ ] Image has been scanned for vulnerabilities
- [ ] imagePullSecrets are configured if needed

### 3. Resource Configuration
- [ ] CPU requests and limits are set
- [ ] Memory requests and limits are set
- [ ] Resources are appropriate for workload
- [ ] Pod Disruption Budget is configured for {{replicas}} replicas

### 4. Health Checks
- [ ] Liveness probe is configured
- [ ] Readiness probe is configured
- [ ] Startup probe (if slow starting)
- [ ] Probe endpoints are implemented

### 5. Security Configuration
- [ ] runAsNonRoot: true
- [ ] readOnlyRootFilesystem: true
- [ ] allowPrivilegeEscalation: false
- [ ] Capabilities dropped
- [ ] SecurityContext is set

### 6. Configuration Management
- [ ] ConfigMaps are created
- [ ] Secrets are created
- [ ] Environment variables are set
- [ ] Volume mounts are configured

### 7. Service Configuration
- [ ] Service is created
- [ ] Ports are correctly mapped
- [ ] Service selectors match pod labels
- [ ] Service type is appropriate

### 8. Scaling Configuration
- [ ] HPA is configured (if needed)
- [ ] Scaling metrics are appropriate
- [ ] Min/max replicas are set correctly

### 9. Observability
- [ ] Logging is configured
- [ ] Metrics endpoints exposed
- [ ] Tracing enabled (if applicable)
- [ ] Alerts are configured

### 10. Rollout Strategy
- [ ] RollingUpdate strategy configured
- [ ] maxSurge and maxUnavailable set
- [ ] Rollback plan documented

## Verification Commands

Run these checks before deployment:
1. Verify namespace: `get_namespaces()`
2. Check existing resources: `get_deployments(namespace="{{namespace}}")`
3. Verify configmaps: `get_configmaps(namespace="{{namespace}}")`
4. Check services: `get_services(namespace="{{namespace}}")`

## Post-Deployment Verification

After deploying:
1. Watch rollout: `kubectl_rollout_status("deployment", "{{app_name}}", "{{namespace}}")`
2. Check pods: `get_pods(namespace="{{namespace}}")`
3. Get logs: `get_logs(pod_name, "{{namespace}}")`
4. Verify endpoints: Check service has endpoints

Start the deployment checklist verification now."""
        ),
    ]
)
527
+
528
+
529
# Built-in "incident-response" prompt: one user message laying out a five-phase
# runbook (triage, investigation, mitigation, verification, post-incident) and
# an empty incident-log table for one affected service.
#
# Arguments: `incident_type`, `affected_service` and `namespace` are required;
# `severity` is optional and defaults to the string "high".
#
# `{{name}}` markers are value placeholders. The back-quoted / fenced calls
# (e.g. `kubectl_scale(...)`) are literal text sent to the model; nothing in
# this module invokes them, and names like `pod_name` and `replicas+2` are
# left for the model to fill in.
# NOTE(review): the `{{#incident_type}}` ... `{{/incident_type}}` pair wraps
# all five investigation playbooks at once. Because incident_type is a required
# argument, this section presumably always renders in full regardless of the
# value supplied (it does not select a single playbook) -- confirm against the
# renderer in `.custom` that this is intended.
# NOTE(review): any indentation of nested list items and table padding inside
# the template should be confirmed against the packaged file.
INCIDENT_RESPONSE = CustomPrompt(
    name="incident-response",
    title="Incident Response Guide",
    description="Structured incident response workflow for Kubernetes issues",
    arguments=[
        PromptArgument(
            name="incident_type",
            description="Type of incident (pod-crash, high-latency, oom, network, storage)",
            required=True
        ),
        PromptArgument(
            name="affected_service",
            description="Name of affected service/workload",
            required=True
        ),
        PromptArgument(
            name="namespace",
            description="Namespace of affected resources",
            required=True
        ),
        PromptArgument(
            name="severity",
            description="Incident severity (critical, high, medium, low)",
            required=False,
            default="high"
        ),
    ],
    messages=[
        PromptMessage(
            role="user",
            content="""## INCIDENT RESPONSE: {{incident_type}}

**Affected**: {{affected_service}} in namespace {{namespace}}
**Severity**: {{severity}}

## Phase 1: Triage (First 5 minutes)

### Immediate Assessment
1. Verify the issue:
- `get_pods(namespace="{{namespace}}")` - Check pod status
- `kubectl_describe("deployment", "{{affected_service}}", "{{namespace}}")` - Get details

2. Determine blast radius:
- Is this isolated to {{affected_service}}?
- Are other services in {{namespace}} affected?
- Are other namespaces affected?

3. User impact assessment:
- Is the service completely down?
- Is it degraded?
- Are errors being returned?

## Phase 2: Investigation (Next 15 minutes)

### For {{incident_type}} incident:

{{#incident_type}}
**Pod Crash Investigation:**
- `get_pod_events("{{affected_service}}", "{{namespace}}")` - Recent events
- `get_logs(pod_name, "{{namespace}}", tail=200)` - Current logs
- `get_logs(pod_name, "{{namespace}}", previous=true)` - Previous container logs
- Check for OOMKilled, CrashLoopBackOff patterns

**High Latency Investigation:**
- Check pod resource usage
- Review HPA scaling events
- Check dependent services
- Review ingress/load balancer metrics

**OOM Investigation:**
- `kubectl_describe("pod", pod_name, "{{namespace}}")` - Check memory limits
- Review container memory usage patterns
- Check for memory leaks in logs
- Compare to historical usage

**Network Investigation:**
- Check service endpoints exist
- Verify NetworkPolicies aren't blocking
- Test DNS resolution
- Check for connection errors in logs

**Storage Investigation:**
- `kubectl_get("pvc", namespace="{{namespace}}")` - PVC status
- Check if PV is bound
- Verify storage class availability
- Check node storage capacity
{{/incident_type}}

## Phase 3: Mitigation

### Quick Mitigation Options:
1. **Scale up**: Increase replicas if resource-related
2. **Rollback**: Revert to previous working version
3. **Restart**: Delete problematic pods
4. **Cordon**: Remove problematic node from scheduling

### Execute mitigation:
```
# Option 1: Scale up
kubectl_scale("deployment", "{{affected_service}}", replicas+2, "{{namespace}}")

# Option 2: Rollback
kubectl_rollout("undo", "deployment", "{{affected_service}}", "{{namespace}}")

# Option 3: Restart
kubectl_delete_pod(pod_name, "{{namespace}}", force=false)
```

## Phase 4: Verification

1. Confirm mitigation worked:
- `get_pods(namespace="{{namespace}}")` - All pods healthy
- Service is responding normally
- Error rates decreased

2. Monitor for recurrence:
- Watch pod events
- Monitor resource usage
- Track error rates

## Phase 5: Post-Incident

After incident is resolved:
1. Document timeline
2. Capture root cause
3. Identify preventive measures
4. Update runbooks

## Incident Log Template

| Time | Action | Result |
|------|--------|--------|
| | Issue reported | |
| | Investigation started | |
| | Root cause identified | |
| | Mitigation applied | |
| | Service restored | |

Start incident response now."""
        ),
    ]
)
672
+
673
+
674
# Registry of every prompt defined in this module, in definition order.
# The lookup helpers in this module read from this list.
BUILTIN_PROMPTS = [
    CLUSTER_HEALTH_CHECK, DEBUG_WORKLOAD, RESOURCE_USAGE,
    SECURITY_POSTURE, DEPLOYMENT_CHECKLIST, INCIDENT_RESPONSE,
]
683
+
684
+
685
def get_builtin_prompts() -> list:
    """Return a new list containing every built-in prompt.

    The result is a shallow copy of ``BUILTIN_PROMPTS``: callers may append,
    remove or reorder entries without touching the module-level registry,
    while the prompt objects themselves are shared with it.
    """
    return list(BUILTIN_PROMPTS)
688
+
689
+
690
def get_builtin_prompt_by_name(name: str) -> CustomPrompt | None:
    """Look up a built-in prompt by its ``name`` attribute.

    Args:
        name: Prompt name to search for; compared with ``==`` (exact match).

    Returns:
        The first entry of ``BUILTIN_PROMPTS`` whose ``name`` equals *name*,
        or ``None`` when no entry matches.
    """
    matching = (candidate for candidate in BUILTIN_PROMPTS if candidate.name == name)
    # next() with a default yields the first hit, or None if the scan is empty.
    return next(matching, None)