@techwavedev/agi-agent-kit 1.1.7 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @techwavedev/agi-agent-kit might be problematic. Click here for more details.

Files changed (111)
  1. package/CHANGELOG.md +82 -1
  2. package/README.md +190 -12
  3. package/bin/init.js +30 -2
  4. package/package.json +6 -3
  5. package/templates/base/AGENTS.md +54 -23
  6. package/templates/base/README.md +325 -0
  7. package/templates/base/directives/memory_integration.md +95 -0
  8. package/templates/base/execution/memory_manager.py +309 -0
  9. package/templates/base/execution/session_boot.py +218 -0
  10. package/templates/base/execution/session_init.py +320 -0
  11. package/templates/base/skill-creator/SKILL_skillcreator.md +23 -36
  12. package/templates/base/skill-creator/scripts/init_skill.py +18 -135
  13. package/templates/skills/ec/README.md +31 -0
  14. package/templates/skills/ec/aws/SKILL.md +1020 -0
  15. package/templates/skills/ec/aws/defaults.yaml +13 -0
  16. package/templates/skills/ec/aws/references/common_patterns.md +80 -0
  17. package/templates/skills/ec/aws/references/mcp_servers.md +98 -0
  18. package/templates/skills/ec/aws-terraform/SKILL.md +349 -0
  19. package/templates/skills/ec/aws-terraform/references/best_practices.md +394 -0
  20. package/templates/skills/ec/aws-terraform/references/checkov_reference.md +337 -0
  21. package/templates/skills/ec/aws-terraform/scripts/configure_mcp.py +150 -0
  22. package/templates/skills/ec/confluent-kafka/SKILL.md +655 -0
  23. package/templates/skills/ec/confluent-kafka/references/ansible_playbooks.md +792 -0
  24. package/templates/skills/ec/confluent-kafka/references/ec_deployment.md +579 -0
  25. package/templates/skills/ec/confluent-kafka/references/kraft_migration.md +490 -0
  26. package/templates/skills/ec/confluent-kafka/references/troubleshooting.md +778 -0
  27. package/templates/skills/ec/confluent-kafka/references/upgrade_7x_to_8x.md +488 -0
  28. package/templates/skills/ec/confluent-kafka/scripts/kafka_health_check.py +435 -0
  29. package/templates/skills/ec/confluent-kafka/scripts/upgrade_preflight.py +568 -0
  30. package/templates/skills/ec/confluent-kafka/scripts/validate_config.py +455 -0
  31. package/templates/skills/ec/consul/SKILL.md +427 -0
  32. package/templates/skills/ec/consul/references/acl_setup.md +168 -0
  33. package/templates/skills/ec/consul/references/ha_config.md +196 -0
  34. package/templates/skills/ec/consul/references/troubleshooting.md +267 -0
  35. package/templates/skills/ec/consul/references/upgrades.md +213 -0
  36. package/templates/skills/ec/consul/scripts/consul_health_report.py +530 -0
  37. package/templates/skills/ec/consul/scripts/consul_status.py +264 -0
  38. package/templates/skills/ec/consul/scripts/generate_values.py +170 -0
  39. package/templates/skills/ec/documentation/SKILL.md +351 -0
  40. package/templates/skills/ec/documentation/references/best_practices.md +201 -0
  41. package/templates/skills/ec/documentation/scripts/analyze_code.py +307 -0
  42. package/templates/skills/ec/documentation/scripts/detect_changes.py +460 -0
  43. package/templates/skills/ec/documentation/scripts/generate_changelog.py +312 -0
  44. package/templates/skills/ec/documentation/scripts/sync_docs.py +272 -0
  45. package/templates/skills/ec/documentation/scripts/update_skill_docs.py +366 -0
  46. package/templates/skills/ec/gitlab/SKILL.md +529 -0
  47. package/templates/skills/ec/gitlab/references/agent_installation.md +416 -0
  48. package/templates/skills/ec/gitlab/references/api_reference.md +508 -0
  49. package/templates/skills/ec/gitlab/references/gitops_flux.md +465 -0
  50. package/templates/skills/ec/gitlab/references/troubleshooting.md +518 -0
  51. package/templates/skills/ec/gitlab/scripts/generate_agent_values.py +329 -0
  52. package/templates/skills/ec/gitlab/scripts/gitlab_agent_status.py +414 -0
  53. package/templates/skills/ec/jira/SKILL.md +484 -0
  54. package/templates/skills/ec/jira/references/jql_reference.md +148 -0
  55. package/templates/skills/ec/jira/scripts/add_comment.py +91 -0
  56. package/templates/skills/ec/jira/scripts/bulk_log_work.py +124 -0
  57. package/templates/skills/ec/jira/scripts/create_ticket.py +162 -0
  58. package/templates/skills/ec/jira/scripts/get_ticket.py +191 -0
  59. package/templates/skills/ec/jira/scripts/jira_client.py +383 -0
  60. package/templates/skills/ec/jira/scripts/log_work.py +154 -0
  61. package/templates/skills/ec/jira/scripts/search_tickets.py +104 -0
  62. package/templates/skills/ec/jira/scripts/update_comment.py +67 -0
  63. package/templates/skills/ec/jira/scripts/update_ticket.py +161 -0
  64. package/templates/skills/ec/karpenter/SKILL.md +301 -0
  65. package/templates/skills/ec/karpenter/references/ec2nodeclasses.md +421 -0
  66. package/templates/skills/ec/karpenter/references/migration.md +396 -0
  67. package/templates/skills/ec/karpenter/references/nodepools.md +400 -0
  68. package/templates/skills/ec/karpenter/references/troubleshooting.md +359 -0
  69. package/templates/skills/ec/karpenter/scripts/generate_ec2nodeclass.py +187 -0
  70. package/templates/skills/ec/karpenter/scripts/generate_nodepool.py +245 -0
  71. package/templates/skills/ec/karpenter/scripts/karpenter_status.py +359 -0
  72. package/templates/skills/ec/opensearch/SKILL.md +720 -0
  73. package/templates/skills/ec/opensearch/references/ml_neural_search.md +576 -0
  74. package/templates/skills/ec/opensearch/references/operator.md +532 -0
  75. package/templates/skills/ec/opensearch/references/query_dsl.md +532 -0
  76. package/templates/skills/ec/opensearch/scripts/configure_mcp.py +148 -0
  77. package/templates/skills/ec/victoriametrics/SKILL.md +598 -0
  78. package/templates/skills/ec/victoriametrics/references/kubernetes.md +531 -0
  79. package/templates/skills/ec/victoriametrics/references/prometheus_migration.md +333 -0
  80. package/templates/skills/ec/victoriametrics/references/troubleshooting.md +442 -0
  81. package/templates/skills/knowledge/SKILLS_CATALOG.md +274 -4
  82. package/templates/skills/knowledge/intelligent-routing/SKILL.md +237 -164
  83. package/templates/skills/knowledge/parallel-agents/SKILL.md +345 -73
  84. package/templates/skills/knowledge/plugin-discovery/SKILL.md +582 -0
  85. package/templates/skills/knowledge/plugin-discovery/scripts/platform_setup.py +1083 -0
  86. package/templates/skills/knowledge/design-md/README.md +0 -34
  87. package/templates/skills/knowledge/design-md/SKILL.md +0 -193
  88. package/templates/skills/knowledge/design-md/examples/DESIGN.md +0 -154
  89. package/templates/skills/knowledge/notebooklm-mcp/SKILL.md +0 -71
  90. package/templates/skills/knowledge/notebooklm-mcp/assets/example_asset.txt +0 -24
  91. package/templates/skills/knowledge/notebooklm-mcp/references/api_reference.md +0 -34
  92. package/templates/skills/knowledge/notebooklm-mcp/scripts/example.py +0 -19
  93. package/templates/skills/knowledge/react-components/README.md +0 -36
  94. package/templates/skills/knowledge/react-components/SKILL.md +0 -53
  95. package/templates/skills/knowledge/react-components/examples/gold-standard-card.tsx +0 -80
  96. package/templates/skills/knowledge/react-components/package-lock.json +0 -231
  97. package/templates/skills/knowledge/react-components/package.json +0 -16
  98. package/templates/skills/knowledge/react-components/resources/architecture-checklist.md +0 -15
  99. package/templates/skills/knowledge/react-components/resources/component-template.tsx +0 -37
  100. package/templates/skills/knowledge/react-components/resources/stitch-api-reference.md +0 -14
  101. package/templates/skills/knowledge/react-components/resources/style-guide.json +0 -27
  102. package/templates/skills/knowledge/react-components/scripts/fetch-stitch.sh +0 -30
  103. package/templates/skills/knowledge/react-components/scripts/validate.js +0 -68
  104. package/templates/skills/knowledge/self-update/SKILL.md +0 -60
  105. package/templates/skills/knowledge/self-update/scripts/update_kit.py +0 -103
  106. package/templates/skills/knowledge/stitch-loop/README.md +0 -54
  107. package/templates/skills/knowledge/stitch-loop/SKILL.md +0 -235
  108. package/templates/skills/knowledge/stitch-loop/examples/SITE.md +0 -73
  109. package/templates/skills/knowledge/stitch-loop/examples/next-prompt.md +0 -25
  110. package/templates/skills/knowledge/stitch-loop/resources/baton-schema.md +0 -61
  111. package/templates/skills/knowledge/stitch-loop/resources/site-template.md +0 -104
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script: generate_nodepool.py
4
+ Purpose: Generate Karpenter NodePool YAML configuration
5
+
6
+ Usage:
7
+ python generate_nodepool.py --name <name> [options]
8
+
9
+ Arguments:
10
+ --name NodePool name (required)
11
+ --nodeclass EC2NodeClass name (default: default)
12
+ --instance-types Comma-separated instance types (e.g., "m5.large,m5.xlarge")
13
+ --instance-category Comma-separated categories (e.g., "c,m,r")
14
+ --instance-gen Minimum instance generation (default: 5)
15
+ --capacity-type Capacity type: spot, on-demand, both (default: both)
16
+ --arch Architecture: amd64, arm64, both (default: amd64)
17
+ --cpu-limit CPU limit for the pool (default: 1000)
18
+ --memory-limit Memory limit in Gi (default: 2000)
19
+ --consolidation Consolidation policy: WhenEmpty, WhenEmptyOrUnderutilized (default: WhenEmptyOrUnderutilized)
20
+ --consolidate-after Time before consolidation (default: 1m)
21
+ --expire-after Node expiration time (default: 720h)
22
+ --taints Comma-separated taints (format: key=value:effect)
23
+ --labels Comma-separated labels (format: key=value)
24
+ --output Output file (default: stdout)
25
+
26
+ Exit Codes:
27
+ 0 - Success
28
+ 1 - Invalid arguments
29
+ """
30
+
31
+ import argparse
32
+ import json
33
+ import sys
34
+ from typing import Any
35
+
36
+ import yaml
37
+
38
+
39
def parse_taints(taints_str: str) -> list[dict]:
    """Convert a comma-separated taint spec into a list of taint dicts.

    Each entry has the form ``key=value:effect``; entries without a ``:``
    separator are silently skipped, and a missing ``=`` yields an empty
    value. Falsy input returns an empty list.
    """
    if not taints_str:
        return []

    parsed = []
    for raw in taints_str.split(","):
        entry = raw.strip()
        if ":" not in entry:
            # Not a valid key[=value]:effect triple — ignore it.
            continue
        key_value, effect = entry.rsplit(":", 1)
        key, sep, value = key_value.partition("=")
        parsed.append({
            "key": key,
            "value": value if sep else "",
            "effect": effect,
        })
    return parsed
63
+
64
+
65
def parse_labels(labels_str: str) -> dict:
    """Convert a comma-separated ``key=value`` list into a dict.

    Entries without an ``=`` are ignored; falsy input returns ``{}``.
    Later duplicates of a key overwrite earlier ones.
    """
    if not labels_str:
        return {}

    pairs = (item.strip().partition("=") for item in labels_str.split(","))
    return {key: value for key, sep, value in pairs if sep}
78
+
79
+
80
def generate_nodepool(args: argparse.Namespace) -> dict:
    """Build a Karpenter v1 NodePool manifest as a plain dict.

    Assembles scheduling requirements (architecture, OS, capacity type,
    instance selection), optional taints/labels, resource limits, and the
    disruption policy from the parsed CLI arguments. Explicit
    ``--instance-types`` take precedence over
    ``--instance-category``/``--instance-gen``.

    Args:
        args: Parsed CLI namespace from ``main()``.

    Returns:
        A dict ready to be serialized as the NodePool YAML/JSON manifest.
    """
    requirements = []

    # Architecture
    arch_values = ["amd64", "arm64"] if args.arch == "both" else [args.arch]
    requirements.append({
        "key": "kubernetes.io/arch",
        "operator": "In",
        "values": arch_values
    })

    # OS (always Linux for now)
    requirements.append({
        "key": "kubernetes.io/os",
        "operator": "In",
        "values": ["linux"]
    })

    # Capacity type
    if args.capacity_type == "both":
        capacity_values = ["spot", "on-demand"]
    else:
        capacity_values = [args.capacity_type]
    requirements.append({
        "key": "karpenter.sh/capacity-type",
        "operator": "In",
        "values": capacity_values
    })

    # Instance types or categories (explicit types win)
    if args.instance_types:
        requirements.append({
            "key": "node.kubernetes.io/instance-type",
            "operator": "In",
            "values": [t.strip() for t in args.instance_types.split(",")]
        })
    elif args.instance_category:
        requirements.append({
            "key": "karpenter.k8s.aws/instance-category",
            "operator": "In",
            "values": [c.strip() for c in args.instance_category.split(",")]
        })
        # BUGFIX: "Gt" is strictly greater-than, so a *minimum* generation
        # of N must be expressed as Gt N-1. The previous code emitted Gt N,
        # which excluded generation N itself.
        requirements.append({
            "key": "karpenter.k8s.aws/instance-generation",
            "operator": "Gt",
            "values": [str(args.instance_gen - 1)]
        })

    # Build spec.template.spec
    template_spec: dict[str, Any] = {
        "nodeClassRef": {
            "group": "karpenter.k8s.aws",
            "kind": "EC2NodeClass",
            "name": args.nodeclass
        },
        "requirements": requirements
    }

    # Node expiry (v1 moved expireAfter into template.spec)
    if args.expire_after:
        template_spec["expireAfter"] = args.expire_after

    # Optional taints
    taints = parse_taints(args.taints)
    if taints:
        template_spec["taints"] = taints

    # Optional template metadata (labels propagated to nodes)
    template_metadata = {}
    labels = parse_labels(args.labels)
    if labels:
        template_metadata["labels"] = labels

    # Pool-wide resource limits
    limits = {}
    if args.cpu_limit:
        limits["cpu"] = args.cpu_limit
    if args.memory_limit:
        limits["memory"] = f"{args.memory_limit}Gi"

    # Disruption / consolidation policy
    disruption = {
        "consolidationPolicy": args.consolidation,
        "consolidateAfter": args.consolidate_after
    }

    # Assemble the NodePool manifest
    nodepool = {
        "apiVersion": "karpenter.sh/v1",
        "kind": "NodePool",
        "metadata": {
            "name": args.name
        },
        "spec": {
            "template": {
                "spec": template_spec
            },
            "limits": limits,
            "disruption": disruption
        }
    }

    if template_metadata:
        nodepool["spec"]["template"]["metadata"] = template_metadata

    return nodepool
198
+
199
+
200
def main():
    """CLI entry point: parse arguments, render the NodePool, write it out."""
    parser = argparse.ArgumentParser(description="Generate Karpenter NodePool YAML")
    parser.add_argument("--name", required=True, help="NodePool name")
    parser.add_argument("--nodeclass", default="default", help="EC2NodeClass name")
    parser.add_argument("--instance-types", help="Comma-separated instance types")
    parser.add_argument("--instance-category", default="c,m,r", help="Instance categories")
    parser.add_argument("--instance-gen", type=int, default=5, help="Minimum instance generation")
    parser.add_argument("--capacity-type", choices=["spot", "on-demand", "both"], default="both")
    parser.add_argument("--arch", choices=["amd64", "arm64", "both"], default="amd64")
    parser.add_argument("--cpu-limit", type=int, default=1000, help="CPU limit")
    parser.add_argument("--memory-limit", type=int, default=2000, help="Memory limit in Gi")
    parser.add_argument("--consolidation", default="WhenEmptyOrUnderutilized",
                        choices=["WhenEmpty", "WhenEmptyOrUnderutilized"])
    parser.add_argument("--consolidate-after", default="1m", help="Consolidation delay")
    parser.add_argument("--expire-after", default="720h", help="Node expiration")
    parser.add_argument("--taints", help="Comma-separated taints: key=value:effect")
    parser.add_argument("--labels", help="Comma-separated labels: key=value")
    parser.add_argument("--output", help="Output file (default: stdout)")
    parser.add_argument("--format", choices=["yaml", "json"], default="yaml", help="Output format")
    args = parser.parse_args()

    try:
        manifest = generate_nodepool(args)

        # Serialize in the requested format.
        if args.format == "json":
            rendered = json.dumps(manifest, indent=2)
        else:
            rendered = yaml.dump(manifest, default_flow_style=False, sort_keys=False)

        # Write to file when --output is given, otherwise to stdout.
        if args.output:
            with open(args.output, "w") as fh:
                fh.write(rendered)
            print(f"NodePool configuration written to: {args.output}")
        else:
            print(rendered)
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    sys.exit(0)


if __name__ == "__main__":
    main()
@@ -0,0 +1,359 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script: karpenter_status.py
4
+ Purpose: Get comprehensive status of Karpenter deployment and resources in an EKS cluster
5
+
6
+ Usage:
7
+ python karpenter_status.py --cluster <cluster-name> [--region <region>] [--namespace karpenter]
8
+
9
+ Arguments:
10
+ --cluster EKS cluster name (required)
11
+ --region AWS region (default: eu-west-1)
12
+ --namespace Karpenter namespace (default: karpenter)
13
+ --output Output format: text, json, yaml (default: text)
14
+
15
+ Exit Codes:
16
+ 0 - Success
17
+ 1 - Invalid arguments
18
+ 2 - kubectl not available or cluster not accessible
19
+ 3 - Karpenter not installed
20
+ """
21
+
22
+ import argparse
23
+ import json
24
+ import subprocess
25
+ import sys
26
+ from datetime import datetime
27
+
28
+
29
def run_kubectl(args: list, namespace: str | None = None) -> tuple[bool, str]:
    """Run a kubectl command and return ``(success, output)``.

    Args:
        args: kubectl arguments (after the optional ``-n <namespace>``).
        namespace: Namespace to target, or None for the cluster scope.
            (Annotation fixed: the parameter accepts None, so it is
            ``str | None``, not ``str``.)

    Returns:
        ``(True, stripped stdout)`` on success; ``(False, stripped stderr)``
        on a non-zero exit, or ``(False, <diagnostic>)`` when kubectl is
        missing or the call times out.
    """
    cmd = ["kubectl"]
    if namespace:
        cmd.extend(["-n", namespace])
    cmd.extend(args)

    try:
        # 30s cap so a hung API server cannot stall the whole report.
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        return result.returncode == 0, result.stdout.strip() if result.returncode == 0 else result.stderr.strip()
    except subprocess.TimeoutExpired:
        return False, "Command timed out"
    except FileNotFoundError:
        return False, "kubectl not found"
43
+
44
+
45
def get_karpenter_status(namespace: str) -> dict:
    """Summarize Karpenter controller pod health in the given namespace.

    Returns a dict whose ``status`` key is one of ``not_found`` (kubectl
    failed), ``not_installed`` (no pods), ``running``, ``degraded``,
    ``unhealthy``, or ``error`` (unparseable output).
    """
    ok, output = run_kubectl(
        ["get", "pods", "-l", "app.kubernetes.io/name=karpenter", "-o", "json"],
        namespace=namespace
    )
    if not ok:
        return {"status": "not_found", "error": output}

    try:
        items = json.loads(output).get("items", [])
    except json.JSONDecodeError:
        return {"status": "error", "error": "Failed to parse pod output"}

    if not items:
        return {"status": "not_installed", "pods": []}

    report = {
        "status": "running",
        "pods": [],
        "total": len(items),
        "ready": 0,
    }

    for pod in items:
        phase = pod["status"].get("phase", "Unknown")
        containers = pod["status"].get("containerStatuses", [])
        is_ready = all(c.get("ready", False) for c in containers)

        if is_ready and phase == "Running":
            report["ready"] += 1

        report["pods"].append({
            "name": pod["metadata"]["name"],
            "phase": phase,
            "ready": is_ready,
            "restarts": sum(c.get("restartCount", 0) for c in containers),
        })

    # Downgrade the overall status when some/all pods are not ready.
    if report["ready"] < report["total"]:
        report["status"] = "degraded"
    if report["ready"] == 0:
        report["status"] = "unhealthy"

    return report
96
+
97
+
98
def get_nodepools() -> dict:
    """List NodePools with their limits, live resources, and conditions."""
    ok, output = run_kubectl(["get", "nodepools", "-o", "json"])
    if not ok:
        return {"count": 0, "error": output, "pools": []}

    try:
        items = json.loads(output).get("items", [])
    except json.JSONDecodeError:
        return {"count": 0, "error": "Failed to parse output", "pools": []}

    pools = []
    for item in items:
        spec = item.get("spec", {})
        status = item.get("status", {})
        pools.append({
            "name": item["metadata"]["name"],
            "limits": spec.get("limits", {}),
            "current_resources": status.get("resources", {}),
            # Flatten the condition list into {type: status} for easy lookup.
            "conditions": {c["type"]: c["status"] for c in status.get("conditions", [])},
            "disruption_policy": spec.get("disruption", {}).get("consolidationPolicy", "Unknown"),
        })

    return {"count": len(pools), "pools": pools}
132
+
133
+
134
def get_ec2nodeclasses() -> dict:
    """List EC2NodeClasses with key spec fields and resolved resource counts."""
    ok, output = run_kubectl(["get", "ec2nodeclasses", "-o", "json"])
    if not ok:
        return {"count": 0, "error": output, "classes": []}

    try:
        items = json.loads(output).get("items", [])
    except json.JSONDecodeError:
        return {"count": 0, "error": "Failed to parse output", "classes": []}

    classes = []
    for item in items:
        spec = item.get("spec", {})
        status = item.get("status", {})
        classes.append({
            "name": item["metadata"]["name"],
            "role": spec.get("role", "N/A"),
            "ami_family": spec.get("amiFamily", "Default"),
            "instance_profile": spec.get("instanceProfile", "N/A"),
            # Counts of resources Karpenter has resolved for this class.
            "subnets": len(status.get("subnets", [])),
            "security_groups": len(status.get("securityGroups", [])),
            "amis": len(status.get("amis", [])),
        })

    return {"count": len(classes), "classes": classes}
164
+
165
+
166
def get_nodeclaims() -> dict:
    """List NodeClaims with provisioning details and conditions."""
    ok, output = run_kubectl(["get", "nodeclaims", "-o", "json"])
    if not ok:
        return {"count": 0, "error": output, "claims": []}

    try:
        items = json.loads(output).get("items", [])
    except json.JSONDecodeError:
        return {"count": 0, "error": "Failed to parse output", "claims": []}

    claims = []
    for item in items:
        metadata = item["metadata"]
        status = item.get("status", {})
        claims.append({
            "name": metadata["name"],
            "nodepool": metadata.get("labels", {}).get("karpenter.sh/nodepool", "Unknown"),
            "instance_type": status.get("instanceType", "Pending"),
            "capacity_type": status.get("capacity", "Unknown"),
            "zone": status.get("zone", "Unknown"),
            "node_name": status.get("nodeName", "Pending"),
            "conditions": {c["type"]: c["status"] for c in status.get("conditions", [])},
        })

    return {"count": len(claims), "claims": claims}
198
+
199
+
200
def get_karpenter_nodes() -> dict:
    """List cluster nodes carrying the ``karpenter.sh/nodepool`` label."""
    ok, output = run_kubectl(["get", "nodes", "-l", "karpenter.sh/nodepool", "-o", "json"])
    if not ok:
        return {"count": 0, "error": output, "nodes": []}

    try:
        items = json.loads(output).get("items", [])
    except json.JSONDecodeError:
        return {"count": 0, "error": "Failed to parse output", "nodes": []}

    nodes = []
    for item in items:
        labels = item["metadata"].get("labels", {})
        status = item.get("status", {})
        conditions = {c["type"]: c["status"] for c in status.get("conditions", [])}
        capacity = status.get("capacity", {})

        nodes.append({
            "name": item["metadata"]["name"],
            "nodepool": labels.get("karpenter.sh/nodepool", "Unknown"),
            "instance_type": labels.get("node.kubernetes.io/instance-type", "Unknown"),
            "capacity_type": labels.get("karpenter.sh/capacity-type", "Unknown"),
            "zone": labels.get("topology.kubernetes.io/zone", "Unknown"),
            # Kubernetes condition status string: "True"/"False"/"Unknown".
            "ready": conditions.get("Ready", "Unknown"),
            "cpu_capacity": capacity.get("cpu", "0"),
            "memory_capacity": capacity.get("memory", "0"),
            "pods_capacity": capacity.get("pods", "0"),
        })

    return {"count": len(nodes), "nodes": nodes}
240
+
241
+
242
def format_text_output(status: dict) -> str:
    """Render the collected status dict as a human-readable text report."""
    out = []
    add = out.append
    rule = "=" * 60

    add(rule)
    add(f"KARPENTER STATUS REPORT - {status['timestamp']}")
    add(f"Cluster: {status['cluster']}")
    add(rule)

    # Controller pod health
    controller = status["controller"]
    add(f"\nšŸ“¦ CONTROLLER STATUS: {controller['status'].upper()}")
    for pod in controller.get("pods") or []:
        icon = "āœ…" if pod["ready"] else "āŒ"
        add(f"  {icon} {pod['name']} - {pod['phase']} (restarts: {pod['restarts']})")

    # NodePools
    nodepools = status["nodepools"]
    add(f"\nšŸŽÆ NODEPOOLS ({nodepools['count']})")
    for pool in nodepools.get("pools") or []:
        add(f"\n  Pool: {pool['name']}")
        add(f"    ā”œā”€ Disruption: {pool['disruption_policy']}")
        if pool.get("limits"):
            add(f"    ā”œā”€ Limits: {pool['limits']}")
        if pool.get("current_resources"):
            add(f"    └─ Current: {pool['current_resources']}")

    # EC2NodeClasses
    classes = status["ec2nodeclasses"]
    add(f"\nšŸ”§ EC2NODECLASSES ({classes['count']})")
    for nc in classes.get("classes") or []:
        add(f"\n  Class: {nc['name']}")
        add(f"    ā”œā”€ Role: {nc['role']}")
        add(f"    ā”œā”€ AMI Family: {nc['ami_family']}")
        add(f"    └─ Subnets: {nc['subnets']}, SGs: {nc['security_groups']}, AMIs: {nc['amis']}")

    # NodeClaims
    claims = status["nodeclaims"]
    add(f"\nšŸ“‹ NODECLAIMS ({claims['count']})")
    for claim in claims.get("claims") or []:
        add(f"\n  Claim: {claim['name']}")
        add(f"    ā”œā”€ NodePool: {claim['nodepool']}")
        add(f"    ā”œā”€ Instance: {claim['instance_type']} ({claim['capacity_type']})")
        add(f"    ā”œā”€ Zone: {claim['zone']}")
        add(f"    └─ Node: {claim['node_name']}")

    # Karpenter-managed nodes
    nodes = status["nodes"]
    add(f"\nšŸ–„ļø KARPENTER NODES ({nodes['count']})")
    for node in nodes.get("nodes") or []:
        icon = "āœ…" if node["ready"] == "True" else "āŒ"
        add(f"\n  {icon} {node['name']}")
        add(f"    ā”œā”€ Type: {node['instance_type']} ({node['capacity_type']})")
        add(f"    ā”œā”€ Zone: {node['zone']}")
        add(f"    └─ Capacity: {node['cpu_capacity']} CPU, {node['memory_capacity']} Memory")

    add("\n" + rule)
    return "\n".join(out)
304
+
305
+
306
def main():
    """Entry point: refresh kubeconfig, gather Karpenter status, print report.

    Exit codes: 0 success, 1 controller unhealthy/error, 2 cluster not
    accessible or tooling missing, 3 Karpenter not installed.
    """
    parser = argparse.ArgumentParser(description="Get Karpenter status in an EKS cluster")
    parser.add_argument("--cluster", required=True, help="EKS cluster name")
    parser.add_argument("--region", default="eu-west-1", help="AWS region")
    parser.add_argument("--namespace", default="karpenter", help="Karpenter namespace")
    parser.add_argument("--output", choices=["text", "json", "yaml"], default="text", help="Output format")
    args = parser.parse_args()

    # Point kubectl at the requested cluster before any queries.
    update_cmd = ["aws", "eks", "update-kubeconfig", "--name", args.cluster, "--region", args.region]
    try:
        # BUGFIX: text=True so e.stderr below is str, not bytes
        # (previously the message rendered as "b'...'").
        subprocess.run(update_cmd, capture_output=True, text=True, check=True, timeout=30)
    except subprocess.CalledProcessError as e:
        print(json.dumps({"status": "error", "message": f"Failed to update kubeconfig: {e.stderr}"}))
        sys.exit(2)
    except FileNotFoundError:
        print(json.dumps({"status": "error", "message": "AWS CLI not found"}))
        sys.exit(2)

    # Local import: the module header only pulls in `datetime` itself.
    from datetime import timezone

    # Collect all status information.
    status = {
        # BUGFIX: datetime.utcnow() is deprecated and naive; use an aware
        # UTC timestamp, keeping the original "...Z" wire format.
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "cluster": args.cluster,
        "region": args.region,
        "controller": get_karpenter_status(args.namespace),
        "nodepools": get_nodepools(),
        "ec2nodeclasses": get_ec2nodeclasses(),
        "nodeclaims": get_nodeclaims(),
        "nodes": get_karpenter_nodes()
    }

    # Output in the requested format.
    if args.output == "json":
        print(json.dumps(status, indent=2))
    elif args.output == "yaml":
        try:
            import yaml
            print(yaml.dump(status, default_flow_style=False))
        except ImportError:
            # PyYAML is optional: degrade to JSON rather than fail.
            print(json.dumps(status, indent=2))
    else:
        print(format_text_output(status))

    # Map the controller state onto the documented exit codes.
    controller_state = status["controller"]["status"]
    if controller_state == "not_installed":
        sys.exit(3)
    if controller_state == "not_found":
        # BUGFIX: kubectl could not list pods => cluster/namespace not
        # accessible; previously this fell through to exit 0.
        sys.exit(2)
    if controller_state in ["unhealthy", "error"]:
        sys.exit(1)

    sys.exit(0)


if __name__ == "__main__":
    main()