@techwavedev/agi-agent-kit 1.1.7 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @techwavedev/agi-agent-kit might be problematic. Click here for more details.
- package/CHANGELOG.md +82 -1
- package/README.md +190 -12
- package/bin/init.js +30 -2
- package/package.json +6 -3
- package/templates/base/AGENTS.md +54 -23
- package/templates/base/README.md +325 -0
- package/templates/base/directives/memory_integration.md +95 -0
- package/templates/base/execution/memory_manager.py +309 -0
- package/templates/base/execution/session_boot.py +218 -0
- package/templates/base/execution/session_init.py +320 -0
- package/templates/base/skill-creator/SKILL_skillcreator.md +23 -36
- package/templates/base/skill-creator/scripts/init_skill.py +18 -135
- package/templates/skills/ec/README.md +31 -0
- package/templates/skills/ec/aws/SKILL.md +1020 -0
- package/templates/skills/ec/aws/defaults.yaml +13 -0
- package/templates/skills/ec/aws/references/common_patterns.md +80 -0
- package/templates/skills/ec/aws/references/mcp_servers.md +98 -0
- package/templates/skills/ec/aws-terraform/SKILL.md +349 -0
- package/templates/skills/ec/aws-terraform/references/best_practices.md +394 -0
- package/templates/skills/ec/aws-terraform/references/checkov_reference.md +337 -0
- package/templates/skills/ec/aws-terraform/scripts/configure_mcp.py +150 -0
- package/templates/skills/ec/confluent-kafka/SKILL.md +655 -0
- package/templates/skills/ec/confluent-kafka/references/ansible_playbooks.md +792 -0
- package/templates/skills/ec/confluent-kafka/references/ec_deployment.md +579 -0
- package/templates/skills/ec/confluent-kafka/references/kraft_migration.md +490 -0
- package/templates/skills/ec/confluent-kafka/references/troubleshooting.md +778 -0
- package/templates/skills/ec/confluent-kafka/references/upgrade_7x_to_8x.md +488 -0
- package/templates/skills/ec/confluent-kafka/scripts/kafka_health_check.py +435 -0
- package/templates/skills/ec/confluent-kafka/scripts/upgrade_preflight.py +568 -0
- package/templates/skills/ec/confluent-kafka/scripts/validate_config.py +455 -0
- package/templates/skills/ec/consul/SKILL.md +427 -0
- package/templates/skills/ec/consul/references/acl_setup.md +168 -0
- package/templates/skills/ec/consul/references/ha_config.md +196 -0
- package/templates/skills/ec/consul/references/troubleshooting.md +267 -0
- package/templates/skills/ec/consul/references/upgrades.md +213 -0
- package/templates/skills/ec/consul/scripts/consul_health_report.py +530 -0
- package/templates/skills/ec/consul/scripts/consul_status.py +264 -0
- package/templates/skills/ec/consul/scripts/generate_values.py +170 -0
- package/templates/skills/ec/documentation/SKILL.md +351 -0
- package/templates/skills/ec/documentation/references/best_practices.md +201 -0
- package/templates/skills/ec/documentation/scripts/analyze_code.py +307 -0
- package/templates/skills/ec/documentation/scripts/detect_changes.py +460 -0
- package/templates/skills/ec/documentation/scripts/generate_changelog.py +312 -0
- package/templates/skills/ec/documentation/scripts/sync_docs.py +272 -0
- package/templates/skills/ec/documentation/scripts/update_skill_docs.py +366 -0
- package/templates/skills/ec/gitlab/SKILL.md +529 -0
- package/templates/skills/ec/gitlab/references/agent_installation.md +416 -0
- package/templates/skills/ec/gitlab/references/api_reference.md +508 -0
- package/templates/skills/ec/gitlab/references/gitops_flux.md +465 -0
- package/templates/skills/ec/gitlab/references/troubleshooting.md +518 -0
- package/templates/skills/ec/gitlab/scripts/generate_agent_values.py +329 -0
- package/templates/skills/ec/gitlab/scripts/gitlab_agent_status.py +414 -0
- package/templates/skills/ec/jira/SKILL.md +484 -0
- package/templates/skills/ec/jira/references/jql_reference.md +148 -0
- package/templates/skills/ec/jira/scripts/add_comment.py +91 -0
- package/templates/skills/ec/jira/scripts/bulk_log_work.py +124 -0
- package/templates/skills/ec/jira/scripts/create_ticket.py +162 -0
- package/templates/skills/ec/jira/scripts/get_ticket.py +191 -0
- package/templates/skills/ec/jira/scripts/jira_client.py +383 -0
- package/templates/skills/ec/jira/scripts/log_work.py +154 -0
- package/templates/skills/ec/jira/scripts/search_tickets.py +104 -0
- package/templates/skills/ec/jira/scripts/update_comment.py +67 -0
- package/templates/skills/ec/jira/scripts/update_ticket.py +161 -0
- package/templates/skills/ec/karpenter/SKILL.md +301 -0
- package/templates/skills/ec/karpenter/references/ec2nodeclasses.md +421 -0
- package/templates/skills/ec/karpenter/references/migration.md +396 -0
- package/templates/skills/ec/karpenter/references/nodepools.md +400 -0
- package/templates/skills/ec/karpenter/references/troubleshooting.md +359 -0
- package/templates/skills/ec/karpenter/scripts/generate_ec2nodeclass.py +187 -0
- package/templates/skills/ec/karpenter/scripts/generate_nodepool.py +245 -0
- package/templates/skills/ec/karpenter/scripts/karpenter_status.py +359 -0
- package/templates/skills/ec/opensearch/SKILL.md +720 -0
- package/templates/skills/ec/opensearch/references/ml_neural_search.md +576 -0
- package/templates/skills/ec/opensearch/references/operator.md +532 -0
- package/templates/skills/ec/opensearch/references/query_dsl.md +532 -0
- package/templates/skills/ec/opensearch/scripts/configure_mcp.py +148 -0
- package/templates/skills/ec/victoriametrics/SKILL.md +598 -0
- package/templates/skills/ec/victoriametrics/references/kubernetes.md +531 -0
- package/templates/skills/ec/victoriametrics/references/prometheus_migration.md +333 -0
- package/templates/skills/ec/victoriametrics/references/troubleshooting.md +442 -0
- package/templates/skills/knowledge/SKILLS_CATALOG.md +274 -4
- package/templates/skills/knowledge/intelligent-routing/SKILL.md +237 -164
- package/templates/skills/knowledge/parallel-agents/SKILL.md +345 -73
- package/templates/skills/knowledge/plugin-discovery/SKILL.md +582 -0
- package/templates/skills/knowledge/plugin-discovery/scripts/platform_setup.py +1083 -0
- package/templates/skills/knowledge/design-md/README.md +0 -34
- package/templates/skills/knowledge/design-md/SKILL.md +0 -193
- package/templates/skills/knowledge/design-md/examples/DESIGN.md +0 -154
- package/templates/skills/knowledge/notebooklm-mcp/SKILL.md +0 -71
- package/templates/skills/knowledge/notebooklm-mcp/assets/example_asset.txt +0 -24
- package/templates/skills/knowledge/notebooklm-mcp/references/api_reference.md +0 -34
- package/templates/skills/knowledge/notebooklm-mcp/scripts/example.py +0 -19
- package/templates/skills/knowledge/react-components/README.md +0 -36
- package/templates/skills/knowledge/react-components/SKILL.md +0 -53
- package/templates/skills/knowledge/react-components/examples/gold-standard-card.tsx +0 -80
- package/templates/skills/knowledge/react-components/package-lock.json +0 -231
- package/templates/skills/knowledge/react-components/package.json +0 -16
- package/templates/skills/knowledge/react-components/resources/architecture-checklist.md +0 -15
- package/templates/skills/knowledge/react-components/resources/component-template.tsx +0 -37
- package/templates/skills/knowledge/react-components/resources/stitch-api-reference.md +0 -14
- package/templates/skills/knowledge/react-components/resources/style-guide.json +0 -27
- package/templates/skills/knowledge/react-components/scripts/fetch-stitch.sh +0 -30
- package/templates/skills/knowledge/react-components/scripts/validate.js +0 -68
- package/templates/skills/knowledge/self-update/SKILL.md +0 -60
- package/templates/skills/knowledge/self-update/scripts/update_kit.py +0 -103
- package/templates/skills/knowledge/stitch-loop/README.md +0 -54
- package/templates/skills/knowledge/stitch-loop/SKILL.md +0 -235
- package/templates/skills/knowledge/stitch-loop/examples/SITE.md +0 -73
- package/templates/skills/knowledge/stitch-loop/examples/next-prompt.md +0 -25
- package/templates/skills/knowledge/stitch-loop/resources/baton-schema.md +0 -61
- package/templates/skills/knowledge/stitch-loop/resources/site-template.md +0 -104
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Script: generate_nodepool.py
|
|
4
|
+
Purpose: Generate Karpenter NodePool YAML configuration
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python generate_nodepool.py --name <name> [options]
|
|
8
|
+
|
|
9
|
+
Arguments:
|
|
10
|
+
--name NodePool name (required)
|
|
11
|
+
--nodeclass EC2NodeClass name (default: default)
|
|
12
|
+
--instance-types Comma-separated instance types (e.g., "m5.large,m5.xlarge")
|
|
13
|
+
--instance-category Comma-separated categories (e.g., "c,m,r")
|
|
14
|
+
--instance-gen Minimum instance generation (default: 5)
|
|
15
|
+
--capacity-type Capacity type: spot, on-demand, both (default: both)
|
|
16
|
+
--arch Architecture: amd64, arm64, both (default: amd64)
|
|
17
|
+
--cpu-limit CPU limit for the pool (default: 1000)
|
|
18
|
+
--memory-limit Memory limit in Gi (default: 2000)
|
|
19
|
+
--consolidation Consolidation policy: WhenEmpty, WhenEmptyOrUnderutilized (default: WhenEmptyOrUnderutilized)
|
|
20
|
+
--consolidate-after Time before consolidation (default: 1m)
|
|
21
|
+
--expire-after Node expiration time (default: 720h)
|
|
22
|
+
--taints Comma-separated taints (format: key=value:effect)
|
|
23
|
+
--labels Comma-separated labels (format: key=value)
|
|
24
|
+
--output Output file (default: stdout)
|
|
25
|
+
|
|
26
|
+
Exit Codes:
|
|
27
|
+
0 - Success
|
|
28
|
+
1 - Invalid arguments
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import argparse
|
|
32
|
+
import json
|
|
33
|
+
import sys
|
|
34
|
+
from typing import Any
|
|
35
|
+
|
|
36
|
+
import yaml
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def parse_taints(taints_str: str) -> list[dict]:
    """Convert a comma-separated taint spec (key=value:effect) into taint dicts.

    Entries without a ':' separator are silently skipped; a missing '=' yields
    an empty value. Falsy input returns an empty list.
    """
    if not taints_str:
        return []

    parsed = []
    for entry in (chunk.strip() for chunk in taints_str.split(",")):
        if ":" not in entry:
            # Malformed entry (no effect segment) -- skip rather than fail.
            continue
        kv, effect = entry.rsplit(":", 1)
        # partition() mirrors the original split-or-empty-value behavior.
        key, _, value = kv.partition("=")
        parsed.append({"key": key, "value": value, "effect": effect})

    return parsed
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def parse_labels(labels_str: str) -> dict:
    """Convert a comma-separated 'key=value' list into a dict.

    Entries lacking '=' are ignored; falsy input yields an empty dict.
    Later duplicates of a key overwrite earlier ones.
    """
    if not labels_str:
        return {}

    return {
        key: value
        for key, sep, value in (
            item.strip().partition("=") for item in labels_str.split(",")
        )
        if sep
    }
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def generate_nodepool(args: argparse.Namespace) -> dict:
    """Build a Karpenter NodePool manifest (karpenter.sh/v1) from parsed CLI args.

    Returns a plain dict ready for YAML/JSON serialization. Uses the
    module-level parse_taints/parse_labels helpers for taint/label specs.
    """
    def _in_req(key: str, values: list) -> dict:
        # Shorthand for an "In" scheduling requirement.
        return {"key": key, "operator": "In", "values": values}

    # Requirements in fixed order: arch, OS, capacity type, instance selection.
    requirements = [
        _in_req(
            "kubernetes.io/arch",
            ["amd64", "arm64"] if args.arch == "both" else [args.arch],
        ),
        _in_req("kubernetes.io/os", ["linux"]),  # only Linux is emitted today
        _in_req(
            "karpenter.sh/capacity-type",
            ["spot", "on-demand"] if args.capacity_type == "both"
            else [args.capacity_type],
        ),
    ]

    # Explicit instance types take precedence over category-based selection.
    if args.instance_types:
        requirements.append(_in_req(
            "node.kubernetes.io/instance-type",
            [t.strip() for t in args.instance_types.split(",")],
        ))
    elif args.instance_category:
        requirements.append(_in_req(
            "karpenter.k8s.aws/instance-category",
            [c.strip() for c in args.instance_category.split(",")],
        ))
        # NOTE(review): 'Gt' excludes args.instance_gen itself, while the CLI
        # help calls this a "minimum" generation -- confirm intended semantics.
        requirements.append({
            "key": "karpenter.k8s.aws/instance-generation",
            "operator": "Gt",
            "values": [str(args.instance_gen)],
        })

    template_spec: dict[str, Any] = {
        "nodeClassRef": {
            "group": "karpenter.k8s.aws",
            "kind": "EC2NodeClass",
            "name": args.nodeclass,
        },
        "requirements": requirements,
    }

    if args.expire_after:
        template_spec["expireAfter"] = args.expire_after

    if taints := parse_taints(args.taints):
        template_spec["taints"] = taints

    # Pool-wide resource limits; memory is expressed in Gi.
    limits = {}
    if args.cpu_limit:
        limits["cpu"] = args.cpu_limit
    if args.memory_limit:
        limits["memory"] = f"{args.memory_limit}Gi"

    nodepool = {
        "apiVersion": "karpenter.sh/v1",
        "kind": "NodePool",
        "metadata": {"name": args.name},
        "spec": {
            "template": {"spec": template_spec},
            "limits": limits,
            "disruption": {
                "consolidationPolicy": args.consolidation,
                "consolidateAfter": args.consolidate_after,
            },
        },
    }

    # Node labels live under spec.template.metadata, added only when present.
    if labels := parse_labels(args.labels):
        nodepool["spec"]["template"]["metadata"] = {"labels": labels}

    return nodepool
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def main():
    """CLI entry point: parse arguments and emit the NodePool as YAML or JSON."""
    parser = argparse.ArgumentParser(description="Generate Karpenter NodePool YAML")
    parser.add_argument("--name", required=True, help="NodePool name")
    parser.add_argument("--nodeclass", default="default", help="EC2NodeClass name")
    parser.add_argument("--instance-types", help="Comma-separated instance types")
    parser.add_argument("--instance-category", default="c,m,r", help="Instance categories")
    parser.add_argument("--instance-gen", type=int, default=5, help="Minimum instance generation")
    parser.add_argument("--capacity-type", choices=["spot", "on-demand", "both"], default="both")
    parser.add_argument("--arch", choices=["amd64", "arm64", "both"], default="amd64")
    parser.add_argument("--cpu-limit", type=int, default=1000, help="CPU limit")
    parser.add_argument("--memory-limit", type=int, default=2000, help="Memory limit in Gi")
    parser.add_argument("--consolidation", default="WhenEmptyOrUnderutilized",
                        choices=["WhenEmpty", "WhenEmptyOrUnderutilized"])
    parser.add_argument("--consolidate-after", default="1m", help="Consolidation delay")
    parser.add_argument("--expire-after", default="720h", help="Node expiration")
    parser.add_argument("--taints", help="Comma-separated taints: key=value:effect")
    parser.add_argument("--labels", help="Comma-separated labels: key=value")
    parser.add_argument("--output", help="Output file (default: stdout)")
    parser.add_argument("--format", choices=["yaml", "json"], default="yaml", help="Output format")

    args = parser.parse_args()

    try:
        nodepool = generate_nodepool(args)

        rendered = (
            json.dumps(nodepool, indent=2)
            if args.format == "json"
            else yaml.dump(nodepool, default_flow_style=False, sort_keys=False)
        )

        if args.output:
            # Write to the requested file and confirm on stdout.
            with open(args.output, "w") as fh:
                fh.write(rendered)
            print(f"NodePool configuration written to: {args.output}")
        else:
            print(rendered)

        sys.exit(0)
    except Exception as e:
        # SystemExit is a BaseException, so the success path above is not caught.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# Standard script entry guard: run the CLI only when executed directly.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Script: karpenter_status.py
|
|
4
|
+
Purpose: Get comprehensive status of Karpenter deployment and resources in an EKS cluster
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python karpenter_status.py --cluster <cluster-name> [--region <region>] [--namespace karpenter]
|
|
8
|
+
|
|
9
|
+
Arguments:
|
|
10
|
+
--cluster EKS cluster name (required)
|
|
11
|
+
--region AWS region (default: eu-west-1)
|
|
12
|
+
--namespace Karpenter namespace (default: karpenter)
|
|
13
|
+
--output Output format: text, json, yaml (default: text)
|
|
14
|
+
|
|
15
|
+
Exit Codes:
|
|
16
|
+
0 - Success
|
|
17
|
+
1 - Invalid arguments
|
|
18
|
+
2 - kubectl not available or cluster not accessible
|
|
19
|
+
3 - Karpenter not installed
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import json
|
|
24
|
+
import subprocess
|
|
25
|
+
import sys
|
|
26
|
+
from datetime import datetime
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def run_kubectl(args: list, namespace: str = None) -> tuple[bool, str]:
    """Execute a kubectl command and return (ok, text).

    On success the text is stripped stdout; on failure it is the stripped
    stderr, a timeout notice, or a missing-binary notice. The optional
    namespace is injected as '-n <namespace>' before the subcommand args.
    """
    cmd = ["kubectl"] + (["-n", namespace] if namespace else []) + list(args)

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except subprocess.TimeoutExpired:
        return False, "Command timed out"
    except FileNotFoundError:
        return False, "kubectl not found"

    ok = proc.returncode == 0
    return ok, (proc.stdout if ok else proc.stderr).strip()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_karpenter_status(namespace: str) -> dict:
    """Summarize the Karpenter controller pods in the given namespace.

    The returned dict's "status" is one of: running, degraded, unhealthy,
    not_installed, not_found (kubectl failure), or error (bad JSON).
    """
    ok, out = run_kubectl(
        ["get", "pods", "-l", "app.kubernetes.io/name=karpenter", "-o", "json"],
        namespace=namespace,
    )
    if not ok:
        return {"status": "not_found", "error": out}

    try:
        items = json.loads(out).get("items", [])
    except json.JSONDecodeError:
        return {"status": "error", "error": "Failed to parse pod output"}

    if not items:
        return {"status": "not_installed", "pods": []}

    result = {"status": "running", "pods": [], "total": len(items), "ready": 0}

    for pod in items:
        phase = pod["status"].get("phase", "Unknown")
        containers = pod["status"].get("containerStatuses", [])
        is_ready = all(cs.get("ready", False) for cs in containers)

        # A pod counts as ready only when Running with all containers ready.
        if is_ready and phase == "Running":
            result["ready"] += 1

        result["pods"].append({
            "name": pod["metadata"]["name"],
            "phase": phase,
            "ready": is_ready,
            "restarts": sum(cs.get("restartCount", 0) for cs in containers),
        })

    # Downgrade the aggregate status when some or all pods are not ready.
    if result["ready"] < result["total"]:
        result["status"] = "degraded"
    if result["ready"] == 0:
        result["status"] = "unhealthy"

    return result
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def get_nodepools() -> dict:
    """List NodePools with limits, current usage, conditions, and disruption policy.

    Returns {"count": N, "pools": [...]}; on kubectl or parse failure a
    zero-count dict with an "error" message.
    """
    ok, out = run_kubectl(["get", "nodepools", "-o", "json"])
    if not ok:
        return {"count": 0, "error": out, "pools": []}

    try:
        items = json.loads(out).get("items", [])
    except json.JSONDecodeError:
        return {"count": 0, "error": "Failed to parse output", "pools": []}

    pools = [
        {
            "name": item["metadata"]["name"],
            "limits": item.get("spec", {}).get("limits", {}),
            "current_resources": item.get("status", {}).get("resources", {}),
            # Flatten conditions to {type: status} for quick inspection.
            "conditions": {
                c["type"]: c["status"]
                for c in item.get("status", {}).get("conditions", [])
            },
            "disruption_policy": item.get("spec", {})
            .get("disruption", {})
            .get("consolidationPolicy", "Unknown"),
        }
        for item in items
    ]

    return {"count": len(pools), "pools": pools}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def get_ec2nodeclasses() -> dict:
    """List EC2NodeClasses with role/AMI settings and resolved resource counts.

    Returns {"count": N, "classes": [...]}; on kubectl or parse failure a
    zero-count dict with an "error" message.
    """
    ok, out = run_kubectl(["get", "ec2nodeclasses", "-o", "json"])
    if not ok:
        return {"count": 0, "error": out, "classes": []}

    try:
        items = json.loads(out).get("items", [])
    except json.JSONDecodeError:
        return {"count": 0, "error": "Failed to parse output", "classes": []}

    classes = []
    for item in items:
        spec = item.get("spec", {})
        state = item.get("status", {})
        classes.append({
            "name": item["metadata"]["name"],
            "role": spec.get("role", "N/A"),
            "ami_family": spec.get("amiFamily", "Default"),
            "instance_profile": spec.get("instanceProfile", "N/A"),
            # Counts of resources Karpenter has discovered for this class.
            "subnets": len(state.get("subnets", [])),
            "security_groups": len(state.get("securityGroups", [])),
            "amis": len(state.get("amis", [])),
        })

    return {"count": len(classes), "classes": classes}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def get_nodeclaims() -> dict:
    """List NodeClaims with pool, instance, placement, and condition info.

    Returns {"count": N, "claims": [...]}; on kubectl or parse failure a
    zero-count dict with an "error" message.
    """
    success, output = run_kubectl(["get", "nodeclaims", "-o", "json"])

    if not success:
        return {"count": 0, "error": output, "claims": []}

    try:
        data = json.loads(output)
    except json.JSONDecodeError:
        return {"count": 0, "error": "Failed to parse output", "claims": []}

    claims = []
    for item in data.get("items", []):
        labels = item["metadata"].get("labels", {})
        status = item.get("status", {})

        claims.append({
            "name": item["metadata"]["name"],
            "nodepool": labels.get("karpenter.sh/nodepool", "Unknown"),
            "instance_type": status.get("instanceType", "Pending"),
            # Bug fix: status.capacity is a resource-quantity map, not the
            # capacity type; read the karpenter.sh/capacity-type label instead,
            # consistent with get_karpenter_nodes.
            "capacity_type": labels.get("karpenter.sh/capacity-type", "Unknown"),
            "zone": status.get("zone", "Unknown"),
            "node_name": status.get("nodeName", "Pending"),
            # Flatten conditions to {type: status}.
            "conditions": {c["type"]: c["status"] for c in status.get("conditions", [])},
        })

    return {"count": len(claims), "claims": claims}
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def get_karpenter_nodes() -> dict:
    """List nodes carrying the karpenter.sh/nodepool label, with capacity info.

    Returns {"count": N, "nodes": [...]}; on kubectl or parse failure a
    zero-count dict with an "error" message.
    """
    ok, out = run_kubectl(["get", "nodes", "-l", "karpenter.sh/nodepool", "-o", "json"])
    if not ok:
        return {"count": 0, "error": out, "nodes": []}

    try:
        items = json.loads(out).get("items", [])
    except json.JSONDecodeError:
        return {"count": 0, "error": "Failed to parse output", "nodes": []}

    nodes = []
    for item in items:
        labels = item["metadata"].get("labels", {})
        state = item.get("status", {})
        conditions = {c["type"]: c["status"] for c in state.get("conditions", [])}
        capacity = state.get("capacity", {})

        nodes.append({
            "name": item["metadata"]["name"],
            # Provisioning identity comes from Karpenter-managed labels.
            "nodepool": labels.get("karpenter.sh/nodepool", "Unknown"),
            "instance_type": labels.get("node.kubernetes.io/instance-type", "Unknown"),
            "capacity_type": labels.get("karpenter.sh/capacity-type", "Unknown"),
            "zone": labels.get("topology.kubernetes.io/zone", "Unknown"),
            "ready": conditions.get("Ready", "Unknown"),
            "cpu_capacity": capacity.get("cpu", "0"),
            "memory_capacity": capacity.get("memory", "0"),
            "pods_capacity": capacity.get("pods", "0"),
        })

    return {"count": len(nodes), "nodes": nodes}
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def format_text_output(status: dict) -> str:
    """Render the collected status dict as a human-readable text report.

    Expects the dict shape assembled by main(): keys timestamp, cluster,
    controller, nodepools, ec2nodeclasses, nodeclaims, nodes.

    Fix: the section-header and status-icon string literals were
    mojibake-corrupted (mis-encoded multi-byte glyphs, one split across
    lines); they are reconstructed here as valid UTF-8.
    """
    lines = []
    rule = "=" * 60
    lines.append(rule)
    lines.append(f"KARPENTER STATUS REPORT - {status['timestamp']}")
    lines.append(f"Cluster: {status['cluster']}")
    lines.append(rule)

    # Controller Status
    controller = status["controller"]
    lines.append(f"\n📦 CONTROLLER STATUS: {controller['status'].upper()}")
    for pod in controller.get("pods") or []:
        status_icon = "✅" if pod["ready"] else "❌"
        lines.append(f"  {status_icon} {pod['name']} - {pod['phase']} (restarts: {pod['restarts']})")

    # NodePools
    nodepools = status["nodepools"]
    lines.append(f"\n🎯 NODEPOOLS ({nodepools['count']})")
    for pool in nodepools.get("pools") or []:
        lines.append(f"\n  Pool: {pool['name']}")
        lines.append(f"    ├─ Disruption: {pool['disruption_policy']}")
        if pool.get("limits"):
            lines.append(f"    ├─ Limits: {pool['limits']}")
        if pool.get("current_resources"):
            lines.append(f"    └─ Current: {pool['current_resources']}")

    # EC2NodeClasses
    classes = status["ec2nodeclasses"]
    lines.append(f"\n🔧 EC2NODECLASSES ({classes['count']})")
    for nc in classes.get("classes") or []:
        lines.append(f"\n  Class: {nc['name']}")
        lines.append(f"    ├─ Role: {nc['role']}")
        lines.append(f"    ├─ AMI Family: {nc['ami_family']}")
        lines.append(f"    └─ Subnets: {nc['subnets']}, SGs: {nc['security_groups']}, AMIs: {nc['amis']}")

    # NodeClaims
    claims = status["nodeclaims"]
    lines.append(f"\n📋 NODECLAIMS ({claims['count']})")
    for claim in claims.get("claims") or []:
        lines.append(f"\n  Claim: {claim['name']}")
        lines.append(f"    ├─ NodePool: {claim['nodepool']}")
        lines.append(f"    ├─ Instance: {claim['instance_type']} ({claim['capacity_type']})")
        lines.append(f"    ├─ Zone: {claim['zone']}")
        lines.append(f"    └─ Node: {claim['node_name']}")

    # Karpenter-managed nodes
    nodes = status["nodes"]
    lines.append(f"\n🖥️ KARPENTER NODES ({nodes['count']})")
    for node in nodes.get("nodes") or []:
        # Node Ready condition arrives as the string "True"/"False"/"Unknown".
        ready_icon = "✅" if node["ready"] == "True" else "❌"
        lines.append(f"\n  {ready_icon} {node['name']}")
        lines.append(f"    ├─ Type: {node['instance_type']} ({node['capacity_type']})")
        lines.append(f"    ├─ Zone: {node['zone']}")
        lines.append(f"    └─ Capacity: {node['cpu_capacity']} CPU, {node['memory_capacity']} Memory")

    lines.append("\n" + rule)
    return "\n".join(lines)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def main():
    """Entry point: refresh kubeconfig, collect Karpenter status, print it.

    Exit codes: 0 success, 1 controller unhealthy/error, 2 kubeconfig or
    CLI failure, 3 Karpenter not installed.

    Fixes: run the aws CLI with text=True so e.stderr interpolates as a
    string (previously bytes, yielding a b'...' repr in the error message);
    replace the deprecated naive datetime.utcnow() with an aware UTC time.
    """
    from datetime import timezone  # local: file top imports only `datetime`

    parser = argparse.ArgumentParser(description="Get Karpenter status in an EKS cluster")
    parser.add_argument("--cluster", required=True, help="EKS cluster name")
    parser.add_argument("--region", default="eu-west-1", help="AWS region")
    parser.add_argument("--namespace", default="karpenter", help="Karpenter namespace")
    parser.add_argument("--output", choices=["text", "json", "yaml"], default="text", help="Output format")
    args = parser.parse_args()

    # Point kubectl at the requested cluster before any queries.
    update_cmd = ["aws", "eks", "update-kubeconfig", "--name", args.cluster, "--region", args.region]
    try:
        subprocess.run(update_cmd, capture_output=True, text=True, check=True, timeout=30)
    except subprocess.CalledProcessError as e:
        print(json.dumps({"status": "error", "message": f"Failed to update kubeconfig: {e.stderr}"}))
        sys.exit(2)
    except FileNotFoundError:
        print(json.dumps({"status": "error", "message": "AWS CLI not found"}))
        sys.exit(2)

    # Collect all status information
    status = {
        # Aware UTC timestamp rendered with a trailing Z (RFC 3339 style).
        "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "cluster": args.cluster,
        "region": args.region,
        "controller": get_karpenter_status(args.namespace),
        "nodepools": get_nodepools(),
        "ec2nodeclasses": get_ec2nodeclasses(),
        "nodeclaims": get_nodeclaims(),
        "nodes": get_karpenter_nodes()
    }

    # Output based on format
    if args.output == "json":
        print(json.dumps(status, indent=2))
    elif args.output == "yaml":
        try:
            import yaml
            print(yaml.dump(status, default_flow_style=False))
        except ImportError:
            # PyYAML unavailable: fall back to JSON rather than fail.
            print(json.dumps(status, indent=2))
    else:
        print(format_text_output(status))

    # Exit code reflects controller health.
    controller_status = status["controller"]["status"]
    if controller_status == "not_installed":
        sys.exit(3)
    elif controller_status in ["unhealthy", "error"]:
        sys.exit(1)

    sys.exit(0)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
# Standard script entry guard: run the CLI only when executed directly.
if __name__ == "__main__":
    main()
|