@techwavedev/agi-agent-kit 1.1.7 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @techwavedev/agi-agent-kit might be problematic. Click here for more details.
- package/CHANGELOG.md +82 -1
- package/README.md +190 -12
- package/bin/init.js +30 -2
- package/package.json +6 -3
- package/templates/base/AGENTS.md +54 -23
- package/templates/base/README.md +325 -0
- package/templates/base/directives/memory_integration.md +95 -0
- package/templates/base/execution/memory_manager.py +309 -0
- package/templates/base/execution/session_boot.py +218 -0
- package/templates/base/execution/session_init.py +320 -0
- package/templates/base/skill-creator/SKILL_skillcreator.md +23 -36
- package/templates/base/skill-creator/scripts/init_skill.py +18 -135
- package/templates/skills/ec/README.md +31 -0
- package/templates/skills/ec/aws/SKILL.md +1020 -0
- package/templates/skills/ec/aws/defaults.yaml +13 -0
- package/templates/skills/ec/aws/references/common_patterns.md +80 -0
- package/templates/skills/ec/aws/references/mcp_servers.md +98 -0
- package/templates/skills/ec/aws-terraform/SKILL.md +349 -0
- package/templates/skills/ec/aws-terraform/references/best_practices.md +394 -0
- package/templates/skills/ec/aws-terraform/references/checkov_reference.md +337 -0
- package/templates/skills/ec/aws-terraform/scripts/configure_mcp.py +150 -0
- package/templates/skills/ec/confluent-kafka/SKILL.md +655 -0
- package/templates/skills/ec/confluent-kafka/references/ansible_playbooks.md +792 -0
- package/templates/skills/ec/confluent-kafka/references/ec_deployment.md +579 -0
- package/templates/skills/ec/confluent-kafka/references/kraft_migration.md +490 -0
- package/templates/skills/ec/confluent-kafka/references/troubleshooting.md +778 -0
- package/templates/skills/ec/confluent-kafka/references/upgrade_7x_to_8x.md +488 -0
- package/templates/skills/ec/confluent-kafka/scripts/kafka_health_check.py +435 -0
- package/templates/skills/ec/confluent-kafka/scripts/upgrade_preflight.py +568 -0
- package/templates/skills/ec/confluent-kafka/scripts/validate_config.py +455 -0
- package/templates/skills/ec/consul/SKILL.md +427 -0
- package/templates/skills/ec/consul/references/acl_setup.md +168 -0
- package/templates/skills/ec/consul/references/ha_config.md +196 -0
- package/templates/skills/ec/consul/references/troubleshooting.md +267 -0
- package/templates/skills/ec/consul/references/upgrades.md +213 -0
- package/templates/skills/ec/consul/scripts/consul_health_report.py +530 -0
- package/templates/skills/ec/consul/scripts/consul_status.py +264 -0
- package/templates/skills/ec/consul/scripts/generate_values.py +170 -0
- package/templates/skills/ec/documentation/SKILL.md +351 -0
- package/templates/skills/ec/documentation/references/best_practices.md +201 -0
- package/templates/skills/ec/documentation/scripts/analyze_code.py +307 -0
- package/templates/skills/ec/documentation/scripts/detect_changes.py +460 -0
- package/templates/skills/ec/documentation/scripts/generate_changelog.py +312 -0
- package/templates/skills/ec/documentation/scripts/sync_docs.py +272 -0
- package/templates/skills/ec/documentation/scripts/update_skill_docs.py +366 -0
- package/templates/skills/ec/gitlab/SKILL.md +529 -0
- package/templates/skills/ec/gitlab/references/agent_installation.md +416 -0
- package/templates/skills/ec/gitlab/references/api_reference.md +508 -0
- package/templates/skills/ec/gitlab/references/gitops_flux.md +465 -0
- package/templates/skills/ec/gitlab/references/troubleshooting.md +518 -0
- package/templates/skills/ec/gitlab/scripts/generate_agent_values.py +329 -0
- package/templates/skills/ec/gitlab/scripts/gitlab_agent_status.py +414 -0
- package/templates/skills/ec/jira/SKILL.md +484 -0
- package/templates/skills/ec/jira/references/jql_reference.md +148 -0
- package/templates/skills/ec/jira/scripts/add_comment.py +91 -0
- package/templates/skills/ec/jira/scripts/bulk_log_work.py +124 -0
- package/templates/skills/ec/jira/scripts/create_ticket.py +162 -0
- package/templates/skills/ec/jira/scripts/get_ticket.py +191 -0
- package/templates/skills/ec/jira/scripts/jira_client.py +383 -0
- package/templates/skills/ec/jira/scripts/log_work.py +154 -0
- package/templates/skills/ec/jira/scripts/search_tickets.py +104 -0
- package/templates/skills/ec/jira/scripts/update_comment.py +67 -0
- package/templates/skills/ec/jira/scripts/update_ticket.py +161 -0
- package/templates/skills/ec/karpenter/SKILL.md +301 -0
- package/templates/skills/ec/karpenter/references/ec2nodeclasses.md +421 -0
- package/templates/skills/ec/karpenter/references/migration.md +396 -0
- package/templates/skills/ec/karpenter/references/nodepools.md +400 -0
- package/templates/skills/ec/karpenter/references/troubleshooting.md +359 -0
- package/templates/skills/ec/karpenter/scripts/generate_ec2nodeclass.py +187 -0
- package/templates/skills/ec/karpenter/scripts/generate_nodepool.py +245 -0
- package/templates/skills/ec/karpenter/scripts/karpenter_status.py +359 -0
- package/templates/skills/ec/opensearch/SKILL.md +720 -0
- package/templates/skills/ec/opensearch/references/ml_neural_search.md +576 -0
- package/templates/skills/ec/opensearch/references/operator.md +532 -0
- package/templates/skills/ec/opensearch/references/query_dsl.md +532 -0
- package/templates/skills/ec/opensearch/scripts/configure_mcp.py +148 -0
- package/templates/skills/ec/victoriametrics/SKILL.md +598 -0
- package/templates/skills/ec/victoriametrics/references/kubernetes.md +531 -0
- package/templates/skills/ec/victoriametrics/references/prometheus_migration.md +333 -0
- package/templates/skills/ec/victoriametrics/references/troubleshooting.md +442 -0
- package/templates/skills/knowledge/SKILLS_CATALOG.md +274 -4
- package/templates/skills/knowledge/intelligent-routing/SKILL.md +237 -164
- package/templates/skills/knowledge/parallel-agents/SKILL.md +345 -73
- package/templates/skills/knowledge/plugin-discovery/SKILL.md +582 -0
- package/templates/skills/knowledge/plugin-discovery/scripts/platform_setup.py +1083 -0
- package/templates/skills/knowledge/design-md/README.md +0 -34
- package/templates/skills/knowledge/design-md/SKILL.md +0 -193
- package/templates/skills/knowledge/design-md/examples/DESIGN.md +0 -154
- package/templates/skills/knowledge/notebooklm-mcp/SKILL.md +0 -71
- package/templates/skills/knowledge/notebooklm-mcp/assets/example_asset.txt +0 -24
- package/templates/skills/knowledge/notebooklm-mcp/references/api_reference.md +0 -34
- package/templates/skills/knowledge/notebooklm-mcp/scripts/example.py +0 -19
- package/templates/skills/knowledge/react-components/README.md +0 -36
- package/templates/skills/knowledge/react-components/SKILL.md +0 -53
- package/templates/skills/knowledge/react-components/examples/gold-standard-card.tsx +0 -80
- package/templates/skills/knowledge/react-components/package-lock.json +0 -231
- package/templates/skills/knowledge/react-components/package.json +0 -16
- package/templates/skills/knowledge/react-components/resources/architecture-checklist.md +0 -15
- package/templates/skills/knowledge/react-components/resources/component-template.tsx +0 -37
- package/templates/skills/knowledge/react-components/resources/stitch-api-reference.md +0 -14
- package/templates/skills/knowledge/react-components/resources/style-guide.json +0 -27
- package/templates/skills/knowledge/react-components/scripts/fetch-stitch.sh +0 -30
- package/templates/skills/knowledge/react-components/scripts/validate.js +0 -68
- package/templates/skills/knowledge/self-update/SKILL.md +0 -60
- package/templates/skills/knowledge/self-update/scripts/update_kit.py +0 -103
- package/templates/skills/knowledge/stitch-loop/README.md +0 -54
- package/templates/skills/knowledge/stitch-loop/SKILL.md +0 -235
- package/templates/skills/knowledge/stitch-loop/examples/SITE.md +0 -73
- package/templates/skills/knowledge/stitch-loop/examples/next-prompt.md +0 -25
- package/templates/skills/knowledge/stitch-loop/resources/baton-schema.md +0 -61
- package/templates/skills/knowledge/stitch-loop/resources/site-template.md +0 -104
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Update Comment on Jira Ticket
|
|
4
|
+
|
|
5
|
+
Updates an existing comment on a Jira issue.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python update_comment.py --ticket <key> --comment-id <id> --text <text>
|
|
9
|
+
|
|
10
|
+
Arguments:
|
|
11
|
+
--ticket Ticket key (required)
|
|
12
|
+
--comment-id Comment ID to update (required)
|
|
13
|
+
--text New comment text (required)
|
|
14
|
+
|
|
15
|
+
Exit Codes:
|
|
16
|
+
0 - Success
|
|
17
|
+
1 - Invalid arguments
|
|
18
|
+
2 - Ticket not found
|
|
19
|
+
3 - Update error
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import json
|
|
24
|
+
import sys
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
sys.path.insert(0, str(Path(__file__).parent))
|
|
28
|
+
from jira_client import get_client
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def main():
    """Command-line entry point: replace the text of one Jira comment.

    Reads ``--ticket``, ``--comment-id`` and ``--text`` (all required) from
    the command line, upper-cases the ticket key, and asks the Jira client
    to update the comment.

    Output:
        stdout: a JSON object with ``success``, ``ticket``, ``comment_id``
                and ``updated`` once the update has gone through.
        stderr: human-readable progress and error messages.

    Exits:
        0 when the client reports success, 3 when it reports failure.
    """
    cli = argparse.ArgumentParser(
        description='Update a comment on a Jira ticket',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    # All three options are mandatory; register them from one table.
    for flag, help_text in (
        ('--ticket', 'Ticket key'),
        ('--comment-id', 'Comment ID'),
        ('--text', 'New comment text'),
    ):
        cli.add_argument(flag, required=True, help=help_text)
    opts = cli.parse_args()

    jira = get_client()
    issue_key = opts.ticket.upper()

    print(f"✏️ Updating comment {opts.comment_id} on {issue_key}...", file=sys.stderr)

    # The client returns a (success flag, payload-or-error) pair.
    ok, detail = jira.update_comment(issue_key, opts.comment_id, opts.text)
    if ok:
        summary = {
            'success': True,
            'ticket': issue_key,
            'comment_id': opts.comment_id,
            'updated': True
        }
        print(json.dumps(summary, indent=2))

        print(f"✅ Comment updated on {issue_key}", file=sys.stderr)
        sys.exit(0)

    print(f"❌ Error updating comment: {detail}", file=sys.stderr)
    sys.exit(3)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
if __name__ == '__main__':
|
|
67
|
+
main()
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Update Jira Ticket
|
|
4
|
+
|
|
5
|
+
Updates an existing Jira issue with new field values.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python update_ticket.py --ticket <key> [options]
|
|
9
|
+
|
|
10
|
+
Arguments:
|
|
11
|
+
--ticket Ticket key (required, e.g., PROJ-123)
|
|
12
|
+
--status Transition to new status
|
|
13
|
+
--assignee New assignee
|
|
14
|
+
--priority New priority
|
|
15
|
+
--summary Updated summary
|
|
16
|
+
--description Updated description
|
|
17
|
+
--labels Replace labels (comma-separated)
|
|
18
|
+
--add-labels Add labels (comma-separated)
|
|
19
|
+
--remove-labels Remove labels (comma-separated)
|
|
20
|
+
--components Replace components
|
|
21
|
+
--custom-fields Custom fields as JSON
|
|
22
|
+
|
|
23
|
+
Exit Codes:
|
|
24
|
+
0 - Success
|
|
25
|
+
1 - Invalid arguments
|
|
26
|
+
2 - Ticket not found
|
|
27
|
+
3 - Update error
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import argparse
|
|
31
|
+
import json
|
|
32
|
+
import sys
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
sys.path.insert(0, str(Path(__file__).parent))
|
|
36
|
+
from jira_client import get_client
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _split_csv(value):
    """Split a comma-separated string into stripped, non-empty items."""
    return [item.strip() for item in value.split(',') if item.strip()]


def main():
    """Command-line entry point: update fields on an existing Jira ticket.

    Flow:
        1. Parse arguments and validate ``--custom-fields`` (must be a JSON
           object). This happens before Jira is contacted so that a bad
           argument never leaves the ticket half-updated.
        2. Verify the ticket exists via ``client.get_issue``.
        3. If ``--status`` is given, attempt the transition. Failure here is
           best-effort: a warning is printed and processing continues.
        4. Build the ``fields`` (replace) and ``update`` (add/remove label
           operations) payloads and send them with ``client.update_issue``.
        5. Print a JSON summary on stdout; progress messages go to stderr.

    Exits:
        0 - success
        1 - ``--custom-fields`` is not valid JSON or not a JSON object
        2 - ticket could not be fetched
        3 - ``client.update_issue`` reported failure
    """
    parser = argparse.ArgumentParser(
        description='Update a Jira ticket',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('--ticket', required=True, help='Ticket key')
    parser.add_argument('--status', help='Transition to status')
    parser.add_argument('--assignee', help='New assignee')
    parser.add_argument('--priority', help='New priority')
    parser.add_argument('--summary', help='New summary')
    parser.add_argument('--description', help='New description')
    parser.add_argument('--labels', help='Replace all labels')
    parser.add_argument('--add-labels', help='Add labels')
    parser.add_argument('--remove-labels', help='Remove labels')
    parser.add_argument('--components', help='Replace components')
    parser.add_argument('--custom-fields', help='Custom fields as JSON')
    args = parser.parse_args()

    # Validate custom fields up front: an invalid argument must abort with
    # exit code 1 before any transition or field update has been applied.
    custom = {}
    if args.custom_fields:
        try:
            custom = json.loads(args.custom_fields)
        except json.JSONDecodeError as e:
            print(f"❌ Invalid custom-fields JSON: {e}", file=sys.stderr)
            sys.exit(1)
        if not isinstance(custom, dict):
            # fields.update() needs a mapping; lists/strings/numbers would crash.
            print("❌ Invalid custom-fields JSON: expected a JSON object", file=sys.stderr)
            sys.exit(1)

    client = get_client()
    ticket = args.ticket.upper()

    print(f"🔧 Updating ticket {ticket}...", file=sys.stderr)

    # Verify ticket exists
    success, issue = client.get_issue(ticket)
    if not success:
        print(f"❌ Error: Could not find ticket {ticket}: {issue}", file=sys.stderr)
        sys.exit(2)

    # Handle status transition separately (it is not a plain field update)
    if args.status:
        transition_id = client.find_transition_by_name(ticket, args.status)
        if transition_id:
            success, result = client.transition_issue(ticket, transition_id)
            if success:
                print(f" ✓ Transitioned to: {args.status}", file=sys.stderr)
            else:
                print(f" ⚠️ Transition failed: {result}", file=sys.stderr)
        else:
            print(f" ⚠️ Status '{args.status}' not available", file=sys.stderr)
            # Show available transitions to help the caller pick a valid one
            success, transitions = client.get_transitions(ticket)
            if success:
                available = [t['to']['name'] for t in transitions.get('transitions', [])]
                print(f" Available: {', '.join(available)}", file=sys.stderr)

    # Build update payloads: `fields` replaces values, `update` carries
    # incremental add/remove operations.
    fields = {}
    update = {}

    if args.summary:
        fields['summary'] = args.summary

    if args.description:
        fields['description'] = client._format_body(args.description)

    if args.priority:
        fields['priority'] = {'name': args.priority}

    if args.assignee:
        if args.assignee.lower() == 'me':
            success, me = client.get_myself()
            if success:
                fields['assignee'] = {'accountId': me.get('accountId')}
            else:
                # Previously this failure was silent; warn like the lookup branch.
                print(f" ⚠️ Could not find user: {args.assignee}", file=sys.stderr)
        elif args.assignee.lower() in ['none', 'unassigned', '-']:
            fields['assignee'] = None
        else:
            success, users = client.search_users(args.assignee, max_results=1)
            if success and users:
                fields['assignee'] = {'accountId': users[0].get('accountId')}
            else:
                print(f" ⚠️ Could not find user: {args.assignee}", file=sys.stderr)

    if args.labels:
        fields['labels'] = _split_csv(args.labels)

    # Label modifications using update syntax; skip entirely when the
    # argument contains no usable label so no empty operation list is sent.
    label_ops = []
    if args.add_labels:
        label_ops.extend({'add': label} for label in _split_csv(args.add_labels))
    if args.remove_labels:
        label_ops.extend({'remove': label} for label in _split_csv(args.remove_labels))
    if label_ops:
        update['labels'] = label_ops

    if args.components:
        # Drop empty entries (e.g. "a,,b" or a trailing comma), as for labels.
        fields['components'] = [{'name': name} for name in _split_csv(args.components)]

    # Custom fields are applied last so they can override the options above.
    fields.update(custom)

    # Apply updates if any fields changed
    if fields or update:
        success, result = client.update_issue(ticket, fields=fields if fields else None,
                                              update=update if update else None)
        if not success:
            print(f"❌ Error updating ticket: {result}", file=sys.stderr)
            sys.exit(3)

        print(" ✓ Fields updated", file=sys.stderr)

    # Report each touched field once, even when labels were both replaced
    # and incrementally modified.
    updated_fields = list(fields.keys())
    if update and 'labels' not in updated_fields:
        updated_fields.append('labels')

    # Output result
    output = {
        'success': True,
        'key': ticket,
        'url': f"{client.base_url}/browse/{ticket}",
        'updated_fields': updated_fields
    }
    print(json.dumps(output, indent=2))

    print(f"✅ Ticket {ticket} updated successfully", file=sys.stderr)
    sys.exit(0)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
if __name__ == '__main__':
|
|
161
|
+
main()
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: karpenter
|
|
3
|
+
description: Karpenter Kubernetes autoscaler specialist for EKS clusters. Use for troubleshooting, documenting, managing, creating, updating, upgrading Karpenter deployments, and obtaining live cluster information. Covers NodePool/EC2NodeClass configuration, cost optimization, node consolidation, drift detection, Spot interruption handling, and migration from Cluster Autoscaler. Requires kubectl access to target EKS cluster.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Karpenter Skill
|
|
7
|
+
|
|
8
|
+
Comprehensive skill for managing Karpenter—the high-performance Kubernetes autoscaler for AWS EKS.
|
|
9
|
+
|
|
10
|
+
> **Last Updated:** 2026-01-20 from [karpenter.sh](https://karpenter.sh/)
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Quick Start
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Set cluster context
|
|
18
|
+
export CLUSTER_NAME=eks-nonprod
|
|
19
|
+
aws eks update-kubeconfig --name $CLUSTER_NAME --region eu-west-1
|
|
20
|
+
|
|
21
|
+
# Verify Karpenter is running
|
|
22
|
+
kubectl get pods -n karpenter
|
|
23
|
+
|
|
24
|
+
# List NodePools
|
|
25
|
+
kubectl get nodepools
|
|
26
|
+
|
|
27
|
+
# List EC2NodeClasses
|
|
28
|
+
kubectl get ec2nodeclasses
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Core Concepts
|
|
34
|
+
|
|
35
|
+
### Key Resources
|
|
36
|
+
|
|
37
|
+
| Resource | Description |
|
|
38
|
+
| ---------------- | ---------------------------------------------------------------------------- |
|
|
39
|
+
| **NodePool** | Defines constraints, limits, and disruption policies for provisioned nodes |
|
|
40
|
+
| **EC2NodeClass** | AWS-specific configuration (AMI, security groups, subnets, instance profile) |
|
|
41
|
+
| **NodeClaim** | Individual node request created by Karpenter |
|
|
42
|
+
|
|
43
|
+
### How Karpenter Works
|
|
44
|
+
|
|
45
|
+
1. **Watches** for unschedulable pods (`Unschedulable=True`)
|
|
46
|
+
2. **Evaluates** pod requirements (resources, affinity, tolerations, node selectors)
|
|
47
|
+
3. **Provisions** right-sized nodes matching constraints and cost optimization
|
|
48
|
+
4. **Disrupts** nodes via consolidation, drift, expiration, or interruption handling
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Common Workflows
|
|
53
|
+
|
|
54
|
+
### 1. Check Karpenter Status
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Controller pods
|
|
58
|
+
kubectl get pods -n karpenter
|
|
59
|
+
|
|
60
|
+
# Controller logs
|
|
61
|
+
kubectl logs -n karpenter -l app.kubernetes.io/name=karpenter -c controller --tail=100
|
|
62
|
+
|
|
63
|
+
# NodePools and their status
|
|
64
|
+
kubectl get nodepools -o wide
|
|
65
|
+
|
|
66
|
+
# EC2NodeClasses
|
|
67
|
+
kubectl get ec2nodeclasses -o wide
|
|
68
|
+
|
|
69
|
+
# NodeClaims (requested nodes)
|
|
70
|
+
kubectl get nodeclaims -o wide
|
|
71
|
+
|
|
72
|
+
# Karpenter-managed nodes
|
|
73
|
+
kubectl get nodes -l karpenter.sh/nodepool
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 2. Create a NodePool
|
|
77
|
+
|
|
78
|
+
```yaml
|
|
79
|
+
apiVersion: karpenter.sh/v1
|
|
80
|
+
kind: NodePool
|
|
81
|
+
metadata:
|
|
82
|
+
name: general-purpose
|
|
83
|
+
spec:
|
|
84
|
+
template:
|
|
85
|
+
spec:
|
|
86
|
+
nodeClassRef:
|
|
87
|
+
group: karpenter.k8s.aws
|
|
88
|
+
kind: EC2NodeClass
|
|
89
|
+
name: default
|
|
90
|
+
requirements:
|
|
91
|
+
- key: kubernetes.io/arch
|
|
92
|
+
operator: In
|
|
93
|
+
values: ["amd64"]
|
|
94
|
+
- key: karpenter.sh/capacity-type
|
|
95
|
+
operator: In
|
|
96
|
+
values: ["spot", "on-demand"]
|
|
97
|
+
- key: karpenter.k8s.aws/instance-category
|
|
98
|
+
operator: In
|
|
99
|
+
values: ["c", "m", "r"]
|
|
100
|
+
- key: karpenter.k8s.aws/instance-generation
|
|
101
|
+
operator: Gt
|
|
102
|
+
values: ["5"]
|
|
103
|
+
limits:
|
|
104
|
+
cpu: 1000
|
|
105
|
+
memory: 1000Gi
|
|
106
|
+
disruption:
|
|
107
|
+
consolidationPolicy: WhenEmptyOrUnderutilized
|
|
108
|
+
consolidateAfter: 1m
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### 3. Create an EC2NodeClass
|
|
112
|
+
|
|
113
|
+
```yaml
|
|
114
|
+
apiVersion: karpenter.k8s.aws/v1
|
|
115
|
+
kind: EC2NodeClass
|
|
116
|
+
metadata:
|
|
117
|
+
name: default
|
|
118
|
+
spec:
|
|
119
|
+
role: KarpenterNodeRole-${CLUSTER_NAME}
|
|
120
|
+
amiSelectorTerms:
|
|
121
|
+
- alias: al2023@latest
|
|
122
|
+
subnetSelectorTerms:
|
|
123
|
+
- tags:
|
|
124
|
+
karpenter.sh/discovery: ${CLUSTER_NAME}
|
|
125
|
+
securityGroupSelectorTerms:
|
|
126
|
+
- tags:
|
|
127
|
+
karpenter.sh/discovery: ${CLUSTER_NAME}
|
|
128
|
+
# instanceProfile: KarpenterNodeInstanceProfile-${CLUSTER_NAME} # alternative to `role` — specify exactly one of `role` or `instanceProfile`, not both
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### 4. Troubleshoot Pending Pods
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# Find unschedulable pods
|
|
135
|
+
kubectl get pods --all-namespaces -o wide | grep Pending
|
|
136
|
+
|
|
137
|
+
# Check why a pod is pending
|
|
138
|
+
kubectl describe pod <pod-name> -n <namespace>
|
|
139
|
+
|
|
140
|
+
# Check Karpenter logs for provisioning issues
|
|
141
|
+
kubectl logs -n karpenter -l app.kubernetes.io/name=karpenter -c controller | grep -i "could not"
|
|
142
|
+
|
|
143
|
+
# Verify NodePool requirements can be met
|
|
144
|
+
kubectl describe nodepool <nodepool-name>
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### 5. Force Node Refresh (Drift)
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
# Annotate EC2NodeClass to trigger drift
|
|
151
|
+
kubectl annotate ec2nodeclass default karpenter.k8s.aws/forced-drift=$(date +%s) --overwrite
|
|
152
|
+
|
|
153
|
+
# Watch drift propagation
|
|
154
|
+
kubectl get nodeclaims -w
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### 6. Manual Node Cordoning/Draining
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
# Cordon a node (prevent scheduling)
|
|
161
|
+
kubectl cordon <node-name>
|
|
162
|
+
|
|
163
|
+
# Drain a node (evict pods gracefully)
|
|
164
|
+
kubectl drain <node-name> --ignore-daemonsets --delete-emptydir-data
|
|
165
|
+
|
|
166
|
+
# Delete node (Karpenter handles cleanup via finalizer)
|
|
167
|
+
kubectl delete node <node-name>
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## Troubleshooting Guide
|
|
173
|
+
|
|
174
|
+
### Enable Debug Logging
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
# Helm upgrade with debug
|
|
178
|
+
helm upgrade karpenter oci://public.ecr.aws/karpenter/karpenter \
|
|
179
|
+
--set logLevel=debug \
|
|
180
|
+
-n karpenter
|
|
181
|
+
|
|
182
|
+
# Or patch deployment
|
|
183
|
+
kubectl set env deployment/karpenter -n karpenter LOG_LEVEL=debug
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Common Issues
|
|
187
|
+
|
|
188
|
+
| Issue | Diagnosis | Solution |
|
|
189
|
+
| ----------------------------- | -------------------------------- | -------------------------------------- |
|
|
190
|
+
| **No nodes provisioned** | Check controller logs for errors | Verify IAM permissions, subnet/SG tags |
|
|
191
|
+
| **Node not ready** | `kubectl describe node` | Check CNI, kubelet, VPC DNS |
|
|
192
|
+
| **Spot interruption** | Events in node description | Karpenter auto-drains and replaces |
|
|
193
|
+
| **Drift not detected** | Check `drifted` condition | Verify AMI changes, annotate to force |
|
|
194
|
+
| **Consolidation not working** | Check `consolidatable` condition | Verify pods can be evicted (PDB, etc.) |
|
|
195
|
+
|
|
196
|
+
### Reference Files
|
|
197
|
+
|
|
198
|
+
- **[references/troubleshooting.md](references/troubleshooting.md)** — Detailed troubleshooting scenarios
|
|
199
|
+
- **[references/nodepools.md](references/nodepools.md)** — NodePool configuration patterns
|
|
200
|
+
- **[references/ec2nodeclasses.md](references/ec2nodeclasses.md)** — EC2NodeClass examples
|
|
201
|
+
- **[references/migration.md](references/migration.md)** — Migration from Cluster Autoscaler
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## Scripts
|
|
206
|
+
|
|
207
|
+
### Get Cluster Status
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
# Run from skill directory
|
|
211
|
+
python scripts/karpenter_status.py --cluster eks-nonprod
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Generate NodePool YAML
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
python scripts/generate_nodepool.py \
|
|
218
|
+
--name gpu-workloads \
|
|
219
|
+
--instance-types "p3.2xlarge,p3.8xlarge" \
|
|
220
|
+
--capacity-type spot \
|
|
221
|
+
--cpu-limit 500
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Best Practices
|
|
227
|
+
|
|
228
|
+
### Cost Optimization
|
|
229
|
+
|
|
230
|
+
1. **Use Spot Instances** — Include `spot` in capacity-type requirements
|
|
231
|
+
2. **Enable Consolidation** — Use `WhenEmptyOrUnderutilized` policy
|
|
232
|
+
3. **Set Limits** — Define CPU/memory limits per NodePool
|
|
233
|
+
4. **Right-size Selection** — Use instance categories and generations
|
|
234
|
+
|
|
235
|
+
### Reliability
|
|
236
|
+
|
|
237
|
+
1. **Multi-AZ** — Don't restrict to a single availability zone
|
|
238
|
+
2. **Instance Diversity** — Allow multiple instance types for flexibility
|
|
239
|
+
3. **PodDisruptionBudgets** — Protect critical workloads during consolidation
|
|
240
|
+
4. **Expiration** — Set `expireAfter` for node recycling (security)
|
|
241
|
+
|
|
242
|
+
### Security
|
|
243
|
+
|
|
244
|
+
1. **IMDSv2** — Enforce in EC2NodeClass with `metadataOptions`
|
|
245
|
+
2. **Node IAM Role** — Scope permissions to minimum required
|
|
246
|
+
3. **Private subnets** — Use private subnets for nodes
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
## Installation & Upgrade
|
|
251
|
+
|
|
252
|
+
### Install Karpenter
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
export KARPENTER_VERSION="1.5.2"
|
|
256
|
+
export CLUSTER_NAME="eks-nonprod"
|
|
257
|
+
export AWS_PARTITION="aws"
|
|
258
|
+
export AWS_REGION="eu-west-1"
|
|
259
|
+
export AWS_ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"
|
|
260
|
+
|
|
261
|
+
helm registry logout public.ecr.aws
|
|
262
|
+
helm upgrade --install karpenter oci://public.ecr.aws/karpenter/karpenter \
|
|
263
|
+
--version "${KARPENTER_VERSION}" \
|
|
264
|
+
--namespace karpenter --create-namespace \
|
|
265
|
+
--set "settings.clusterName=${CLUSTER_NAME}" \
|
|
266
|
+
--set "settings.interruptionQueue=${CLUSTER_NAME}" \
|
|
267
|
+
--set controller.resources.requests.cpu=1 \
|
|
268
|
+
--set controller.resources.requests.memory=1Gi \
|
|
269
|
+
--set controller.resources.limits.cpu=1 \
|
|
270
|
+
--set controller.resources.limits.memory=1Gi \
|
|
271
|
+
--wait
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Upgrade Karpenter
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
# Check current version
|
|
278
|
+
kubectl get deployment -n karpenter karpenter -o jsonpath='{.spec.template.spec.containers[0].image}'
|
|
279
|
+
|
|
280
|
+
# Upgrade
|
|
281
|
+
helm upgrade karpenter oci://public.ecr.aws/karpenter/karpenter \
|
|
282
|
+
--version "${NEW_VERSION}" \
|
|
283
|
+
--namespace karpenter \
|
|
284
|
+
--reuse-values
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## Related Skills
|
|
290
|
+
|
|
291
|
+
- **[aws](../aws/SKILL.md)** — Parent AWS skill for broader AWS operations
|
|
292
|
+
- **[aws-terraform](../aws-terraform/SKILL.md)** — Infrastructure as Code for Karpenter deployment
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## External Resources
|
|
297
|
+
|
|
298
|
+
- [Karpenter Documentation](https://karpenter.sh/docs/)
|
|
299
|
+
- [Karpenter EKS Best Practices](https://aws.github.io/aws-eks-best-practices/karpenter/)
|
|
300
|
+
- [Karpenter Blueprints](https://github.com/aws-samples/karpenter-blueprints)
|
|
301
|
+
- [EKS Karpenter Workshop](https://www.eksworkshop.com/docs/autoscaling/compute/karpenter/)
|