@techwavedev/agi-agent-kit 1.1.7 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @techwavedev/agi-agent-kit might be problematic. Click here for more details.
- package/CHANGELOG.md +82 -1
- package/README.md +190 -12
- package/bin/init.js +30 -2
- package/package.json +6 -3
- package/templates/base/AGENTS.md +54 -23
- package/templates/base/README.md +325 -0
- package/templates/base/directives/memory_integration.md +95 -0
- package/templates/base/execution/memory_manager.py +309 -0
- package/templates/base/execution/session_boot.py +218 -0
- package/templates/base/execution/session_init.py +320 -0
- package/templates/base/skill-creator/SKILL_skillcreator.md +23 -36
- package/templates/base/skill-creator/scripts/init_skill.py +18 -135
- package/templates/skills/ec/README.md +31 -0
- package/templates/skills/ec/aws/SKILL.md +1020 -0
- package/templates/skills/ec/aws/defaults.yaml +13 -0
- package/templates/skills/ec/aws/references/common_patterns.md +80 -0
- package/templates/skills/ec/aws/references/mcp_servers.md +98 -0
- package/templates/skills/ec/aws-terraform/SKILL.md +349 -0
- package/templates/skills/ec/aws-terraform/references/best_practices.md +394 -0
- package/templates/skills/ec/aws-terraform/references/checkov_reference.md +337 -0
- package/templates/skills/ec/aws-terraform/scripts/configure_mcp.py +150 -0
- package/templates/skills/ec/confluent-kafka/SKILL.md +655 -0
- package/templates/skills/ec/confluent-kafka/references/ansible_playbooks.md +792 -0
- package/templates/skills/ec/confluent-kafka/references/ec_deployment.md +579 -0
- package/templates/skills/ec/confluent-kafka/references/kraft_migration.md +490 -0
- package/templates/skills/ec/confluent-kafka/references/troubleshooting.md +778 -0
- package/templates/skills/ec/confluent-kafka/references/upgrade_7x_to_8x.md +488 -0
- package/templates/skills/ec/confluent-kafka/scripts/kafka_health_check.py +435 -0
- package/templates/skills/ec/confluent-kafka/scripts/upgrade_preflight.py +568 -0
- package/templates/skills/ec/confluent-kafka/scripts/validate_config.py +455 -0
- package/templates/skills/ec/consul/SKILL.md +427 -0
- package/templates/skills/ec/consul/references/acl_setup.md +168 -0
- package/templates/skills/ec/consul/references/ha_config.md +196 -0
- package/templates/skills/ec/consul/references/troubleshooting.md +267 -0
- package/templates/skills/ec/consul/references/upgrades.md +213 -0
- package/templates/skills/ec/consul/scripts/consul_health_report.py +530 -0
- package/templates/skills/ec/consul/scripts/consul_status.py +264 -0
- package/templates/skills/ec/consul/scripts/generate_values.py +170 -0
- package/templates/skills/ec/documentation/SKILL.md +351 -0
- package/templates/skills/ec/documentation/references/best_practices.md +201 -0
- package/templates/skills/ec/documentation/scripts/analyze_code.py +307 -0
- package/templates/skills/ec/documentation/scripts/detect_changes.py +460 -0
- package/templates/skills/ec/documentation/scripts/generate_changelog.py +312 -0
- package/templates/skills/ec/documentation/scripts/sync_docs.py +272 -0
- package/templates/skills/ec/documentation/scripts/update_skill_docs.py +366 -0
- package/templates/skills/ec/gitlab/SKILL.md +529 -0
- package/templates/skills/ec/gitlab/references/agent_installation.md +416 -0
- package/templates/skills/ec/gitlab/references/api_reference.md +508 -0
- package/templates/skills/ec/gitlab/references/gitops_flux.md +465 -0
- package/templates/skills/ec/gitlab/references/troubleshooting.md +518 -0
- package/templates/skills/ec/gitlab/scripts/generate_agent_values.py +329 -0
- package/templates/skills/ec/gitlab/scripts/gitlab_agent_status.py +414 -0
- package/templates/skills/ec/jira/SKILL.md +484 -0
- package/templates/skills/ec/jira/references/jql_reference.md +148 -0
- package/templates/skills/ec/jira/scripts/add_comment.py +91 -0
- package/templates/skills/ec/jira/scripts/bulk_log_work.py +124 -0
- package/templates/skills/ec/jira/scripts/create_ticket.py +162 -0
- package/templates/skills/ec/jira/scripts/get_ticket.py +191 -0
- package/templates/skills/ec/jira/scripts/jira_client.py +383 -0
- package/templates/skills/ec/jira/scripts/log_work.py +154 -0
- package/templates/skills/ec/jira/scripts/search_tickets.py +104 -0
- package/templates/skills/ec/jira/scripts/update_comment.py +67 -0
- package/templates/skills/ec/jira/scripts/update_ticket.py +161 -0
- package/templates/skills/ec/karpenter/SKILL.md +301 -0
- package/templates/skills/ec/karpenter/references/ec2nodeclasses.md +421 -0
- package/templates/skills/ec/karpenter/references/migration.md +396 -0
- package/templates/skills/ec/karpenter/references/nodepools.md +400 -0
- package/templates/skills/ec/karpenter/references/troubleshooting.md +359 -0
- package/templates/skills/ec/karpenter/scripts/generate_ec2nodeclass.py +187 -0
- package/templates/skills/ec/karpenter/scripts/generate_nodepool.py +245 -0
- package/templates/skills/ec/karpenter/scripts/karpenter_status.py +359 -0
- package/templates/skills/ec/opensearch/SKILL.md +720 -0
- package/templates/skills/ec/opensearch/references/ml_neural_search.md +576 -0
- package/templates/skills/ec/opensearch/references/operator.md +532 -0
- package/templates/skills/ec/opensearch/references/query_dsl.md +532 -0
- package/templates/skills/ec/opensearch/scripts/configure_mcp.py +148 -0
- package/templates/skills/ec/victoriametrics/SKILL.md +598 -0
- package/templates/skills/ec/victoriametrics/references/kubernetes.md +531 -0
- package/templates/skills/ec/victoriametrics/references/prometheus_migration.md +333 -0
- package/templates/skills/ec/victoriametrics/references/troubleshooting.md +442 -0
- package/templates/skills/knowledge/SKILLS_CATALOG.md +274 -4
- package/templates/skills/knowledge/intelligent-routing/SKILL.md +237 -164
- package/templates/skills/knowledge/parallel-agents/SKILL.md +345 -73
- package/templates/skills/knowledge/plugin-discovery/SKILL.md +582 -0
- package/templates/skills/knowledge/plugin-discovery/scripts/platform_setup.py +1083 -0
- package/templates/skills/knowledge/design-md/README.md +0 -34
- package/templates/skills/knowledge/design-md/SKILL.md +0 -193
- package/templates/skills/knowledge/design-md/examples/DESIGN.md +0 -154
- package/templates/skills/knowledge/notebooklm-mcp/SKILL.md +0 -71
- package/templates/skills/knowledge/notebooklm-mcp/assets/example_asset.txt +0 -24
- package/templates/skills/knowledge/notebooklm-mcp/references/api_reference.md +0 -34
- package/templates/skills/knowledge/notebooklm-mcp/scripts/example.py +0 -19
- package/templates/skills/knowledge/react-components/README.md +0 -36
- package/templates/skills/knowledge/react-components/SKILL.md +0 -53
- package/templates/skills/knowledge/react-components/examples/gold-standard-card.tsx +0 -80
- package/templates/skills/knowledge/react-components/package-lock.json +0 -231
- package/templates/skills/knowledge/react-components/package.json +0 -16
- package/templates/skills/knowledge/react-components/resources/architecture-checklist.md +0 -15
- package/templates/skills/knowledge/react-components/resources/component-template.tsx +0 -37
- package/templates/skills/knowledge/react-components/resources/stitch-api-reference.md +0 -14
- package/templates/skills/knowledge/react-components/resources/style-guide.json +0 -27
- package/templates/skills/knowledge/react-components/scripts/fetch-stitch.sh +0 -30
- package/templates/skills/knowledge/react-components/scripts/validate.js +0 -68
- package/templates/skills/knowledge/self-update/SKILL.md +0 -60
- package/templates/skills/knowledge/self-update/scripts/update_kit.py +0 -103
- package/templates/skills/knowledge/stitch-loop/README.md +0 -54
- package/templates/skills/knowledge/stitch-loop/SKILL.md +0 -235
- package/templates/skills/knowledge/stitch-loop/examples/SITE.md +0 -73
- package/templates/skills/knowledge/stitch-loop/examples/next-prompt.md +0 -25
- package/templates/skills/knowledge/stitch-loop/resources/baton-schema.md +0 -61
- package/templates/skills/knowledge/stitch-loop/resources/site-template.md +0 -104
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
# ZooKeeper to KRaft Migration Guide
|
|
2
|
+
|
|
3
|
+
Complete guide for migrating Confluent Kafka from ZooKeeper mode to KRaft mode.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## ⚠️ EC Environment Path Mappings
|
|
8
|
+
|
|
9
|
+
> **This guide uses standard Confluent paths in examples.** For EC deployments, substitute paths, endpoints, and commands as follows:
|
|
10
|
+
|
|
11
|
+
| Standard Path | EC Path |
|
|
12
|
+
| ----------------------------- | -------------------------------------------------------- |
|
|
13
|
+
| `/opt/confluent/` | `{{ base_path }}/opt/confluent-{{ confluent_version }}/` |
|
|
14
|
+
| `/var/kafka-logs/` | `{{ base_path }}/opt/data` (broker) |
|
|
15
|
+
| `/var/kafka-controller-data/` | `{{ base_path }}/opt/data/controller` |
|
|
16
|
+
| `/var/log/confluent/kafka/` | `{{ base_path }}/logs/` |
|
|
17
|
+
| `localhost:9092` | `$BOOTSTRAP` (use SSL port 9443) |
|
|
18
|
+
| `systemctl` | `systemctl --user` |
|
|
19
|
+
|
|
20
|
+
**EC Quick Setup:**
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
export KAFKA_HOME={{ base_path }}/opt/confluent-{{ confluent_version }}
|
|
24
|
+
export BOOTSTRAP={{ broker_host_1 }}:{{ broker_port }}
|
|
25
|
+
export DATA_DIR={{ base_path }}/opt/data
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
See **[ec_deployment.md](ec_deployment.md)** for complete EC paths and configuration.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Table of Contents
|
|
33
|
+
|
|
34
|
+
1. [KRaft Overview](#kraft-overview)
|
|
35
|
+
2. [Migration Prerequisites](#migration-prerequisites)
|
|
36
|
+
3. [Migration Strategies](#migration-strategies)
|
|
37
|
+
4. [In-Place Migration Procedure](#in-place-migration-procedure)
|
|
38
|
+
5. [Post-Migration Tasks](#post-migration-tasks)
|
|
39
|
+
6. [Troubleshooting](#troubleshooting)
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## KRaft Overview
|
|
44
|
+
|
|
45
|
+
### What is KRaft?
|
|
46
|
+
|
|
47
|
+
KRaft (Kafka Raft) replaces ZooKeeper as the metadata management system:
|
|
48
|
+
|
|
49
|
+
| Aspect | ZooKeeper Mode | KRaft Mode |
|
|
50
|
+
| -------------- | --------------------------- | ---------------------------- |
|
|
51
|
+
| Metadata store | External ZooKeeper ensemble | Internal Raft quorum |
|
|
52
|
+
| Failover | Depends on ZK availability | Built-in Raft consensus |
|
|
53
|
+
| Latency | ZK round-trip overhead | Direct controller access |
|
|
54
|
+
| Operations | Manage 2 clusters | Single Kafka cluster |
|
|
55
|
+
| Scaling | ZK becomes bottleneck | Controller scales with Kafka |
|
|
56
|
+
|
|
57
|
+
### KRaft Architecture
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
ZooKeeper Mode KRaft Mode
|
|
61
|
+
|
|
62
|
+
┌─────────┐ ┌─────────────────────┐
|
|
63
|
+
│ ZK1 │◄──────┐ │ Controller-1 │
|
|
64
|
+
├─────────┤ │ │ (voter, node.id=1) │
|
|
65
|
+
│ ZK2 │◄──────┼── Metadata └─────────────────────┘
|
|
66
|
+
├─────────┤ │ │
|
|
67
|
+
│ ZK3 │◄──────┘ ┌─────────────────────┐
|
|
68
|
+
└─────────┘ │ Controller-2 │
|
|
69
|
+
▲ │ (voter, node.id=2) │
|
|
70
|
+
│ └─────────────────────┘
|
|
71
|
+
┌────┴────┐ │
|
|
72
|
+
│Broker-1 │ ┌─────────────────────┐
|
|
73
|
+
├─────────┤ │ Controller-3 │
|
|
74
|
+
│Broker-2 │ │ (voter, node.id=3) │
|
|
75
|
+
├─────────┤ └─────────────────────┘
|
|
76
|
+
│Broker-3 │ │
|
|
77
|
+
└─────────┘ ┌────────┴────────┐
|
|
78
|
+
│ │
|
|
79
|
+
┌───────┴───┐ ┌───────┴───┐
|
|
80
|
+
│ Broker-1 │ │ Broker-N │
|
|
81
|
+
│(node.id │ │(node.id │
|
|
82
|
+
│ =101) │ │ =10N) │
|
|
83
|
+
└───────────┘ └───────────┘
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Node Roles in KRaft
|
|
87
|
+
|
|
88
|
+
| Role | process.roles | Description |
|
|
89
|
+
| -------------- | ------------------- | ------------------------------------ |
|
|
90
|
+
| **Controller** | `controller` | Manages metadata, handles leadership |
|
|
91
|
+
| **Broker** | `broker` | Handles client requests, stores data |
|
|
92
|
+
| **Combined** | `controller,broker` | Both roles (small clusters only) |
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Migration Prerequisites
|
|
97
|
+
|
|
98
|
+
### Version Requirements
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
# Minimum supported versions
|
|
102
|
+
# Source: Confluent 7.4+ (Kafka 3.4+)
|
|
103
|
+
# Target: finish the KRaft migration on Confluent 7.x first, then upgrade to 8.0+ (8.0 no longer supports ZooKeeper)
|
|
104
|
+
|
|
105
|
+
# Check current version
|
|
106
|
+
/opt/confluent/bin/kafka-broker-api-versions --bootstrap-server localhost:9092 --version
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Cluster Requirements
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
# 1. All brokers must be on same version
|
|
113
|
+
grep -h broker.id /opt/confluent/etc/kafka/server.properties
|
|
114
|
+
# (run on each broker; this only lists the broker ID - compare the output of the version check above across brokers)
|
|
115
|
+
|
|
116
|
+
# 2. No offline partitions
|
|
117
|
+
/opt/confluent/bin/kafka-topics --bootstrap-server localhost:9092 \
|
|
118
|
+
--describe --unavailable-partitions
|
|
119
|
+
|
|
120
|
+
# 3. All replicas in-sync
|
|
121
|
+
/opt/confluent/bin/kafka-topics --bootstrap-server localhost:9092 \
|
|
122
|
+
--describe --under-replicated-partitions
|
|
123
|
+
|
|
124
|
+
# 4. ZooKeeper healthy
|
|
125
|
+
echo ruok | nc localhost 2181 # Should return "imok"
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Hardware Requirements for Controllers
|
|
129
|
+
|
|
130
|
+
| Cluster Size | Controllers | CPU (cores) | Memory | Disk (SSD) |
|
|
131
|
+
| -------------- | ----------- | ----------- | ------ | ---------- |
|
|
132
|
+
| < 50 brokers | 3 | 4 | 8GB | 50GB |
|
|
133
|
+
| 50-100 brokers | 3 | 8 | 16GB | 100GB |
|
|
134
|
+
| > 100 brokers | 5 | 16 | 32GB | 200GB |
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Migration Strategies
|
|
139
|
+
|
|
140
|
+
### Strategy 1: In-Place Migration (Recommended)
|
|
141
|
+
|
|
142
|
+
Migrate existing brokers to KRaft mode without rebuilding:
|
|
143
|
+
|
|
144
|
+
```
|
|
145
|
+
Pros:
|
|
146
|
+
✅ No data movement required
|
|
147
|
+
✅ Minimal downtime
|
|
148
|
+
✅ Preserves topic configurations
|
|
149
|
+
|
|
150
|
+
Cons:
|
|
151
|
+
❌ Rollback is complex after finalization
|
|
152
|
+
❌ Requires careful coordination
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Strategy 2: Parallel Cluster Migration
|
|
156
|
+
|
|
157
|
+
Build new KRaft cluster and migrate topics:
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
Pros:
|
|
161
|
+
✅ Clean separation
|
|
162
|
+
✅ Easy rollback (keep old cluster)
|
|
163
|
+
✅ Test in isolation
|
|
164
|
+
|
|
165
|
+
Cons:
|
|
166
|
+
❌ Double infrastructure cost during migration
|
|
167
|
+
❌ Requires MirrorMaker or replication setup
|
|
168
|
+
❌ Client reconfiguration required
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## In-Place Migration Procedure
|
|
174
|
+
|
|
175
|
+
### Phase 1: Prepare for Migration
|
|
176
|
+
|
|
177
|
+
#### Step 1.1: Enable Migration Mode on Brokers
|
|
178
|
+
|
|
179
|
+
```properties
|
|
180
|
+
# Add to ALL broker server.properties
|
|
181
|
+
|
|
182
|
+
# Keep existing ZK config
|
|
183
|
+
zookeeper.connect=zk1:2181,zk2:2181,zk3:2181
|
|
184
|
+
|
|
185
|
+
# Add controller quorum (future controllers)
|
|
186
|
+
controller.quorum.voters=1@controller-01:9093,2@controller-02:9093,3@controller-03:9093
|
|
187
|
+
controller.listener.names=CONTROLLER
|
|
188
|
+
listener.security.protocol.map=CONTROLLER:PLAINTEXT,...existing...
|
|
189
|
+
|
|
190
|
+
# Enable migration
|
|
191
|
+
zookeeper.metadata.migration.enable=true
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
#### Step 1.2: Deploy Controller Nodes
|
|
195
|
+
|
|
196
|
+
Create dedicated controller nodes (combined mode is not supported as a target for ZooKeeper-to-KRaft migration):
|
|
197
|
+
|
|
198
|
+
```properties
|
|
199
|
+
# /opt/confluent/etc/kafka/kraft/controller.properties
|
|
200
|
+
|
|
201
|
+
# Controller ONLY
|
|
202
|
+
process.roles=controller
|
|
203
|
+
# Unique per controller (1, 2, 3...); kept on its own line because .properties values do not allow trailing comments
node.id=1
|
|
204
|
+
|
|
205
|
+
# Controller quorum
|
|
206
|
+
controller.quorum.voters=1@controller-01:9093,2@controller-02:9093,3@controller-03:9093
|
|
207
|
+
controller.listener.names=CONTROLLER
|
|
208
|
+
listener.security.protocol.map=CONTROLLER:PLAINTEXT
|
|
209
|
+
|
|
210
|
+
# Listeners
|
|
211
|
+
listeners=CONTROLLER://0.0.0.0:9093
|
|
212
|
+
|
|
213
|
+
# Data directory
|
|
214
|
+
log.dirs=/var/kafka-controller-data
|
|
215
|
+
|
|
216
|
+
# Migration settings - connect to ZK to read metadata
|
|
217
|
+
zookeeper.connect=zk1:2181,zk2:2181,zk3:2181
|
|
218
|
+
zookeeper.metadata.migration.enable=true
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
#### Step 1.3: Format Controller Storage
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
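# Reuse the EXISTING cluster ID of the ZooKeeper-based cluster (do NOT generate a new one; use same ID for all controllers)
|
|
225
|
+
CLUSTER_ID=$(/opt/confluent/bin/zookeeper-shell localhost:2181 get /cluster/id 2>/dev/null | grep -o '"id":"[^"]*"' | cut -d'"' -f4)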
|
|
226
|
+
echo $CLUSTER_ID > /backup/cluster-id.txt
|
|
227
|
+
|
|
228
|
+
# Format storage on each controller
|
|
229
|
+
/opt/confluent/bin/kafka-storage format \
|
|
230
|
+
-t $CLUSTER_ID \
|
|
231
|
+
-c /opt/confluent/etc/kafka/kraft/controller.properties
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### Phase 2: Start Migration
|
|
235
|
+
|
|
236
|
+
#### Step 2.1: Start Controllers in Migration Mode
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
# On each controller node
|
|
240
|
+
sudo systemctl start confluent-kafka-controller
|
|
241
|
+
|
|
242
|
+
# Verify controllers formed quorum
|
|
243
|
+
/opt/confluent/bin/kafka-metadata --snapshot /var/kafka-controller-data/__cluster_metadata-0/00000000000000000000.log \
|
|
244
|
+
--command quorum
|
|
245
|
+
|
|
246
|
+
# Expected output: 3 voters, 1 leader
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
#### Step 2.2: Rolling Restart Brokers
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
# Restart each broker (one at a time) to pick up migration config
|
|
253
|
+
sudo systemctl restart confluent-server
|
|
254
|
+
|
|
255
|
+
# Verify broker joined KRaft quorum
|
|
256
|
+
# Look for log message:
|
|
257
|
+
grep "Registered broker" /var/log/confluent/kafka/server.log | tail -1
|
|
258
|
+
|
|
259
|
+
# Verify migration is in progress
|
|
260
|
+
/opt/confluent/bin/kafka-metadata --snapshot /var/kafka-controller-data/__cluster_metadata-0/00000000000000000000.log \
|
|
261
|
+
--command broker
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
#### Step 2.3: Verify Dual-Write Mode
|
|
265
|
+
|
|
266
|
+
During migration, metadata is written to both ZooKeeper and KRaft:
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
# Check ZK still has metadata
|
|
270
|
+
/opt/confluent/bin/zookeeper-shell localhost:2181 <<< "ls /brokers/ids"
|
|
271
|
+
|
|
272
|
+
# Check KRaft has metadata
|
|
273
|
+
/opt/confluent/bin/kafka-metadata --snapshot /var/kafka-controller-data/__cluster_metadata-0/00000000000000000000.log \
|
|
274
|
+
--command topic
|
|
275
|
+
|
|
276
|
+
# Both should show same brokers/topics
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### Phase 3: Finalize Migration
|
|
280
|
+
|
|
281
|
+
**⚠️ WARNING: After finalization, rollback requires restoring from backup**
|
|
282
|
+
|
|
283
|
+
#### Step 3.1: Pre-Finalization Checks
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
# Ensure all brokers are in KRaft mode
|
|
287
|
+
/opt/confluent/bin/kafka-metadata --snapshot /var/kafka-controller-data/__cluster_metadata-0/00000000000000000000.log \
|
|
288
|
+
--command broker | wc -l
|
|
289
|
+
# Should match expected broker count
|
|
290
|
+
|
|
291
|
+
# Verify no under-replicated partitions
|
|
292
|
+
/opt/confluent/bin/kafka-topics --bootstrap-server localhost:9092 \
|
|
293
|
+
--describe --under-replicated-partitions
|
|
294
|
+
|
|
295
|
+
# Final backup before finalization
|
|
296
|
+
tar -czvf /backup/pre-finalization-$(date +%Y%m%d).tar.gz /opt/confluent/etc /var/kafka-logs
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
#### Step 3.2: Finalize Migration
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
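# Run finalization (from any controller)
# NOTE: confirm this tool ships with your Confluent version; upstream Apache Kafka finalizes by
# removing zookeeper.connect and zookeeper.metadata.migration.enable from the controllers and restarting them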
|
|
303
|
+
/opt/confluent/bin/kafka-metadata-migration \
|
|
304
|
+
--snapshot /var/kafka-controller-data/__cluster_metadata-0/00000000000000000000.log \
|
|
305
|
+
--finalize
|
|
306
|
+
|
|
307
|
+
# Verify finalization
|
|
308
|
+
grep "migration.state" /var/log/confluent/kafka/controller.log | tail -1
|
|
309
|
+
# Should show: FULLY_MIGRATED
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
#### Step 3.3: Remove ZooKeeper Configuration
|
|
313
|
+
|
|
314
|
+
```bash
|
|
315
|
+
# Remove ZK config from all brokers
|
|
316
|
+
# Edit /opt/confluent/etc/kafka/server.properties
|
|
317
|
+
|
|
318
|
+
# REMOVE:
|
|
319
|
+
# zookeeper.connect=...
|
|
320
|
+
# zookeeper.metadata.migration.enable=true
|
|
321
|
+
|
|
322
|
+
# Rolling restart brokers
|
|
323
|
+
for broker in kafka-01 kafka-02 kafka-03; do
|
|
324
|
+
ssh $broker "sudo systemctl restart confluent-server"
|
|
325
|
+
sleep 120 # Wait for ISR sync
|
|
326
|
+
# Verify no under-replicated partitions
|
|
327
|
+
done
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
#### Step 3.4: Decommission ZooKeeper
|
|
331
|
+
|
|
332
|
+
```bash
|
|
333
|
+
# Stop ZooKeeper services (after successful finalization)
|
|
334
|
+
for zk in zk1 zk2 zk3; do
|
|
335
|
+
ssh $zk "sudo systemctl stop confluent-zookeeper"
|
|
336
|
+
ssh $zk "sudo systemctl disable confluent-zookeeper"
|
|
337
|
+
done
|
|
338
|
+
|
|
339
|
+
# Archive ZK data for safety
|
|
340
|
+
tar -czvf /backup/zookeeper-final-$(date +%Y%m%d).tar.gz /var/zookeeper
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
## Post-Migration Tasks
|
|
346
|
+
|
|
347
|
+
### Update Ecosystem Components
|
|
348
|
+
|
|
349
|
+
#### Schema Registry
|
|
350
|
+
|
|
351
|
+
```properties
|
|
352
|
+
# Update schema-registry.properties
|
|
353
|
+
# REMOVE:
|
|
354
|
+
# kafkastore.connection.url=zk1:2181
|
|
355
|
+
|
|
356
|
+
# KEEP/ADD:
|
|
357
|
+
kafkastore.bootstrap.servers=kafka-01:9092,kafka-02:9092,kafka-03:9092
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
#### Kafka Connect
|
|
361
|
+
|
|
362
|
+
```properties
|
|
363
|
+
# Connect already uses bootstrap.servers, no ZK dependency
|
|
364
|
+
# Just verify configuration
|
|
365
|
+
bootstrap.servers=kafka-01:9092,kafka-02:9092,kafka-03:9092
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
#### Control Center
|
|
369
|
+
|
|
370
|
+
```properties
|
|
371
|
+
# Update control-center.properties
|
|
372
|
+
# REMOVE:
|
|
373
|
+
# zookeeper.connect=...
|
|
374
|
+
|
|
375
|
+
# KEEP:
|
|
376
|
+
bootstrap.servers=kafka-01:9092,kafka-02:9092,kafka-03:9092
|
|
377
|
+
|
|
378
|
+
# Restart Control Center
|
|
379
|
+
sudo systemctl restart confluent-control-center
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
### Update Monitoring
|
|
383
|
+
|
|
384
|
+
```yaml
|
|
385
|
+
# Prometheus targets - remove ZK metrics, add controller metrics
|
|
386
|
+
- job_name: "kafka-controllers"
|
|
387
|
+
static_configs:
|
|
388
|
+
- targets:
|
|
389
|
+
["controller-01:9999", "controller-02:9999", "controller-03:9999"]
|
|
390
|
+
|
|
391
|
+
# Remove the following job from the existing scrape configuration:
|
|
392
|
+
- job_name: "zookeeper"
|
|
393
|
+
static_configs:
|
|
394
|
+
- targets: ["zk1:7000", "zk2:7000", "zk3:7000"]
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
### Update Ansible Inventory
|
|
398
|
+
|
|
399
|
+
```ini
|
|
400
|
+
# REMOVE:
|
|
401
|
+
# [zookeeper]
|
|
402
|
+
# zk1 ansible_host=...
|
|
403
|
+
|
|
404
|
+
# ADD:
|
|
405
|
+
[controllers]
|
|
406
|
+
controller-01 ansible_host=10.0.1.1 node_id=1
|
|
407
|
+
controller-02 ansible_host=10.0.1.2 node_id=2
|
|
408
|
+
controller-03 ansible_host=10.0.1.3 node_id=3
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
---
|
|
412
|
+
|
|
413
|
+
## Troubleshooting
|
|
414
|
+
|
|
415
|
+
### Common Issues
|
|
416
|
+
|
|
417
|
+
| Issue | Diagnosis | Solution |
|
|
418
|
+
| ----------------------------- | -------------------------- | ------------------------------------------ |
|
|
419
|
+
| Controllers won't form quorum | Network or config mismatch | Verify voter config matches on all nodes |
|
|
420
|
+
| Brokers not registering | Missing migration config | Add `zookeeper.metadata.migration.enable` |
|
|
421
|
+
| Metadata mismatch | Dual-write failed | Check controller logs, restart controllers |
|
|
422
|
+
| Finalization fails | Not all brokers in KRaft | Verify all brokers have restarted |
|
|
423
|
+
| Client connection failures | Listener config issues | Verify `advertised.listeners` settings |
|
|
424
|
+
|
|
425
|
+
### Debug Commands
|
|
426
|
+
|
|
427
|
+
```bash
|
|
428
|
+
# Controller logs
|
|
429
|
+
tail -100 /var/log/confluent/kafka/controller.log
|
|
430
|
+
|
|
431
|
+
# Check controller state
|
|
432
|
+
/opt/confluent/bin/kafka-metadata --snapshot /var/kafka-controller-data/__cluster_metadata-0/00000000000000000000.log \
|
|
433
|
+
--command describe
|
|
434
|
+
|
|
435
|
+
# Verify quorum health
|
|
436
|
+
/opt/confluent/bin/kafka-metadata --snapshot /var/kafka-controller-data/__cluster_metadata-0/00000000000000000000.log \
|
|
437
|
+
--command quorum
|
|
438
|
+
|
|
439
|
+
# Check migration state
|
|
440
|
+
grep -i "migration" /var/log/confluent/kafka/controller.log | tail -20
|
|
441
|
+
|
|
442
|
+
# Verify broker registration
|
|
443
|
+
/opt/confluent/bin/kafka-metadata --snapshot /var/kafka-controller-data/__cluster_metadata-0/00000000000000000000.log \
|
|
444
|
+
--command broker
|
|
445
|
+
|
|
446
|
+
# Check metadata log size (should be reasonable, < 1GB typically)
|
|
447
|
+
du -sh /var/kafka-controller-data/__cluster_metadata-0/
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
### Rollback (Pre-Finalization Only)
|
|
451
|
+
|
|
452
|
+
If migration fails BEFORE finalization:
|
|
453
|
+
|
|
454
|
+
```bash
|
|
455
|
+
# 1. Stop controllers
|
|
456
|
+
sudo systemctl stop confluent-kafka-controller
|
|
457
|
+
|
|
458
|
+
# 2. Remove migration config from brokers
|
|
459
|
+
sed -i '/controller.quorum.voters/d' /opt/confluent/etc/kafka/server.properties
|
|
460
|
+
sed -i '/zookeeper.metadata.migration.enable/d' /opt/confluent/etc/kafka/server.properties
|
|
461
|
+
|
|
462
|
+
# 3. Rolling restart brokers
|
|
463
|
+
for broker in kafka-01 kafka-02 kafka-03; do
|
|
464
|
+
ssh $broker "sudo systemctl restart confluent-server"
|
|
465
|
+
sleep 60
|
|
466
|
+
done
|
|
467
|
+
|
|
468
|
+
# 4. Verify brokers reconnected to ZK
|
|
469
|
+
/opt/confluent/bin/zookeeper-shell localhost:2181 <<< "ls /brokers/ids"
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
### Post-Finalization Rollback
|
|
473
|
+
|
|
474
|
+
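**⚠️ Requires a full restore from backups taken before the migration started (the steps in this guide do not create them; the archive names below are examples) - contact Confluent Support**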
|
|
475
|
+
|
|
476
|
+
```bash
|
|
477
|
+
# 1. Stop all Kafka services
|
|
478
|
+
for host in kafka-01 kafka-02 kafka-03; do
|
|
479
|
+
ssh $host "sudo systemctl stop confluent-server"
|
|
480
|
+
done
|
|
481
|
+
|
|
482
|
+
# 2. Restore ZooKeeper
|
|
483
|
+
tar -xzf /backup/zookeeper-pre-migration.tar.gz -C /
|
|
484
|
+
|
|
485
|
+
# 3. Restore Kafka configs and data
|
|
486
|
+
tar -xzf /backup/kafka-pre-migration.tar.gz -C /
|
|
487
|
+
|
|
488
|
+
# 4. Start ZooKeeper, then Kafka
|
|
489
|
+
# (Follow standard disaster recovery procedures)
|
|
490
|
+
```
|