@techwavedev/agi-agent-kit 1.1.7 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @techwavedev/agi-agent-kit might be problematic. Click here for more details.

Files changed (111)
  1. package/CHANGELOG.md +82 -1
  2. package/README.md +190 -12
  3. package/bin/init.js +30 -2
  4. package/package.json +6 -3
  5. package/templates/base/AGENTS.md +54 -23
  6. package/templates/base/README.md +325 -0
  7. package/templates/base/directives/memory_integration.md +95 -0
  8. package/templates/base/execution/memory_manager.py +309 -0
  9. package/templates/base/execution/session_boot.py +218 -0
  10. package/templates/base/execution/session_init.py +320 -0
  11. package/templates/base/skill-creator/SKILL_skillcreator.md +23 -36
  12. package/templates/base/skill-creator/scripts/init_skill.py +18 -135
  13. package/templates/skills/ec/README.md +31 -0
  14. package/templates/skills/ec/aws/SKILL.md +1020 -0
  15. package/templates/skills/ec/aws/defaults.yaml +13 -0
  16. package/templates/skills/ec/aws/references/common_patterns.md +80 -0
  17. package/templates/skills/ec/aws/references/mcp_servers.md +98 -0
  18. package/templates/skills/ec/aws-terraform/SKILL.md +349 -0
  19. package/templates/skills/ec/aws-terraform/references/best_practices.md +394 -0
  20. package/templates/skills/ec/aws-terraform/references/checkov_reference.md +337 -0
  21. package/templates/skills/ec/aws-terraform/scripts/configure_mcp.py +150 -0
  22. package/templates/skills/ec/confluent-kafka/SKILL.md +655 -0
  23. package/templates/skills/ec/confluent-kafka/references/ansible_playbooks.md +792 -0
  24. package/templates/skills/ec/confluent-kafka/references/ec_deployment.md +579 -0
  25. package/templates/skills/ec/confluent-kafka/references/kraft_migration.md +490 -0
  26. package/templates/skills/ec/confluent-kafka/references/troubleshooting.md +778 -0
  27. package/templates/skills/ec/confluent-kafka/references/upgrade_7x_to_8x.md +488 -0
  28. package/templates/skills/ec/confluent-kafka/scripts/kafka_health_check.py +435 -0
  29. package/templates/skills/ec/confluent-kafka/scripts/upgrade_preflight.py +568 -0
  30. package/templates/skills/ec/confluent-kafka/scripts/validate_config.py +455 -0
  31. package/templates/skills/ec/consul/SKILL.md +427 -0
  32. package/templates/skills/ec/consul/references/acl_setup.md +168 -0
  33. package/templates/skills/ec/consul/references/ha_config.md +196 -0
  34. package/templates/skills/ec/consul/references/troubleshooting.md +267 -0
  35. package/templates/skills/ec/consul/references/upgrades.md +213 -0
  36. package/templates/skills/ec/consul/scripts/consul_health_report.py +530 -0
  37. package/templates/skills/ec/consul/scripts/consul_status.py +264 -0
  38. package/templates/skills/ec/consul/scripts/generate_values.py +170 -0
  39. package/templates/skills/ec/documentation/SKILL.md +351 -0
  40. package/templates/skills/ec/documentation/references/best_practices.md +201 -0
  41. package/templates/skills/ec/documentation/scripts/analyze_code.py +307 -0
  42. package/templates/skills/ec/documentation/scripts/detect_changes.py +460 -0
  43. package/templates/skills/ec/documentation/scripts/generate_changelog.py +312 -0
  44. package/templates/skills/ec/documentation/scripts/sync_docs.py +272 -0
  45. package/templates/skills/ec/documentation/scripts/update_skill_docs.py +366 -0
  46. package/templates/skills/ec/gitlab/SKILL.md +529 -0
  47. package/templates/skills/ec/gitlab/references/agent_installation.md +416 -0
  48. package/templates/skills/ec/gitlab/references/api_reference.md +508 -0
  49. package/templates/skills/ec/gitlab/references/gitops_flux.md +465 -0
  50. package/templates/skills/ec/gitlab/references/troubleshooting.md +518 -0
  51. package/templates/skills/ec/gitlab/scripts/generate_agent_values.py +329 -0
  52. package/templates/skills/ec/gitlab/scripts/gitlab_agent_status.py +414 -0
  53. package/templates/skills/ec/jira/SKILL.md +484 -0
  54. package/templates/skills/ec/jira/references/jql_reference.md +148 -0
  55. package/templates/skills/ec/jira/scripts/add_comment.py +91 -0
  56. package/templates/skills/ec/jira/scripts/bulk_log_work.py +124 -0
  57. package/templates/skills/ec/jira/scripts/create_ticket.py +162 -0
  58. package/templates/skills/ec/jira/scripts/get_ticket.py +191 -0
  59. package/templates/skills/ec/jira/scripts/jira_client.py +383 -0
  60. package/templates/skills/ec/jira/scripts/log_work.py +154 -0
  61. package/templates/skills/ec/jira/scripts/search_tickets.py +104 -0
  62. package/templates/skills/ec/jira/scripts/update_comment.py +67 -0
  63. package/templates/skills/ec/jira/scripts/update_ticket.py +161 -0
  64. package/templates/skills/ec/karpenter/SKILL.md +301 -0
  65. package/templates/skills/ec/karpenter/references/ec2nodeclasses.md +421 -0
  66. package/templates/skills/ec/karpenter/references/migration.md +396 -0
  67. package/templates/skills/ec/karpenter/references/nodepools.md +400 -0
  68. package/templates/skills/ec/karpenter/references/troubleshooting.md +359 -0
  69. package/templates/skills/ec/karpenter/scripts/generate_ec2nodeclass.py +187 -0
  70. package/templates/skills/ec/karpenter/scripts/generate_nodepool.py +245 -0
  71. package/templates/skills/ec/karpenter/scripts/karpenter_status.py +359 -0
  72. package/templates/skills/ec/opensearch/SKILL.md +720 -0
  73. package/templates/skills/ec/opensearch/references/ml_neural_search.md +576 -0
  74. package/templates/skills/ec/opensearch/references/operator.md +532 -0
  75. package/templates/skills/ec/opensearch/references/query_dsl.md +532 -0
  76. package/templates/skills/ec/opensearch/scripts/configure_mcp.py +148 -0
  77. package/templates/skills/ec/victoriametrics/SKILL.md +598 -0
  78. package/templates/skills/ec/victoriametrics/references/kubernetes.md +531 -0
  79. package/templates/skills/ec/victoriametrics/references/prometheus_migration.md +333 -0
  80. package/templates/skills/ec/victoriametrics/references/troubleshooting.md +442 -0
  81. package/templates/skills/knowledge/SKILLS_CATALOG.md +274 -4
  82. package/templates/skills/knowledge/intelligent-routing/SKILL.md +237 -164
  83. package/templates/skills/knowledge/parallel-agents/SKILL.md +345 -73
  84. package/templates/skills/knowledge/plugin-discovery/SKILL.md +582 -0
  85. package/templates/skills/knowledge/plugin-discovery/scripts/platform_setup.py +1083 -0
  86. package/templates/skills/knowledge/design-md/README.md +0 -34
  87. package/templates/skills/knowledge/design-md/SKILL.md +0 -193
  88. package/templates/skills/knowledge/design-md/examples/DESIGN.md +0 -154
  89. package/templates/skills/knowledge/notebooklm-mcp/SKILL.md +0 -71
  90. package/templates/skills/knowledge/notebooklm-mcp/assets/example_asset.txt +0 -24
  91. package/templates/skills/knowledge/notebooklm-mcp/references/api_reference.md +0 -34
  92. package/templates/skills/knowledge/notebooklm-mcp/scripts/example.py +0 -19
  93. package/templates/skills/knowledge/react-components/README.md +0 -36
  94. package/templates/skills/knowledge/react-components/SKILL.md +0 -53
  95. package/templates/skills/knowledge/react-components/examples/gold-standard-card.tsx +0 -80
  96. package/templates/skills/knowledge/react-components/package-lock.json +0 -231
  97. package/templates/skills/knowledge/react-components/package.json +0 -16
  98. package/templates/skills/knowledge/react-components/resources/architecture-checklist.md +0 -15
  99. package/templates/skills/knowledge/react-components/resources/component-template.tsx +0 -37
  100. package/templates/skills/knowledge/react-components/resources/stitch-api-reference.md +0 -14
  101. package/templates/skills/knowledge/react-components/resources/style-guide.json +0 -27
  102. package/templates/skills/knowledge/react-components/scripts/fetch-stitch.sh +0 -30
  103. package/templates/skills/knowledge/react-components/scripts/validate.js +0 -68
  104. package/templates/skills/knowledge/self-update/SKILL.md +0 -60
  105. package/templates/skills/knowledge/self-update/scripts/update_kit.py +0 -103
  106. package/templates/skills/knowledge/stitch-loop/README.md +0 -54
  107. package/templates/skills/knowledge/stitch-loop/SKILL.md +0 -235
  108. package/templates/skills/knowledge/stitch-loop/examples/SITE.md +0 -73
  109. package/templates/skills/knowledge/stitch-loop/examples/next-prompt.md +0 -25
  110. package/templates/skills/knowledge/stitch-loop/resources/baton-schema.md +0 -61
  111. package/templates/skills/knowledge/stitch-loop/resources/site-template.md +0 -104
@@ -0,0 +1,792 @@
1
+ # Confluent Kafka Ansible Playbooks
2
+
3
+ Ansible automation patterns for Confluent Kafka tarball installations.
4
+
5
+ ---
6
+
7
+ ## ⚠️ EC Environment Notes
8
+
9
+ > **This guide shows generic Ansible patterns.** For EC-specific deployments:
10
+
11
+ | Standard Pattern | EC Pattern |
12
+ | ------------------------------------- | -------------------------------------------------------- |
13
+ | Root systemd (`/etc/systemd/system/`) | User systemd (`~/.config/systemd/user/`) |
14
+ | `ansible_become: yes` | `ansible_become: false` |
15
+ | `systemctl start` | `systemctl --user start` |
16
+ | `/opt/confluent/` | `{{ base_path }}/opt/confluent-{{ confluent_version }}/` |
17
+ | `/var/kafka-logs/` | `{{ base_path }}/opt/data` |
18
+ | `/var/ssl/kafka/` | `{{ base_path }}/opt/ssl/` |
19
+
20
+ **EC Ansible Base:** `{{ ansible_base }}/` (e.g., `/ec/local/kafka/ansible/`)
21
+
22
+ **Key EC Constraints:**
23
+
24
+ - No root access (`ansible_become: false`)
25
+ - User-scope systemd services (`scope: user`)
26
+ - HashiCorp Vault for secrets
27
+ - SSL-only (no SASL/RBAC)
28
+
29
+ See **[ec_deployment.md](ec_deployment.md)** for complete EC Ansible setup and deployment commands.
30
+
31
+ ---
32
+
33
+ ## Table of Contents
34
+
35
+ 1. [Directory Structure](#directory-structure)
36
+ 2. [Inventory Configuration](#inventory-configuration)
37
+ 3. [Common Variables](#common-variables)
38
+ 4. [Playbooks](#playbooks)
39
+ 5. [Roles](#roles)
40
+ 6. [Usage Examples](#usage-examples)
41
+
42
+ ---
43
+
44
+ ## Directory Structure
45
+
46
+ ```
47
+ ansible/
48
+ ├── inventory/
49
+ │ ├── production/
50
+ │ │ ├── hosts.ini
51
+ │ │ └── group_vars/
52
+ │ │ ├── all.yml
53
+ │ │ ├── controllers.yml
54
+ │ │ ├── brokers.yml
55
+ │ │ └── schema_registry.yml
56
+ │ └── staging/
57
+ │ └── ...
58
+ ├── playbooks/
59
+ │ ├── install.yml
60
+ │ ├── upgrade.yml
61
+ │ ├── rolling_restart.yml
62
+ │ ├── health_check.yml
63
+ │ ├── backup.yml
64
+ │ └── kraft_migration.yml
65
+ ├── roles/
66
+ │ ├── confluent-common/
67
+ │ ├── confluent-controller/
68
+ │ ├── confluent-broker/
69
+ │ ├── confluent-schema-registry/
70
+ │ ├── confluent-connect/
71
+ │ └── confluent-control-center/
72
+ └── files/
73
+ ├── confluent-8.0.0.tar.gz
74
+ └── ssl/
75
+ ```
76
+
77
+ ---
78
+
79
+ ## Inventory Configuration
80
+
81
+ ### Production Inventory
82
+
83
+ ```ini
84
+ # inventory/production/hosts.ini
85
+
86
+ [controllers]
87
+ kafka-controller-01 ansible_host=10.0.1.11 node_id=1
88
+ kafka-controller-02 ansible_host=10.0.1.12 node_id=2
89
+ kafka-controller-03 ansible_host=10.0.1.13 node_id=3
90
+
91
+ [brokers]
92
+ kafka-broker-01 ansible_host=10.0.2.11 node_id=101
93
+ kafka-broker-02 ansible_host=10.0.2.12 node_id=102
94
+ kafka-broker-03 ansible_host=10.0.2.13 node_id=103
95
+ kafka-broker-04 ansible_host=10.0.2.14 node_id=104
96
+ kafka-broker-05 ansible_host=10.0.2.15 node_id=105
97
+
98
+ [schema_registry]
99
+ kafka-sr-01 ansible_host=10.0.3.11
100
+ kafka-sr-02 ansible_host=10.0.3.12
101
+
102
+ [connect]
103
+ kafka-connect-01 ansible_host=10.0.4.11
104
+ kafka-connect-02 ansible_host=10.0.4.12
105
+ kafka-connect-03 ansible_host=10.0.4.13
106
+
107
+ [control_center]
108
+ kafka-cc-01 ansible_host=10.0.5.11
109
+
110
+ [confluent:children]
111
+ controllers
112
+ brokers
113
+ schema_registry
114
+ connect
115
+ control_center
116
+
117
+ [confluent:vars]
118
+ ansible_user=kafka
119
+ ansible_become=yes
120
+ ansible_python_interpreter=/usr/bin/python3
121
+ ```
122
+
123
+ ### Group Variables
124
+
125
+ ```yaml
126
+ # inventory/production/group_vars/all.yml
127
+
128
+ # Confluent Platform version
129
+ confluent_version: "8.0.0"
130
+ confluent_install_base: "/opt"
131
+ confluent_install_path: "{{ confluent_install_base }}/confluent-{{ confluent_version }}"
132
+ confluent_symlink: "{{ confluent_install_base }}/confluent"
133
+
134
+ # Java configuration
135
+ java_home: "/opt/amazon-corretto-17"
136
+ kafka_heap_opts: "-Xms6g -Xmx6g"
137
+
138
+ # Cluster configuration
139
+ cluster_id: "MkU3OThlYzExNjdmNGIyMG" # Generated once: kafka-storage random-uuid
140
+ kafka_kraft_enabled: true
141
+
142
+ # Controller quorum
143
+ controller_quorum_voters: >-
144
+ 1@kafka-controller-01:9093,2@kafka-controller-02:9093,3@kafka-controller-03:9093
145
+
146
+ # Listeners
147
+ kafka_listener_internal_port: 9092
148
+ kafka_listener_external_port: 9094
149
+ kafka_controller_port: 9093
150
+
151
+ # Security
152
+ kafka_security_protocol: "SASL_SSL"
153
+ kafka_sasl_mechanism: "PLAIN"
154
+ ssl_keystore_path: "/var/ssl/kafka/kafka.keystore.jks"
155
+ ssl_truststore_path: "/var/ssl/kafka/kafka.truststore.jks"
156
+
157
+ # Data directories
158
+ kafka_log_dirs: "/var/kafka-logs"
159
+ controller_data_dir: "/var/kafka-controller"
160
+
161
+ # Logging
162
+ kafka_log_path: "/var/log/confluent/kafka"
163
+ ```
164
+
165
+ ```yaml
166
+ # inventory/production/group_vars/brokers.yml
167
+
168
+ # Broker-specific settings
169
+ kafka_broker_heap_opts: "-Xms8g -Xmx8g"
170
+ kafka_num_partitions: 12
171
+ kafka_default_replication_factor: 3
172
+ kafka_min_insync_replicas: 2
173
+
174
+ # Performance tuning
175
+ kafka_num_network_threads: 8
176
+ kafka_num_io_threads: 16
177
+ kafka_socket_send_buffer_bytes: 102400
178
+ kafka_socket_receive_buffer_bytes: 102400
179
+
180
+ # Retention
181
+ kafka_log_retention_hours: 168
182
+ kafka_log_segment_bytes: 1073741824
183
+ ```
184
+
185
+ ---
186
+
187
+ ## Common Variables
188
+
189
+ ```yaml
190
+ # inventory/production/group_vars/controllers.yml
191
+
192
+ # Controller-specific settings
193
+ controller_heap_opts: "-Xms4g -Xmx4g"
194
+ controller_log_dirs: "{{ controller_data_dir }}"
195
+ ```
196
+
197
+ ---
198
+
199
+ ## Playbooks
200
+
201
+ ### Installation Playbook
202
+
203
+ ```yaml
204
+ # playbooks/install.yml
205
+ ---
206
+ - name: Install Confluent Platform
207
+ hosts: confluent
208
+ become: yes
209
+ vars:
210
+ tarball_path: "files/confluent-{{ confluent_version }}.tar.gz"
211
+
212
+ tasks:
213
+ - name: Create kafka user
214
+ ansible.builtin.user:
215
+ name: kafka
216
+ shell: /bin/bash
217
+ system: yes
218
+ create_home: yes
219
+
220
+ - name: Create directories
221
+ ansible.builtin.file:
222
+ path: "{{ item }}"
223
+ state: directory
224
+ owner: kafka
225
+ group: kafka
226
+ mode: "0755"
227
+ loop:
228
+ - "{{ confluent_install_base }}"
229
+ - "{{ kafka_log_dirs }}"
230
+ - "{{ kafka_log_path }}"
231
+ - /var/ssl/kafka
232
+
233
+ - name: Extract Confluent Platform
234
+ ansible.builtin.unarchive:
235
+ src: "{{ tarball_path }}"
236
+ dest: "{{ confluent_install_base }}"
237
+ owner: kafka
238
+ group: kafka
239
+ creates: "{{ confluent_install_path }}"
240
+
241
+ - name: Create symlink
242
+ ansible.builtin.file:
243
+ src: "{{ confluent_install_path }}"
244
+ dest: "{{ confluent_symlink }}"
245
+ state: link
246
+ owner: kafka
247
+ group: kafka
248
+
249
+ - name: Install Java
250
+ ansible.builtin.include_role:
251
+ name: confluent-common
252
+ tasks_from: install_java
253
+
254
+ - name: Configure Controllers
255
+ hosts: controllers
256
+ become: yes
257
+ roles:
258
+ - confluent-controller
259
+
260
+ - name: Configure Brokers
261
+ hosts: brokers
262
+ become: yes
263
+ roles:
264
+ - confluent-broker
265
+
266
+ - name: Configure Schema Registry
267
+ hosts: schema_registry
268
+ become: yes
269
+ roles:
270
+ - confluent-schema-registry
271
+
272
+ - name: Configure Connect
273
+ hosts: connect
274
+ become: yes
275
+ roles:
276
+ - confluent-connect
277
+
278
+ - name: Configure Control Center
279
+ hosts: control_center
280
+ become: yes
281
+ roles:
282
+ - confluent-control-center
283
+ ```
284
+
285
+ ### Rolling Upgrade Playbook
286
+
287
+ ```yaml
288
+ # playbooks/upgrade.yml
289
+ ---
290
+ - name: Pre-upgrade validation
291
+ hosts: brokers[0]
292
+ become: yes
293
+ tasks:
294
+ - name: Check for under-replicated partitions
295
+ ansible.builtin.command: >
296
+ {{ confluent_symlink }}/bin/kafka-topics
297
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
298
+ --describe --under-replicated-partitions
299
+ register: urp_check
300
+ changed_when: false
301
+
302
+ - name: Fail if partitions are under-replicated
303
+ ansible.builtin.fail:
304
+ msg: "Under-replicated partitions detected. Aborting upgrade."
305
+ when: urp_check.stdout | length > 0
306
+
307
+ - name: Upgrade Schema Registry (rolling)
308
+ hosts: schema_registry
309
+ become: yes
310
+ serial: 1
311
+ tasks:
312
+ - name: Stop Schema Registry
313
+ ansible.builtin.systemd:
314
+ name: confluent-schema-registry
315
+ state: stopped
316
+
317
+ - name: Update symlink
318
+ ansible.builtin.file:
319
+ src: "{{ confluent_install_path }}"
320
+ dest: "{{ confluent_symlink }}"
321
+ state: link
322
+ force: yes
323
+
324
+ - name: Start Schema Registry
325
+ ansible.builtin.systemd:
326
+ name: confluent-schema-registry
327
+ state: started
328
+
329
+ - name: Wait for Schema Registry health
330
+ ansible.builtin.uri:
331
+ url: "http://localhost:8081/"
332
+ status_code: 200
333
+ register: sr_health
334
+ until: sr_health.status == 200
335
+ retries: 30
336
+ delay: 10
337
+
338
+ - name: Upgrade Kafka Brokers (rolling)
339
+ hosts: brokers
340
+ become: yes
341
+ serial: 1
342
+ max_fail_percentage: 0
343
+ tasks:
344
+ - name: Get broker ID
345
+ ansible.builtin.command: >
346
+ grep broker.id {{ confluent_symlink }}/etc/kafka/server.properties
347
+ register: broker_id_line
348
+ changed_when: false
349
+
350
+ - name: Extract broker ID
351
+ ansible.builtin.set_fact:
352
+ broker_id: "{{ broker_id_line.stdout.split('=')[1] }}"
353
+
354
+ - name: Initiate controlled shutdown
355
+ ansible.builtin.systemd:
356
+ name: confluent-server
357
+ state: stopped
358
+
359
+ - name: Wait for partition leadership migration
360
+ ansible.builtin.pause:
361
+ seconds: 60
362
+
363
+ - name: Verify broker is stopped
364
+ ansible.builtin.wait_for:
365
+ port: "{{ kafka_listener_internal_port }}"
366
+ state: stopped
367
+ timeout: 120
368
+
369
+ - name: Backup current configuration
370
+ ansible.builtin.archive:
371
+ path: "{{ confluent_symlink }}/etc/kafka"
372
+ dest: "/backup/kafka-config-{{ ansible_date_time.epoch }}.tar.gz"
373
+
374
+ - name: Update symlink to new version
375
+ ansible.builtin.file:
376
+ src: "{{ confluent_install_path }}"
377
+ dest: "{{ confluent_symlink }}"
378
+ state: link
379
+ force: yes
380
+
381
+ - name: Restore configuration
382
+ ansible.builtin.copy:
383
+ src: "/backup/server.properties"
384
+ dest: "{{ confluent_symlink }}/etc/kafka/server.properties"
385
+ remote_src: yes
386
+
387
+ - name: Remove deprecated configs
388
+ ansible.builtin.lineinfile:
389
+ path: "{{ confluent_symlink }}/etc/kafka/server.properties"
390
+ regexp: "{{ item }}"
391
+ state: absent
392
+ loop:
393
+ - "^log.message.format.version"
394
+ - "^inter.broker.protocol.version"
395
+
396
+ - name: Start Kafka broker
397
+ ansible.builtin.systemd:
398
+ name: confluent-server
399
+ state: started
400
+
401
+ - name: Wait for broker to join cluster
402
+ ansible.builtin.command: >
403
+ {{ confluent_symlink }}/bin/kafka-broker-api-versions
404
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
405
+ register: broker_check
406
+ until: broker_check.rc == 0
407
+ retries: 30
408
+ delay: 10
409
+
410
+ - name: Wait for ISR sync
411
+ ansible.builtin.command: >
412
+ {{ confluent_symlink }}/bin/kafka-topics
413
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
414
+ --describe --under-replicated-partitions
415
+ register: isr_check
416
+ until: isr_check.stdout | length == 0
417
+ retries: 60
418
+ delay: 10
419
+ changed_when: false
420
+ ```
421
+
422
+ ### Rolling Restart Playbook
423
+
424
+ ```yaml
425
+ # playbooks/rolling_restart.yml
426
+ ---
427
+ - name: Rolling restart Kafka brokers
428
+ hosts: brokers
429
+ become: yes
430
+ serial: 1
431
+ max_fail_percentage: 0
432
+
433
+ tasks:
434
+ - name: Check cluster health before restart
435
+ ansible.builtin.command: >
436
+ {{ confluent_symlink }}/bin/kafka-topics
437
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
438
+ --describe --under-replicated-partitions
439
+ register: pre_check
440
+ changed_when: false
441
+ delegate_to: "{{ groups['brokers'][0] }}"
442
+ run_once: true
443
+
444
+ - name: Stop broker
445
+ ansible.builtin.systemd:
446
+ name: confluent-server
447
+ state: stopped
448
+
449
+ - name: Wait for controlled shutdown
450
+ ansible.builtin.wait_for:
451
+ port: "{{ kafka_listener_internal_port }}"
452
+ state: stopped
453
+ timeout: 300
454
+
455
+ - name: Start broker
456
+ ansible.builtin.systemd:
457
+ name: confluent-server
458
+ state: started
459
+
460
+ - name: Wait for broker health
461
+ ansible.builtin.wait_for:
462
+ port: "{{ kafka_listener_internal_port }}"
463
+ state: started
464
+ timeout: 120
465
+
466
+ - name: Wait for ISR sync
467
+ ansible.builtin.command: >
468
+ {{ confluent_symlink }}/bin/kafka-topics
469
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
470
+ --describe --under-replicated-partitions
471
+ register: isr_check
472
+ until: isr_check.stdout | length == 0
473
+ retries: 60
474
+ delay: 10
475
+ changed_when: false
476
+ ```
477
+
478
+ ### Health Check Playbook
479
+
480
+ ```yaml
481
+ # playbooks/health_check.yml
482
+ ---
483
+ - name: Kafka Cluster Health Check
484
+ hosts: brokers[0]
485
+ become: yes
486
+ gather_facts: no
487
+
488
+ tasks:
489
+ - name: Check broker count
490
+ ansible.builtin.command: >
491
+ {{ confluent_symlink }}/bin/kafka-metadata
492
+ --snapshot {{ kafka_log_dirs }}/__cluster_metadata-0/00000000000000000000.log
493
+ --command broker
494
+ register: broker_count
495
+ changed_when: false
496
+
497
+ - name: Display broker count
498
+ ansible.builtin.debug:
499
+ msg: "Active brokers: {{ broker_count.stdout_lines | length }}"
500
+
501
+ - name: Check controller quorum
502
+ ansible.builtin.command: >
503
+ {{ confluent_symlink }}/bin/kafka-metadata
504
+ --snapshot {{ kafka_log_dirs }}/__cluster_metadata-0/00000000000000000000.log
505
+ --command quorum
506
+ register: quorum_status
507
+ changed_when: false
508
+
509
+ - name: Display quorum status
510
+ ansible.builtin.debug:
511
+ var: quorum_status.stdout_lines
512
+
513
+ - name: Check under-replicated partitions
514
+ ansible.builtin.command: >
515
+ {{ confluent_symlink }}/bin/kafka-topics
516
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
517
+ --describe --under-replicated-partitions
518
+ register: urp
519
+ changed_when: false
520
+
521
+ - name: Display URP status
522
+ ansible.builtin.debug:
523
+ msg: "{{ 'No under-replicated partitions' if urp.stdout | length == 0 else urp.stdout }}"
524
+
525
+ - name: Check offline partitions
526
+ ansible.builtin.command: >
527
+ {{ confluent_symlink }}/bin/kafka-topics
528
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
529
+ --describe --unavailable-partitions
530
+ register: offline
531
+ changed_when: false
532
+
533
+ - name: Alert on offline partitions
534
+ ansible.builtin.fail:
535
+ msg: "CRITICAL: Offline partitions detected: {{ offline.stdout }}"
536
+ when: offline.stdout | length > 0
537
+
538
+ - name: Check Schema Registry
539
+ hosts: schema_registry[0]
540
+ become: yes
541
+ gather_facts: no
542
+
543
+ tasks:
544
+ - name: Schema Registry health
545
+ ansible.builtin.uri:
546
+ url: http://localhost:8081/
547
+ return_content: yes
548
+ register: sr_health
549
+
550
+ - name: Display SR status
551
+ ansible.builtin.debug:
552
+ msg: "Schema Registry: {{ sr_health.json }}"
553
+
554
+ - name: Check Connect cluster
555
+ hosts: connect[0]
556
+ become: yes
557
+ gather_facts: no
558
+
559
+ tasks:
560
+ - name: Connect cluster health
561
+ ansible.builtin.uri:
562
+ url: http://localhost:8083/
563
+ return_content: yes
564
+ register: connect_health
565
+
566
+ - name: List connectors
567
+ ansible.builtin.uri:
568
+ url: http://localhost:8083/connectors
569
+ return_content: yes
570
+ register: connectors
571
+
572
+ - name: Display Connect status
573
+ ansible.builtin.debug:
574
+ msg: "Connect Kafka cluster ID: {{ connect_health.json.kafka_cluster_id }}, Connectors: {{ connectors.json | length }}"
575
+ ```
576
+
577
+ ### Backup Playbook
578
+
579
+ ```yaml
580
+ # playbooks/backup.yml
581
+ ---
582
+ - name: Backup Confluent Kafka Configuration
583
+ hosts: confluent
584
+ become: yes
585
+ vars:
586
+ backup_base: "/backup"
587
+ backup_dir: "{{ backup_base }}/confluent-{{ ansible_date_time.date }}"
588
+
589
+ tasks:
590
+ - name: Create backup directory
591
+ ansible.builtin.file:
592
+ path: "{{ backup_dir }}"
593
+ state: directory
594
+ mode: "0755"
595
+
596
+ - name: Backup configurations
597
+ ansible.builtin.archive:
598
+ path: "{{ confluent_symlink }}/etc"
599
+ dest: "{{ backup_dir }}/{{ inventory_hostname }}-config.tar.gz"
600
+
601
+ - name: Backup SSL certificates
602
+ ansible.builtin.archive:
603
+ path: /var/ssl/kafka
604
+ dest: "{{ backup_dir }}/{{ inventory_hostname }}-ssl.tar.gz"
605
+ ignore_errors: yes
606
+
607
+ - name: Backup Kafka metadata
608
+ hosts: brokers[0]
609
+ become: yes
610
+ vars:
611
+ backup_dir: "/backup/confluent-{{ ansible_date_time.date }}"
612
+
613
+ tasks:
614
+ - name: Export topic configurations
615
+ ansible.builtin.shell: >
616
+ {{ confluent_symlink }}/bin/kafka-configs
617
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
618
+ --entity-type topics --all --describe > {{ backup_dir }}/topic-configs.txt
619
+ changed_when: false
620
+
621
+ - name: Export ACLs
622
+ ansible.builtin.shell: >
623
+ {{ confluent_symlink }}/bin/kafka-acls
624
+ --bootstrap-server localhost:{{ kafka_listener_internal_port }}
625
+ --list > {{ backup_dir }}/acls.txt
626
+ changed_when: false
627
+
628
+ - name: Create controller snapshot
629
+ ansible.builtin.shell: >
630
+ {{ confluent_symlink }}/bin/kafka-metadata
631
+ --snapshot {{ kafka_log_dirs }}/__cluster_metadata-0/00000000000000000000.log
632
+ --command snapshot > {{ backup_dir }}/metadata-snapshot.json
633
+ changed_when: false
634
+ ```
635
+
636
+ ---
637
+
638
+ ## Roles
639
+
640
+ ### Broker Role Example
641
+
642
+ ```yaml
643
+ # roles/confluent-broker/tasks/main.yml
644
+ ---
645
+ - name: Template broker configuration
646
+ ansible.builtin.template:
647
+ src: server.properties.j2
648
+ dest: "{{ confluent_symlink }}/etc/kafka/server.properties"
649
+ owner: kafka
650
+ group: kafka
651
+ mode: "0644"
652
+ notify: Restart Kafka broker
653
+
654
+ - name: Template JVM options
655
+ ansible.builtin.template:
656
+ src: jvm.config.j2
657
+ dest: "{{ confluent_symlink }}/etc/kafka/jvm.config"
658
+ owner: kafka
659
+ group: kafka
660
+ mode: "0644"
661
+ notify: Restart Kafka broker
662
+
663
+ - name: Create systemd unit
664
+ ansible.builtin.template:
665
+ src: confluent-server.service.j2
666
+ dest: /etc/systemd/system/confluent-server.service
667
+ mode: "0644"
668
+ notify:
669
+ - Reload systemd
670
+ - Restart Kafka broker
671
+
672
+ - name: Enable and start Kafka broker
673
+ ansible.builtin.systemd:
674
+ name: confluent-server
675
+ state: started
676
+ enabled: yes
677
+ daemon_reload: yes
678
+ ```
679
+
680
+ ```jinja2
681
+ {# roles/confluent-broker/templates/server.properties.j2 #}
682
+
683
+ # Broker Configuration
684
+ # Generated by Ansible - Do not edit manually
685
+
686
+ # KRaft mode settings
687
+ process.roles=broker
688
+ node.id={{ node_id }}
689
+ controller.quorum.voters={{ controller_quorum_voters }}
690
+ controller.listener.names=CONTROLLER
691
+ inter.broker.listener.name=INTERNAL
692
+
693
+ # Listeners
694
+ listeners=INTERNAL://0.0.0.0:{{ kafka_listener_internal_port }},EXTERNAL://0.0.0.0:{{ kafka_listener_external_port }}
695
+ advertised.listeners=INTERNAL://{{ ansible_fqdn }}:{{ kafka_listener_internal_port }},EXTERNAL://{{ ansible_fqdn }}:{{ kafka_listener_external_port }}
696
+ listener.security.protocol.map=INTERNAL:{{ kafka_security_protocol }},EXTERNAL:{{ kafka_security_protocol }},CONTROLLER:{{ kafka_security_protocol }}
697
+
698
+ # SASL Configuration
699
+ sasl.mechanism.inter.broker.protocol=PLAIN
700
+ sasl.enabled.mechanisms=PLAIN
701
+
702
+ # SSL Configuration
703
+ ssl.keystore.location={{ ssl_keystore_path }}
704
+ ssl.keystore.password={{ ssl_keystore_password }}
705
+ ssl.truststore.location={{ ssl_truststore_path }}
706
+ ssl.truststore.password={{ ssl_truststore_password }}
707
+
708
+ # Data directories
709
+ log.dirs={{ kafka_log_dirs }}
710
+
711
+ # Performance tuning
712
+ num.network.threads={{ kafka_num_network_threads }}
713
+ num.io.threads={{ kafka_num_io_threads }}
714
+ socket.send.buffer.bytes={{ kafka_socket_send_buffer_bytes }}
715
+ socket.receive.buffer.bytes={{ kafka_socket_receive_buffer_bytes }}
716
+ socket.request.max.bytes=104857600
717
+
718
+ # Topic defaults
719
+ num.partitions={{ kafka_num_partitions }}
720
+ default.replication.factor={{ kafka_default_replication_factor }}
721
+ min.insync.replicas={{ kafka_min_insync_replicas }}
722
+
723
+ # Log retention
724
+ log.retention.hours={{ kafka_log_retention_hours }}
725
+ log.segment.bytes={{ kafka_log_segment_bytes }}
726
+ log.retention.check.interval.ms=300000
727
+
728
+ # Replication
729
+ replica.lag.time.max.ms=30000
730
+ num.replica.fetchers=4
731
+ replica.fetch.max.bytes=1048576
732
+
733
+ # Security
734
+ authorizer.class.name=org.apache.kafka.metadata.authorizer.StandardAuthorizer
735
+ super.users=User:admin
736
+ ```
737
+
738
+ ---
739
+
740
+ ## Usage Examples
741
+
742
+ ### Full Cluster Installation
743
+
744
+ ```bash
745
+ # Deploy new cluster
746
+ ansible-playbook -i inventory/production/hosts.ini playbooks/install.yml
747
+
748
+ # Verify installation
749
+ ansible-playbook -i inventory/production/hosts.ini playbooks/health_check.yml
750
+ ```
751
+
752
+ ### Rolling Upgrade
753
+
754
+ ```bash
755
+ # Pre-upgrade backup
756
+ ansible-playbook -i inventory/production/hosts.ini playbooks/backup.yml
757
+
758
+ # Upgrade from 7.6 to 8.0
759
+ ansible-playbook -i inventory/production/hosts.ini playbooks/upgrade.yml \
760
+ -e confluent_version=8.0.0 \
761
+ -e confluent_install_path=/opt/confluent-8.0.0
762
+
763
+ # Post-upgrade validation
764
+ ansible-playbook -i inventory/production/hosts.ini playbooks/health_check.yml
765
+ ```
766
+
767
+ ### Rolling Restart
768
+
769
+ ```bash
770
+ # Restart all brokers (one at a time)
771
+ ansible-playbook -i inventory/production/hosts.ini playbooks/rolling_restart.yml
772
+
773
+ # Restart specific broker
774
+ ansible-playbook -i inventory/production/hosts.ini playbooks/rolling_restart.yml \
775
+ --limit kafka-broker-03
776
+ ```
777
+
778
+ ### Ad-hoc Commands
779
+
780
+ ```bash
781
+ # Check all broker versions
782
+ ansible brokers -i inventory/production/hosts.ini -m shell \
783
+ -a "{{ confluent_symlink }}/bin/kafka-broker-api-versions --bootstrap-server localhost:9092 --version"
784
+
785
+ # Check disk usage on all brokers
786
+ ansible brokers -i inventory/production/hosts.ini -m shell \
787
+ -a "df -h /var/kafka-logs"
788
+
789
+ # Restart Schema Registry cluster
790
+ ansible schema_registry -i inventory/production/hosts.ini -m systemd \
791
+ -a "name=confluent-schema-registry state=restarted" --become
792
+ ```