cluster-builder 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- cluster_builder/config/cluster.py +35 -7
- cluster_builder/config/postgres.py +4 -1
- cluster_builder/infrastructure/executor.py +48 -48
- cluster_builder/infrastructure/templates.py +2 -2
- cluster_builder/swarmchestrate.py +268 -48
- cluster_builder/templates/aws/main.tf +109 -46
- cluster_builder/templates/deploy_manifest.tf +43 -0
- cluster_builder/templates/edge/main.tf +98 -0
- cluster_builder/templates/ha_user_data.sh.tpl +32 -1
- cluster_builder/templates/master_user_data.sh.tpl +36 -5
- cluster_builder/templates/openstack/main.tf +218 -0
- cluster_builder/templates/openstack_provider.tf +70 -0
- cluster_builder/templates/worker_user_data.sh.tpl +33 -1
- cluster_builder/utils/hcl.py +91 -15
- cluster_builder-0.3.2.dist-info/METADATA +339 -0
- cluster_builder-0.3.2.dist-info/RECORD +25 -0
- {cluster_builder-0.3.0.dist-info → cluster_builder-0.3.2.dist-info}/WHEEL +1 -1
- cluster_builder/templates/edge/main.tf.j2 +0 -40
- cluster_builder/templates/openstack/main.tf.j2 +0 -76
- cluster_builder/templates/openstack/network_security_group.tf.j2 +0 -34
- cluster_builder-0.3.0.dist-info/METADATA +0 -264
- cluster_builder-0.3.0.dist-info/RECORD +0 -24
- {cluster_builder-0.3.0.dist-info → cluster_builder-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {cluster_builder-0.3.0.dist-info → cluster_builder-0.3.2.dist-info}/top_level.txt +0 -0
cluster_builder/swarmchestrate.py

@@ -2,10 +2,14 @@
 Swarmchestrate - Main orchestration class for K3s cluster management.
 """
 
+import json
 import os
 import logging
+from pathlib import Path
 import shutil
+import subprocess
 from typing import Optional
+import psycopg2
 
 from dotenv import load_dotenv
 
@@ -43,6 +47,7 @@ class Swarmchestrate:
         load_dotenv()
 
         try:
+            logger.debug("Loading PostgreSQL configuration from environment...")
             self.pg_config = PostgresConfig.from_env()
         except ValueError as e:
             logger.error(f"Invalid PostgreSQL configuration: {e}")
@@ -52,7 +57,7 @@ class Swarmchestrate:
         self.template_manager = TemplateManager()
         self.cluster_config = ClusterConfig(self.template_manager, output_dir)
 
-        logger.info(
+        logger.debug(
             f"Initialised with template_dir={template_dir}, output_dir={output_dir}"
         )
 
@@ -68,15 +73,6 @@ class Swarmchestrate:
         """
         return self.cluster_config.get_cluster_output_dir(cluster_name)
 
-    def generate_random_name(self) -> str:
-        """
-        Generate a readable random string using names-generator.
-
-        Returns:
-            A randomly generated name
-        """
-        return self.cluster_config.generate_random_name()
-
     def validate_configuration(self, cloud: str, config: dict) -> list:
         """
         Validate a configuration against the required variables for a cloud provider.
@@ -88,18 +84,21 @@ class Swarmchestrate:
         Returns:
             List of missing required variables (empty if all required variables are present)
         """
+        logger.debug(f"Validating configuration for cloud={cloud}, role={config.get('k3s_role')}")
         # Master IP validation
         has_master_ip = "master_ip" in config and config["master_ip"]
         role = config["k3s_role"]
 
         # Cannot add a master node to an existing cluster
         if has_master_ip and role == "master":
+            logger.error("Invalid configuration: master_ip specified with master role")
             raise ValueError(
                 "Cannot add master to existing cluster (master_ip specified with master role)"
             )
 
         # Worker/HA nodes require a master IP
         if not has_master_ip and role in ["worker", "ha"]:
+            logger.error(f"Invalid configuration: Role '{role}' requires master_ip to be specified")
             raise ValueError(f"Role '{role}' requires master_ip to be specified")
 
         required_vars = self.template_manager.get_required_variables(cloud)
@@ -111,6 +110,11 @@ class Swarmchestrate:
             if "default" not in var_config and var_name not in config:
                 missing_vars.append(var_name)
 
+        if missing_vars:
+            logger.warning(f"⚠️ Missing required variables for {cloud}: {missing_vars}")
+        else:
+            logger.debug(f"All required variables provided for {cloud}")
+
         return missing_vars
 
     def prepare_infrastructure(
@@ -134,9 +138,11 @@ class Swarmchestrate:
             RuntimeError: If file operations fail
         """
         try:
+            logger.debug("Preparing infrastructure configuration...")
             # Prepare the configuration
             cluster_dir, prepared_config = self.cluster_config.prepare(config)
-
+            logger.debug(f"Cluster directory prepared at: {cluster_dir}")
+
             # Validate the configuration
             cloud = prepared_config["cloud"]
             missing_vars = self.validate_configuration(cloud, prepared_config)
@@ -144,10 +150,14 @@ class Swarmchestrate:
                 raise ValueError(
                     f"Missing required variables for cloud provider '{cloud}': {', '.join(missing_vars)}"
                 )
+            logger.debug(f"Configuration validated for cloud: {cloud}")
 
             # Create provider configuration
-            self.template_manager.create_provider_config(cluster_dir, cloud)
-            logger.info(f"Created provider configuration for {cloud}")
+            if cloud != "edge":
+                self.template_manager.create_provider_config(cluster_dir, cloud)
+                logger.debug(f"Created provider configuration for {cloud}")
+            else:
+                logger.debug("Skipping provider configuration for edge.")
 
             # Create Terraform files
             main_tf_path = os.path.join(cluster_dir, "main.tf")
@@ -162,21 +172,22 @@ class Swarmchestrate:
                 conn_str,
                 prepared_config["cluster_name"],
             )
-            logger.info(f"Added backend configuration to {backend_tf_path}")
+            logger.debug(f"Added backend configuration to {backend_tf_path}")
 
             # Add module block
             target = prepared_config["resource_name"]
             hcl.add_module_block(main_tf_path, target, prepared_config)
-            logger.info(f"Added module block to {main_tf_path}")
+            logger.debug(f"Added module block to {main_tf_path}")
+            logger.debug("Infrastructure preparation complete.")
 
             return cluster_dir, prepared_config
 
         except Exception as e:
-            error_msg = f"Failed to prepare infrastructure: {e}"
+            error_msg = f"❌ Failed to prepare infrastructure: {e}"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
 
-    def add_node(self, config: dict[str, any], dryrun: bool = False) -> str:
+    def add_node(self, config: dict[str, any], dryrun: bool = False) -> dict:
         """
         Add a node to an existing cluster or create a new cluster based on configuration.
 
@@ -189,14 +200,33 @@ class Swarmchestrate:
             dryrun: If True, only validate the configuration without deploying
 
         Returns:
-            The cluster name
+            The cluster name and other output values.
 
         Raises:
             ValueError: If required configuration is missing or invalid
             RuntimeError: If preparation or deployment fails
         """
         # Prepare the infrastructure configuration
+
         cluster_dir, prepared_config = self.prepare_infrastructure(config)
+        role = prepared_config["k3s_role"]
+
+        # Add output blocks for the module you just added
+        module_name = prepared_config["resource_name"]
+        logger.info(f"---------- Starting deployment of {module_name} ({role}) ----------")
+        outputs_file = os.path.join(cluster_dir, "outputs.tf")
+
+        # Define common output names
+        output_names = ["cluster_name", "master_ip", "worker_ip", "ha_ip", "k3s_token", "resource_name"]
+
+        # Include additional outputs based on the cloud type
+        if "aws" in cluster_dir:
+            output_names.append("instance_status")
+        elif "openstack" in cluster_dir:
+            output_names.append("instance_power_state")
+
+        # Add output blocks
+        hcl.add_output_blocks(outputs_file, module_name, output_names)
 
         logger.info(f"Adding node for cluster '{prepared_config['cluster_name']}'")
 
@@ -204,18 +234,54 @@ class Swarmchestrate:
         try:
             self.deploy(cluster_dir, dryrun)
             cluster_name = prepared_config["cluster_name"]
-
+            resource_name = prepared_config["resource_name"]
             logger.info(
-                f"Successfully added '{prepared_config['resource_name']}' for cluster '{cluster_name}'"
+                f"✅ Successfully added '{resource_name}' for cluster '{cluster_name}'"
             )
-
+            # Run 'tofu output -json' to get outputs
+            result = subprocess.run(
+                ["tofu", "output", "-json"],
+                cwd=cluster_dir,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=True,
+            )
+            outputs = json.loads(result.stdout)
+
+            # Extract output values for all required fields
+            result_outputs = {
+                "cluster_name": outputs.get("cluster_name", {}).get("value"),
+                "master_ip": outputs.get("master_ip", {}).get("value"),
+                "k3s_token": outputs.get("k3s_token", {}).get("value"),
+                "worker_ip": outputs.get("worker_ip", {}).get("value"),
+                "ha_ip": outputs.get("ha_ip", {}).get("value"),
+                "resource_name": outputs.get("resource_name", {}).get("value")
+            }
+            # Add cloud-specific output
+            if "aws" in cluster_dir:
+                result_outputs["instance_status"] = outputs.get("instance_status", {}).get("value")
+            elif "openstack" in cluster_dir:
+                result_outputs["instance_power_state"] = outputs.get("instance_power_state", {}).get("value")
+
+            logger.info(f"----------- Deployment of {role} node successful -----------")
+            logger.debug(f"Deployment outputs: {result_outputs}")
+
+            return result_outputs
+
+        except subprocess.CalledProcessError as e:
+            error_msg = f"❌ Failed to get outputs: {e.stderr.strip()}"
+            logger.error(error_msg)
+            raise RuntimeError(error_msg)
+
         except Exception as e:
-            error_msg = f"Failed to add node: {e}"
+            error_msg = f"❌ Failed to add node: {e}"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
 
+
     def remove_node(
-        self, cluster_name: str, resource_name: str, dryrun: bool = False
+        self, cluster_name: str, resource_name: str, is_edge: bool = False, dryrun: bool = False
     ) -> None:
         """
         Remove a specific node from a cluster.
@@ -225,14 +291,15 @@ class Swarmchestrate:
         reapplying the configuration.
 
         Args:
-            cluster_name: Name of the cluster
-            resource_name:
-
+            cluster_name: Name of the cluster
+            resource_name: Node name in K3s and module name in main.tf / OpenTofu
+            is_edge: True if the node is pre-provisioned (edge node)
+            dryrun: If True, only simulate actions without executing
 
         Raises:
             RuntimeError: If node removal fails
         """
-        logger.info(f"Removing node '{resource_name}' from cluster '{cluster_name}'")
+        logger.info(f"------------ Removing node '{resource_name}' from cluster '{cluster_name}' ------------")
 
         # Get the directory for the specified cluster
         cluster_dir = self.get_cluster_output_dir(cluster_name)
@@ -251,21 +318,42 @@ class Swarmchestrate:
             raise RuntimeError(error_msg)
 
         try:
-            #
+            # Destroy VM only if cloud node (optional)
+            if not is_edge:
+                tofu_resource = f"opentofu_aws_instance.{resource_name}"
+                if not dryrun:
+                    CommandExecutor.run_command(
+                        ["tofu", "destroy", "-target", tofu_resource, "-auto-approve"],
+                        cwd=cluster_dir,
+                        description=f"Destroying VM for node {resource_name}",
+                    )
+                else:
+                    logger.info(f"Dryrun: would destroy VM for node '{resource_name}' (cloud node)")
+
+            # Remove module block from main.tf
             hcl.remove_module_block(main_tf_path, resource_name)
-            logger.info(
-                f"Removed module block for '{resource_name}' from {main_tf_path}"
-            )
+            logger.info(f"Removed module block for '{resource_name}' from {main_tf_path}")
 
-
+            # Delete outputs.tf entirely (optional, safer for decentralized setup)
+            outputs_tf_path = os.path.join(cluster_dir, "outputs.tf")
+            if os.path.exists(outputs_tf_path):
+                os.remove(outputs_tf_path)
+                logger.info(f"Deleted outputs.tf before applying changes to remove '{resource_name}'")
 
+            # Apply OpenTofu configuration to update state
             if not dryrun:
-                CommandExecutor.run_command(
-                    ["tofu", "apply", "-auto-approve"], cluster_dir, "OpenTofu apply"
+                CommandExecutor.run_command(
+                    ["tofu", "apply", "-auto-approve"],
+                    cwd=cluster_dir,
+                    description=f"Applying OpenTofu configuration after removing node {resource_name}",
                 )
+            else:
+                logger.info(f"Dryrun: would apply OpenTofu configuration after removing node '{resource_name}'")
+
+            logger.info(f"✅ Node '{resource_name}' removed successfully from cluster '{cluster_name}'")
 
         except Exception as e:
-            error_msg = f"Failed to remove node '{resource_name}' from cluster '{cluster_name}': {str(e)}"
+            error_msg = f"❌ Failed to remove node '{resource_name}' from cluster '{cluster_name}': {str(e)}"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
 
@@ -280,27 +368,41 @@ class Swarmchestrate:
         Raises:
             RuntimeError: If OpenTofu commands fail
         """
-        logger.info(f"Updating infrastructure in {cluster_dir}")
+        logger.debug(f"Updating infrastructure in {cluster_dir}")
 
         if not os.path.exists(cluster_dir):
-            error_msg = f"Cluster directory '{cluster_dir}' not found"
+            error_msg = f"❌ Cluster directory '{cluster_dir}' not found"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
 
+        # Retrieve the environment variables for tofu logs
+        tf_log = os.getenv("TF_LOG", "INFO")
+        tf_log_path = os.getenv("TF_LOG_PATH", "/tmp/opentofu.log")
+
+        # Check if the environment variables are set
+        if not tf_log or not tf_log_path:
+            print("❌ Error: Missing required environment variables.")
+            exit(1)
+
+        # Prepare environment variables for subprocess
+        env_vars = os.environ.copy()
+        env_vars["TF_LOG"] = tf_log
+        env_vars["TF_LOG_PATH"] = tf_log_path
+
         try:
             # Initialise OpenTofu
             init_command = ["tofu", "init"]
             if dryrun:
                 logger.info("Dryrun: will init without backend and validate only")
                 init_command.append("-backend=false")
-            CommandExecutor.run_command(init_command, cluster_dir, "OpenTofu init")
+            CommandExecutor.run_command(init_command, cluster_dir, "OpenTofu init", env=env_vars)
 
             # Validate the deployment
             if dryrun:
                 CommandExecutor.run_command(
-                    ["tofu", "validate"], cluster_dir, "OpenTofu validate"
+                    ["tofu", "validate"], cluster_dir, "OpenTofu validate", env=env_vars
                 )
-                logger.info("Infrastructure successfully validated")
+                logger.info("✅ Infrastructure successfully validated")
                 return
 
             # Plan the deployment
@@ -309,16 +411,17 @@ class Swarmchestrate:
                 cluster_dir,
                 "OpenTofu plan",
                 timeout=30,
+                env=env_vars,
             )
 
             # Apply the deployment
             CommandExecutor.run_command(
-                ["tofu", "apply", "-auto-approve"], cluster_dir, "OpenTofu apply"
+                ["tofu", "apply", "-auto-approve"], cluster_dir, "OpenTofu apply", env=env_vars
             )
             logger.info("Infrastructure successfully updated")
 
         except RuntimeError as e:
-            error_msg = f"Failed to deploy infrastructure: {str(e)}"
+            error_msg = f"❌ Failed to deploy infrastructure: {str(e)}"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
 
@@ -332,28 +435,29 @@ class Swarmchestrate:
         Raises:
             RuntimeError: If destruction fails
         """
-        logger.info(f"Destroying the cluster '{cluster_name}'")
+        logger.info(f"---------- Destroying the cluster '{cluster_name}' -----------")
 
         # Get the directory for the specified cluster
         cluster_dir = self.get_cluster_output_dir(cluster_name)
 
         if not os.path.exists(cluster_dir):
-            error_msg = f"Cluster directory '{cluster_dir}' not found"
+            error_msg = f"❌ Cluster directory '{cluster_dir}' not found"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
 
         if dryrun:
-            logger.info("Dryrun: will only delete")
+            logger.info("Dryrun: will only delete cluster")
             shutil.rmtree(cluster_dir, ignore_errors=True)
             return
 
         try:
+
             # Plan destruction
             CommandExecutor.run_command(
                 ["tofu", "plan", "-destroy", "-input=false"],
                 cluster_dir,
                 "OpenTofu plan destruction",
-                timeout=30,
+                timeout=40,
             )
 
             # Execute destruction
@@ -365,9 +469,125 @@ class Swarmchestrate:
 
             # Remove the cluster directory
             shutil.rmtree(cluster_dir, ignore_errors=True)
-            logger.info(f"Removed cluster directory: {cluster_dir}")
+            logger.info(f"✅ Removed cluster directory: {cluster_dir}")
+
+            # Remove schema and database entry from PostgreSQL
+            self.remove_cluster_schema_from_db(cluster_name)
 
         except RuntimeError as e:
-            error_msg = f"Failed to destroy cluster '{cluster_name}': {str(e)}"
+            error_msg = f"❌ Failed to destroy cluster '{cluster_name}': {str(e)}"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
+
+    def remove_cluster_schema_from_db(self, cluster_name: str) -> None:
+        """
+        Removes the schema and the entry for the cluster from the PostgreSQL database.
+
+        Args:
+            cluster_name: The name of the cluster to remove from the database
+
+        Raises:
+            RuntimeError: If the database operation fails
+        """
+        logger.info(f"Removing schema for cluster '{cluster_name}' from the PostgreSQL database...")
+
+        # Create a PostgreSQL connection string using the config
+        connection_string = self.pg_config.get_connection_string()
+
+        try:
+            # Connect to the PostgreSQL database
+            connection = psycopg2.connect(connection_string)
+            cursor = connection.cursor()
+
+            # Define the SQL query to delete the cluster schema
+            drop_schema_query = f'DROP SCHEMA IF EXISTS "{cluster_name}" CASCADE'
+            cursor.execute(drop_schema_query)
+
+            # Commit the transaction
+            connection.commit()
+
+            logger.info(f"Schema for cluster '{cluster_name}' removed from the database")
+            logger.info(f"----------- Destruction of cluster '{cluster_name}' successful -----------")
+
+        except psycopg2.Error as e:
+            logger.error(f"❌ Failed to remove schema for cluster '{cluster_name}' from the database: {e}")
+            raise RuntimeError(f"❌ Failed to remove schema for cluster '{cluster_name}' from the database")
+
+        finally:
+            # Close the database connection
+            if cursor:
+                cursor.close()
+            if connection:
+                connection.close()
+
+    def deploy_manifests(
+        self,
+        manifest_folder: str,
+        master_ip: str,
+        ssh_key_path: str,
+        ssh_user: str,
+    ):
+        """
+        Copy and apply manifests to a cluster using copy_manifest.tf in a temporary folder.
+
+        Args:
+            manifest_folder: Path to local manifest folder
+            master_ip: IP address of K3s master
+            ssh_key_path: Path to SSH private key
+            ssh_user: SSH username to connect to the master node
+        """
+        # Dedicated folder for copy-manifest operations
+        copy_dir = Path(self.output_dir) / "copy-manifest"
+        copy_dir.mkdir(parents=True, exist_ok=True)
+
+        logger.debug(f"Using copy-manifest folder: {copy_dir}")
+
+        try:
+            # Copy copy_manifest.tf from templates
+            tf_source_file = Path(self.template_manager.templates_dir) / "deploy_manifest.tf"
+            if not tf_source_file.exists():
+                logger.debug(f"deploy_manifest.tf not found at: {tf_source_file}")
+                raise RuntimeError(f"deploy_manifest.tf not found at: {tf_source_file}")
+            shutil.copy(tf_source_file, copy_dir)
+            logger.debug(f"Copied copy_manifest.tf to {copy_dir}")
+
+            # Prepare environment for OpenTofu
+            env_vars = os.environ.copy()
+            env_vars["TF_LOG"] = os.getenv("TF_LOG", "INFO")
+            env_vars["TF_LOG_PATH"] = os.getenv("TF_LOG_PATH", "/tmp/opentofu.log")
+
+            logger.info(f"------------ Applying manifest on node: {master_ip} -------------------")
+
+            # Run tofu init with spinner
+            CommandExecutor.run_command(
+                ["tofu", "init"],
+                cwd=str(copy_dir),
+                description="OpenTofu init",
+                env=env_vars,
+            )
+
+            # Run tofu apply with spinner
+            CommandExecutor.run_command(
+                [
+                    "tofu",
+                    "apply",
+                    "-auto-approve",
+                    f"-var=manifest_folder={manifest_folder}",
+                    f"-var=master_ip={master_ip}",
+                    f"-var=ssh_private_key_path={ssh_key_path}",
+                    f"-var=ssh_user={ssh_user}"
+                ],
+                cwd=str(copy_dir),
+                description="OpenTofu apply",
+                env=env_vars,
+            )
+
+            logger.info("------------ Successfully applied manifests -------------------")
+
+        except RuntimeError as e:
+            print(f"\n---------- ERROR ----------\n{e}\n")
+            raise
+
+        finally:
+            if copy_dir.exists():
+                shutil.rmtree(copy_dir)