cluster-builder 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (44)
  1. cluster_builder-0.3.1/PKG-INFO +321 -0
  2. cluster_builder-0.3.1/README.md +305 -0
  3. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/config/cluster.py +30 -5
  4. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/config/postgres.py +4 -1
  5. cluster_builder-0.3.1/cluster_builder/infrastructure/executor.py +88 -0
  6. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/infrastructure/templates.py +2 -2
  7. cluster_builder-0.3.1/cluster_builder/swarmchestrate.py +587 -0
  8. cluster_builder-0.3.1/cluster_builder/templates/aws/main.tf +156 -0
  9. cluster_builder-0.3.1/cluster_builder/templates/copy_manifest.tf +36 -0
  10. cluster_builder-0.3.1/cluster_builder/templates/edge/main.tf +98 -0
  11. cluster_builder-0.3.1/cluster_builder/templates/ha_user_data.sh.tpl +33 -0
  12. cluster_builder-0.3.1/cluster_builder/templates/master_user_data.sh.tpl +37 -0
  13. cluster_builder-0.3.1/cluster_builder/templates/openstack/main.tf +218 -0
  14. cluster_builder-0.3.1/cluster_builder/templates/openstack_provider.tf +70 -0
  15. cluster_builder-0.3.1/cluster_builder/templates/worker_user_data.sh.tpl +34 -0
  16. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/utils/hcl.py +91 -15
  17. cluster_builder-0.3.1/cluster_builder.egg-info/PKG-INFO +321 -0
  18. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder.egg-info/SOURCES.txt +4 -3
  19. cluster_builder-0.3.1/cluster_builder.egg-info/requires.txt +6 -0
  20. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/pyproject.toml +8 -5
  21. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/tests/test_hcl.py +33 -20
  22. cluster_builder-0.3.0/PKG-INFO +0 -264
  23. cluster_builder-0.3.0/README.md +0 -250
  24. cluster_builder-0.3.0/cluster_builder/infrastructure/executor.py +0 -88
  25. cluster_builder-0.3.0/cluster_builder/swarmchestrate.py +0 -373
  26. cluster_builder-0.3.0/cluster_builder/templates/aws/main.tf +0 -93
  27. cluster_builder-0.3.0/cluster_builder/templates/edge/main.tf.j2 +0 -40
  28. cluster_builder-0.3.0/cluster_builder/templates/ha_user_data.sh.tpl +0 -2
  29. cluster_builder-0.3.0/cluster_builder/templates/master_user_data.sh.tpl +0 -6
  30. cluster_builder-0.3.0/cluster_builder/templates/openstack/main.tf.j2 +0 -76
  31. cluster_builder-0.3.0/cluster_builder/templates/openstack/network_security_group.tf.j2 +0 -34
  32. cluster_builder-0.3.0/cluster_builder/templates/worker_user_data.sh.tpl +0 -2
  33. cluster_builder-0.3.0/cluster_builder.egg-info/PKG-INFO +0 -264
  34. cluster_builder-0.3.0/cluster_builder.egg-info/requires.txt +0 -4
  35. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/LICENSE +0 -0
  36. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/__init__.py +0 -0
  37. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/config/__init__.py +0 -0
  38. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/infrastructure/__init__.py +0 -0
  39. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/templates/aws_provider.tf +0 -0
  40. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/utils/__init__.py +0 -0
  41. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder/utils/logging.py +0 -0
  42. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder.egg-info/dependency_links.txt +0 -0
  43. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/cluster_builder.egg-info/top_level.txt +0 -0
  44. {cluster_builder-0.3.0 → cluster_builder-0.3.1}/setup.cfg +0 -0
@@ -0,0 +1,321 @@
1
+ Metadata-Version: 2.4
2
+ Name: cluster-builder
3
+ Version: 0.3.1
4
+ Summary: Swarmchestrate cluster builder
5
+ Author-email: Gunjan <G.Kotak@westminster.ac.uk>, Jay <J.Deslauriers@westminster.ac.uk>
6
+ License: Apache2
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: names_generator==0.2.0
10
+ Requires-Dist: python-hcl2==7.2
11
+ Requires-Dist: lark-parser==0.12.0
12
+ Requires-Dist: python-dotenv==1.1.1
13
+ Requires-Dist: psycopg2-binary==2.9.10
14
+ Requires-Dist: yaspin==3.1.0
15
+ Dynamic: license-file
16
+
17
+ # Swarmchestrate - Cluster Builder
18
+
19
+ This repository contains the codebase for **cluster-builder**, which builds K3s clusters for Swarmchestrate using OpenTofu.
20
+
21
+ Key features:
22
+ - **Create**: Provisions infrastructure using OpenTofu and installs K3s.
23
+ - **Add**: Adds worker or HA nodes to existing clusters.
24
+ - **Remove**: Selectively removes nodes from existing clusters.
25
+ - **Delete**: Destroys the provisioned infrastructure when no longer required.
26
+
27
+ ---
28
+
29
+ ## Prerequisites
30
+
31
+ Before proceeding, ensure the following prerequisites are installed:
32
+
33
+ 1. **Git**: For cloning the repository.
34
+ 2. **Python**: Version 3.9 or higher.
35
+ 3. **pip**: Python package manager.
36
+ 4. **OpenTofu**: Version 1.6 or higher for infrastructure provisioning.
37
+ 5. **Make**: To run the provided `Makefile`.
38
+ 6. **PostgreSQL**: For storing OpenTofu state.
39
+ 7. (Optional) **Docker**: To create a dev Postgres database.
40
+ 8. For detailed instructions on **edge device requirements**, refer to the [Edge Device Requirements](docs/edge-requirements.md) document.
41
+
42
+ ---
43
+
44
+ ## Getting Started
45
+
46
+ ### 1. Clone the Repository
47
+
48
+ To get started, clone this repository:
49
+
50
+ ```bash
51
+ git clone https://github.com/Swarmchestrate/cluster-builder.git
52
+ ```
53
+
54
+ ### 2. Navigate to the Project Directory
55
+
56
+ ```bash
57
+ cd cluster-builder
58
+ ```
59
+
60
+ ### 3. Install Dependencies and Tools
61
+
62
+ Run the Makefile to install all necessary dependencies, including OpenTofu:
63
+
64
+ ```bash
65
+ make install
66
+ ```
67
+
68
+ This command will:
69
+ - Install Python dependencies listed in requirements.txt.
70
+ - Download and configure OpenTofu for infrastructure management.
71
+
72
+ ```bash
73
+ make db
74
+ ```
75
+
76
+ This command will:
77
+ - Spin up an empty dev Postgres DB (in Docker) for storing state.
78
+
79
+ The Makefile provides default database details, which you can use as-is or update: container name `pg-db`, with `POSTGRES_USER=admin`, `POSTGRES_PASSWORD=adminpass` and `POSTGRES_DB=swarmchestrate`.
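+
+ As a rough sketch (not the exact Makefile target), `make db` does something like the following, assuming Docker is installed:
+
+ ```bash
+ # Start a throwaway Postgres container for OpenTofu state,
+ # using the default name and credentials shown above
+ docker run -d --name pg-db \
+   -e POSTGRES_USER=admin \
+   -e POSTGRES_PASSWORD=adminpass \
+   -e POSTGRES_DB=swarmchestrate \
+   -p 5432:5432 \
+   postgres
+ ```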
80
+
81
+ For database setup as a service, refer to the [database setup as a service](docs/database_setup.md) document.
82
+
83
+ ### 4. Populate .env file with access config
84
+ The .env file is used to store environment variables required by the application. It contains configuration details for connecting to your cloud providers, the PostgreSQL database, and any other necessary resources.
85
+
86
+ #### 4.1. Rename or copy the example file to **.env**
87
+
88
+ ```bash
89
+ cp .env_example .env
90
+ ```
91
+
92
+ #### 4.2. Open the **.env** file and add the necessary configuration for your cloud providers and PostgreSQL:
93
+
94
+ ```ini
95
+ ## PG Configuration
96
+ POSTGRES_USER=postgres
97
+ POSTGRES_PASSWORD=secret
98
+ POSTGRES_HOST=db.example.com
99
+ POSTGRES_DATABASE=terraform_state
100
+ POSTGRES_SSLMODE=prefer
101
+
102
+ ## AWS Auth
103
+ TF_VAR_aws_region=us-west-2
104
+ TF_VAR_aws_access_key=AKIAXXXXXXXXXXXXXXXX
105
+ TF_VAR_aws_secret_key=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
106
+
107
+ ## OpenStack Auth - AppCreds Mode
108
+ TF_VAR_openstack_auth_method=appcreds
109
+ TF_VAR_openstack_auth_url=https://openstack.example.com:5000
110
+ TF_VAR_openstack_application_credential_id=fdXXXXXXXXXXXXXXXX
111
+ TF_VAR_openstack_application_credential_secret=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
112
+ TF_VAR_openstack_region=RegionOne
113
+
114
+ ## OpenStack Auth - User/Pass Mode
115
+ # TF_VAR_openstack_auth_method=userpass
116
+ # TF_VAR_openstack_auth_url=https://openstack.example.com:5000
117
+ # TF_VAR_openstack_region=RegionOne
118
+ # TF_VAR_openstack_user_name=myuser
119
+ # TF_VAR_openstack_password=mypassword
120
+ # TF_VAR_openstack_project_id=project-id-123
121
+ # TF_VAR_openstack_user_domain_name=Default
122
+ ```
123
+
124
+ ---
125
+
126
+ ## Basic Usage
127
+
128
+ ### Initialisation
129
+
130
+ ```python
131
+ from cluster_builder import Swarmchestrate
132
+
133
+ # Initialise the orchestrator
134
+ orchestrator = Swarmchestrate(
135
+ template_dir="/path/to/templates",
136
+ output_dir="/path/to/output"
137
+ )
138
+ ```
139
+
140
+ ### Creating a New Cluster
141
+
142
+ To create a new k3s cluster, use the **add_node** method with the **master** role:
143
+
144
+ ```python
145
+ # Configuration for a new cluster
146
+ config = {
147
+ "cloud": "aws", # Can be 'aws', 'openstack', or 'edge'
148
+ "k3s_role": "master", # Role can be 'master', 'worker', or 'ha'
149
+ "ha": False, # Set to True for high availability (HA) deployments
150
+ "instance_type": "t2.small", # AWS instance type
151
+ "ssh_key_name": "g", # SSH key name for AWS or OpenStack
152
+ "ssh_user": "ec2-user", # SSH user for the instance
153
+ "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem", # Path to SSH private key
154
+ "ami": "ami-0c0493bbac867d427", # AMI ID for AWS (specific to region)
155
+ "tcp_ports": [10020], # Optional list of TCP ports to open
156
+ "udp_ports": [1003] # Optional list of UDP ports to open
157
+ }
158
+
159
+ # Create the cluster (returns the cluster name)
160
+ cluster_name = orchestrator.add_node(config)
161
+ print(f"Created cluster: {cluster_name}")
162
+ ```
163
+
164
+ ### Adding Nodes to an Existing Cluster
165
+
166
+ To add worker or high-availability nodes to an existing cluster:
167
+
168
+ ```python
169
+ # Configuration for adding a worker node
170
+ worker_config = {
171
+ "cloud": "aws", # Cloud provider (can be 'aws', 'openstack', or 'edge')
172
+ "k3s_role": "worker", # Role can be 'worker' or 'ha'
173
+ "ha": False, # Set to True for high availability (HA) deployments
174
+ "instance_type": "t2.small", # AWS instance type
175
+ "ssh_key_name": "g", # SSH key name
176
+ "ssh_user": "ec2-user", # SSH user for the instance
177
+ "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem", # Path to SSH private key
178
+ "ami": "ami-0c0493bbac867d427", # AMI ID for AWS
179
+ # Optional parameters:
180
+ # "master_ip": "12.13.14.15", # IP address of the master node (required for worker/HA roles)
181
+ # "cluster_name": "elastic_mcnulty", # Name of the cluster
182
+ # "security_group_id": "sg-xxxxxxxxxxxxxxx", # Security group ID for AWS or OpenStack
183
+ # "tcp_ports": [80, 443], # List of TCP ports to open
184
+ # "udp_ports": [53] # List of UDP ports to open
185
+ }
186
+
187
+ # Add the worker node
188
+ cluster_name = orchestrator.add_node(worker_config)
189
+ print(f"Added worker node to cluster: {cluster_name}")
190
+ ```
191
+
192
+ ### Removing a Specific Node
193
+
194
+ To remove a specific node from a cluster:
195
+
196
+ ```python
197
+ # Remove a node by its resource name
198
+ orchestrator.remove_node(
199
+ cluster_name="your-cluster-name",
200
+ resource_name="aws_eloquent_feynman" # The resource identifier of the node
201
+ )
202
+ ```
203
+
204
+ The **remove_node** method:
205
+ 1. Destroys the node's infrastructure resources
206
+ 2. Removes the node's configuration from the cluster
207
+
208
+ ---
209
+
210
+ ### Destroying an Entire Cluster
211
+
212
+ To completely destroy a cluster and all its nodes:
213
+
214
+ ```python
215
+ # Destroy the entire cluster
216
+ orchestrator.destroy(
217
+ cluster_name="your-cluster-name"
218
+ )
219
+ ```
220
+
221
+ The **destroy** method:
222
+ 1. Destroys all infrastructure resources associated with the cluster
223
+ 2. Removes the cluster directory and configuration files
224
+
225
+ Note for **Edge Devices**:
226
+ Since the edge device is already provisioned, the `destroy` method will not remove K3s directly from the edge device. You will need to manually uninstall K3s from your edge device after the cluster is destroyed.
227
+
228
+ ---
229
+
230
+ ### Important Configuration Requirements
231
+ #### High Availability Flag (ha):
232
+
233
+ - For k3s_role="worker" or k3s_role="ha", you must specify a master_ip (the IP address of the master node).
234
+
235
+ - For k3s_role="master", you must not specify a master_ip.
236
+
237
+ - The ha flag should be set to True for high-availability deployments (usually when adding an HA or worker node to an existing master).
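+
+ For illustration, a minimal sketch of an HA-node configuration that follows these rules (values are placeholders reused from the earlier examples):
+
+ ```python
+ # Join an additional HA server node to an existing cluster
+ ha_config = {
+     "cloud": "aws",
+     "k3s_role": "ha",                 # HA server role
+     "ha": True,                       # high-availability deployment
+     "master_ip": "12.13.14.15",       # required for 'worker' and 'ha' roles
+     "cluster_name": "elastic_mcnulty",
+     "instance_type": "t2.small",
+     "ssh_key_name": "g",
+     "ssh_user": "ec2-user",
+     "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem",
+     "ami": "ami-0c0493bbac867d427",
+ }
+
+ orchestrator.add_node(ha_config)
+ ```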
238
+
239
+ #### SSH Credentials:
240
+
241
+ - For all roles (k3s_role="master", k3s_role="worker", k3s_role="ha"), you must specify both ssh_user and ssh_private_key_path, except for the edge cloud.
242
+
243
+ - The ssh_private_key_path should be the path to your SSH private key file. Ensure that the SSH key is copied to the specified path before running the script.
244
+
245
+ - The ssh_key_name and the ssh_private_key_path are different—ensure that your SSH key is placed correctly at the provided ssh_private_key_path.
246
+
247
+ #### Ports:
248
+ You can specify custom ports for your nodes in the tcp_ports and udp_ports fields. However, certain ports are required for Kubernetes deployment (even if not specified explicitly):
249
+
250
+ **TCP Ports:**
251
+
252
+ - 2379-2380: For etcd communication
253
+ - 6443: K3s API server
254
+ - 10250: Kubelet metrics
255
+ - 51820-51821: WireGuard (for encrypted networking)
256
+ - 22: SSH access
257
+ - 80, 443: HTTP/HTTPS access
258
+ - 53: DNS (CoreDNS)
259
+ - 5432: PostgreSQL access (master node)
260
+
261
+ **UDP Ports:**
262
+
263
+ - 8472: VXLAN for Flannel
264
+ - 53: DNS
265
+
266
+ #### OpenStack:
267
+ When provisioning on OpenStack, provide a value for `floating_ip_pool` from which floating IPs can be allocated for the instance. If not specified, OpenTofu will not assign a floating IP.
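+
+ A minimal sketch of how this key might be supplied (the remaining OpenStack-specific keys, such as image, flavour, network and SSH settings, are elided as placeholders; see `templates/openstack/main.tf` in this release for the full variable list):
+
+ ```python
+ openstack_config = {
+     "cloud": "openstack",
+     "k3s_role": "worker",
+     "master_ip": "12.13.14.15",     # master node of the existing cluster
+     "floating_ip_pool": "public",   # placeholder pool name to allocate a floating IP from
+     # ... other OpenStack and SSH settings ...
+ }
+
+ cluster_name = orchestrator.add_node(openstack_config)
+ ```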
268
+
269
+ ---
270
+
271
+ ## Advanced Usage
272
+
273
+ ### Dry Run Mode
274
+
275
+ All operations support a **dryrun** parameter, which validates the configuration
276
+ without making changes. A node created with dryrun should be removed with dryrun.
277
+
278
+ ```python
279
+ # Validate configuration without deploying
280
+ orchestrator.add_node(config, dryrun=True)
281
+
282
+ # Validate removal without destroying
283
+ orchestrator.remove_node(cluster_name, resource_name, dryrun=True)
284
+
285
+ # Validate destruction without destroying
286
+ orchestrator.destroy(cluster_name, dryrun=True)
287
+ ```
288
+
289
+ ### Custom Cluster Names
290
+
291
+ By default, cluster names are generated automatically. To specify a custom name:
292
+
293
+ ```python
294
+ config = {
295
+ "cloud": "aws",
296
+ "k3s_role": "master",
297
+ "cluster_name": "production-cluster",
298
+ # ... other configuration ...
299
+ }
300
+
301
+ orchestrator.add_node(config)
302
+ ```
303
+
304
+ ---
305
+
306
+ ## Template Structure
307
+
308
+ Templates should be organised as follows:
309
+ - `templates/` - Base directory for templates
310
+ - `templates/{cloud}/` - Terraform modules for each cloud provider
311
+ - `templates/{role}_user_data.sh.tpl` - Node initialisation scripts
312
+ - `templates/{cloud}_provider.tf.j2` - Provider configuration templates
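+
+ For reference, the template files shipped in this release (listed in the diff above) map onto that layout roughly as follows:
+
+ ```
+ templates/
+ ├── aws/main.tf
+ ├── openstack/main.tf
+ ├── edge/main.tf
+ ├── aws_provider.tf
+ ├── openstack_provider.tf
+ ├── copy_manifest.tf
+ ├── master_user_data.sh.tpl
+ ├── worker_user_data.sh.tpl
+ └── ha_user_data.sh.tpl
+ ```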
313
+
314
+ ---
315
+
316
+ ## DEMO
317
+ Test scripts are provided to demonstrate the functionality of the cluster builder; refer to them to understand how the system works and to test various configurations.
318
+
319
+ For detailed service deployment examples and to explore the test scripts, refer to the [test scripts](docs/test-scripts.md) document.
320
+
321
+ ---
@@ -0,0 +1,305 @@
1
+ # Swarmchestrate - Cluster Builder
2
+
3
+ This repository contains the codebase for **cluster-builder**, which builds K3s clusters for Swarmchestrate using OpenTofu.
4
+
5
+ Key features:
6
+ - **Create**: Provisions infrastructure using OpenTofu and installs K3s.
7
+ - **Add**: Adds worker or HA nodes to existing clusters.
8
+ - **Remove**: Selectively removes nodes from existing clusters.
9
+ - **Delete**: Destroys the provisioned infrastructure when no longer required.
10
+
11
+ ---
12
+
13
+ ## Prerequisites
14
+
15
+ Before proceeding, ensure the following prerequisites are installed:
16
+
17
+ 1. **Git**: For cloning the repository.
18
+ 2. **Python**: Version 3.9 or higher.
19
+ 3. **pip**: Python package manager.
20
+ 4. **OpenTofu**: Version 1.6 or higher for infrastructure provisioning.
21
+ 5. **Make**: To run the provided `Makefile`.
22
+ 6. **PostgreSQL**: For storing OpenTofu state.
23
+ 7. (Optional) **Docker**: To create a dev Postgres database.
24
+ 8. For detailed instructions on **edge device requirements**, refer to the [Edge Device Requirements](docs/edge-requirements.md) document.
25
+
26
+ ---
27
+
28
+ ## Getting Started
29
+
30
+ ### 1. Clone the Repository
31
+
32
+ To get started, clone this repository:
33
+
34
+ ```bash
35
+ git clone https://github.com/Swarmchestrate/cluster-builder.git
36
+ ```
37
+
38
+ ### 2. Navigate to the Project Directory
39
+
40
+ ```bash
41
+ cd cluster-builder
42
+ ```
43
+
44
+ ### 3. Install Dependencies and Tools
45
+
46
+ Run the Makefile to install all necessary dependencies, including OpenTofu:
47
+
48
+ ```bash
49
+ make install
50
+ ```
51
+
52
+ This command will:
53
+ - Install Python dependencies listed in requirements.txt.
54
+ - Download and configure OpenTofu for infrastructure management.
55
+
56
+ ```bash
57
+ make db
58
+ ```
59
+
60
+ This command will:
61
+ - Spin up an empty dev Postgres DB (in Docker) for storing state.
62
+
63
+ The Makefile provides default database details, which you can use as-is or update: container name `pg-db`, with `POSTGRES_USER=admin`, `POSTGRES_PASSWORD=adminpass` and `POSTGRES_DB=swarmchestrate`.
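+
+ As a rough sketch (not the exact Makefile target), `make db` does something like the following, assuming Docker is installed:
+
+ ```bash
+ # Start a throwaway Postgres container for OpenTofu state,
+ # using the default name and credentials shown above
+ docker run -d --name pg-db \
+   -e POSTGRES_USER=admin \
+   -e POSTGRES_PASSWORD=adminpass \
+   -e POSTGRES_DB=swarmchestrate \
+   -p 5432:5432 \
+   postgres
+ ```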
64
+
65
+ For database setup as a service, refer to the [database setup as a service](docs/database_setup.md) document.
66
+
67
+ ### 4. Populate .env file with access config
68
+ The .env file is used to store environment variables required by the application. It contains configuration details for connecting to your cloud providers, the PostgreSQL database, and any other necessary resources.
69
+
70
+ #### 4.1. Rename or copy the example file to **.env**
71
+
72
+ ```bash
73
+ cp .env_example .env
74
+ ```
75
+
76
+ #### 4.2. Open the **.env** file and add the necessary configuration for your cloud providers and PostgreSQL:
77
+
78
+ ```ini
79
+ ## PG Configuration
80
+ POSTGRES_USER=postgres
81
+ POSTGRES_PASSWORD=secret
82
+ POSTGRES_HOST=db.example.com
83
+ POSTGRES_DATABASE=terraform_state
84
+ POSTGRES_SSLMODE=prefer
85
+
86
+ ## AWS Auth
87
+ TF_VAR_aws_region=us-west-2
88
+ TF_VAR_aws_access_key=AKIAXXXXXXXXXXXXXXXX
89
+ TF_VAR_aws_secret_key=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
90
+
91
+ ## OpenStack Auth - AppCreds Mode
92
+ TF_VAR_openstack_auth_method=appcreds
93
+ TF_VAR_openstack_auth_url=https://openstack.example.com:5000
94
+ TF_VAR_openstack_application_credential_id=fdXXXXXXXXXXXXXXXX
95
+ TF_VAR_openstack_application_credential_secret=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
96
+ TF_VAR_openstack_region=RegionOne
97
+
98
+ ## OpenStack Auth - User/Pass Mode
99
+ # TF_VAR_openstack_auth_method=userpass
100
+ # TF_VAR_openstack_auth_url=https://openstack.example.com:5000
101
+ # TF_VAR_openstack_region=RegionOne
102
+ # TF_VAR_openstack_user_name=myuser
103
+ # TF_VAR_openstack_password=mypassword
104
+ # TF_VAR_openstack_project_id=project-id-123
105
+ # TF_VAR_openstack_user_domain_name=Default
106
+ ```
107
+
108
+ ---
109
+
110
+ ## Basic Usage
111
+
112
+ ### Initialisation
113
+
114
+ ```python
115
+ from cluster_builder import Swarmchestrate
116
+
117
+ # Initialise the orchestrator
118
+ orchestrator = Swarmchestrate(
119
+ template_dir="/path/to/templates",
120
+ output_dir="/path/to/output"
121
+ )
122
+ ```
123
+
124
+ ### Creating a New Cluster
125
+
126
+ To create a new k3s cluster, use the **add_node** method with the **master** role:
127
+
128
+ ```python
129
+ # Configuration for a new cluster
130
+ config = {
131
+ "cloud": "aws", # Can be 'aws', 'openstack', or 'edge'
132
+ "k3s_role": "master", # Role can be 'master', 'worker', or 'ha'
133
+ "ha": False, # Set to True for high availability (HA) deployments
134
+ "instance_type": "t2.small", # AWS instance type
135
+ "ssh_key_name": "g", # SSH key name for AWS or OpenStack
136
+ "ssh_user": "ec2-user", # SSH user for the instance
137
+ "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem", # Path to SSH private key
138
+ "ami": "ami-0c0493bbac867d427", # AMI ID for AWS (specific to region)
139
+ "tcp_ports": [10020], # Optional list of TCP ports to open
140
+ "udp_ports": [1003] # Optional list of UDP ports to open
141
+ }
142
+
143
+ # Create the cluster (returns the cluster name)
144
+ cluster_name = orchestrator.add_node(config)
145
+ print(f"Created cluster: {cluster_name}")
146
+ ```
147
+
148
+ ### Adding Nodes to an Existing Cluster
149
+
150
+ To add worker or high-availability nodes to an existing cluster:
151
+
152
+ ```python
153
+ # Configuration for adding a worker node
154
+ worker_config = {
155
+ "cloud": "aws", # Cloud provider (can be 'aws', 'openstack', or 'edge')
156
+ "k3s_role": "worker", # Role can be 'worker' or 'ha'
157
+ "ha": False, # Set to True for high availability (HA) deployments
158
+ "instance_type": "t2.small", # AWS instance type
159
+ "ssh_key_name": "g", # SSH key name
160
+ "ssh_user": "ec2-user", # SSH user for the instance
161
+ "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem", # Path to SSH private key
162
+ "ami": "ami-0c0493bbac867d427", # AMI ID for AWS
163
+ # Optional parameters:
164
+ # "master_ip": "12.13.14.15", # IP address of the master node (required for worker/HA roles)
165
+ # "cluster_name": "elastic_mcnulty", # Name of the cluster
166
+ # "security_group_id": "sg-xxxxxxxxxxxxxxx", # Security group ID for AWS or OpenStack
167
+ # "tcp_ports": [80, 443], # List of TCP ports to open
168
+ # "udp_ports": [53] # List of UDP ports to open
169
+ }
170
+
171
+ # Add the worker node
172
+ cluster_name = orchestrator.add_node(worker_config)
173
+ print(f"Added worker node to cluster: {cluster_name}")
174
+ ```
175
+
176
+ ### Removing a Specific Node
177
+
178
+ To remove a specific node from a cluster:
179
+
180
+ ```python
181
+ # Remove a node by its resource name
182
+ orchestrator.remove_node(
183
+ cluster_name="your-cluster-name",
184
+ resource_name="aws_eloquent_feynman" # The resource identifier of the node
185
+ )
186
+ ```
187
+
188
+ The **remove_node** method:
189
+ 1. Destroys the node's infrastructure resources
190
+ 2. Removes the node's configuration from the cluster
191
+
192
+ ---
193
+
194
+ ### Destroying an Entire Cluster
195
+
196
+ To completely destroy a cluster and all its nodes:
197
+
198
+ ```python
199
+ # Destroy the entire cluster
200
+ orchestrator.destroy(
201
+ cluster_name="your-cluster-name"
202
+ )
203
+ ```
204
+
205
+ The **destroy** method:
206
+ 1. Destroys all infrastructure resources associated with the cluster
207
+ 2. Removes the cluster directory and configuration files
208
+
209
+ Note for **Edge Devices**:
210
+ Since the edge device is already provisioned, the `destroy` method will not remove K3s directly from the edge device. You will need to manually uninstall K3s from your edge device after the cluster is destroyed.
211
+
212
+ ---
213
+
214
+ ### Important Configuration Requirements
215
+ #### High Availability Flag (ha):
216
+
217
+ - For k3s_role="worker" or k3s_role="ha", you must specify a master_ip (the IP address of the master node).
218
+
219
+ - For k3s_role="master", you must not specify a master_ip.
220
+
221
+ - The ha flag should be set to True for high-availability deployments (usually when adding an HA or worker node to an existing master).
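+
+ For illustration, a minimal sketch of an HA-node configuration that follows these rules (values are placeholders reused from the earlier examples):
+
+ ```python
+ # Join an additional HA server node to an existing cluster
+ ha_config = {
+     "cloud": "aws",
+     "k3s_role": "ha",                 # HA server role
+     "ha": True,                       # high-availability deployment
+     "master_ip": "12.13.14.15",       # required for 'worker' and 'ha' roles
+     "cluster_name": "elastic_mcnulty",
+     "instance_type": "t2.small",
+     "ssh_key_name": "g",
+     "ssh_user": "ec2-user",
+     "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem",
+     "ami": "ami-0c0493bbac867d427",
+ }
+
+ orchestrator.add_node(ha_config)
+ ```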
222
+
223
+ #### SSH Credentials:
224
+
225
+ - For all roles (k3s_role="master", k3s_role="worker", k3s_role="ha"), you must specify both ssh_user and ssh_private_key_path, except for the edge cloud.
226
+
227
+ - The ssh_private_key_path should be the path to your SSH private key file. Ensure that the SSH key is copied to the specified path before running the script.
228
+
229
+ - The ssh_key_name and the ssh_private_key_path are different—ensure that your SSH key is placed correctly at the provided ssh_private_key_path.
230
+
231
+ #### Ports:
232
+ You can specify custom ports for your nodes in the tcp_ports and udp_ports fields. However, certain ports are required for Kubernetes deployment (even if not specified explicitly):
233
+
234
+ **TCP Ports:**
235
+
236
+ - 2379-2380: For etcd communication
237
+ - 6443: K3s API server
238
+ - 10250: Kubelet metrics
239
+ - 51820-51821: WireGuard (for encrypted networking)
240
+ - 22: SSH access
241
+ - 80, 443: HTTP/HTTPS access
242
+ - 53: DNS (CoreDNS)
243
+ - 5432: PostgreSQL access (master node)
244
+
245
+ **UDP Ports:**
246
+
247
+ - 8472: VXLAN for Flannel
248
+ - 53: DNS
249
+
250
+ #### OpenStack:
251
+ When provisioning on OpenStack, provide a value for `floating_ip_pool` from which floating IPs can be allocated for the instance. If not specified, OpenTofu will not assign a floating IP.
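+
+ A minimal sketch of how this key might be supplied (the remaining OpenStack-specific keys, such as image, flavour, network and SSH settings, are elided as placeholders; see `templates/openstack/main.tf` in this release for the full variable list):
+
+ ```python
+ openstack_config = {
+     "cloud": "openstack",
+     "k3s_role": "worker",
+     "master_ip": "12.13.14.15",     # master node of the existing cluster
+     "floating_ip_pool": "public",   # placeholder pool name to allocate a floating IP from
+     # ... other OpenStack and SSH settings ...
+ }
+
+ cluster_name = orchestrator.add_node(openstack_config)
+ ```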
252
+
253
+ ---
254
+
255
+ ## Advanced Usage
256
+
257
+ ### Dry Run Mode
258
+
259
+ All operations support a **dryrun** parameter, which validates the configuration
260
+ without making changes. A node created with dryrun should be removed with dryrun.
261
+
262
+ ```python
263
+ # Validate configuration without deploying
264
+ orchestrator.add_node(config, dryrun=True)
265
+
266
+ # Validate removal without destroying
267
+ orchestrator.remove_node(cluster_name, resource_name, dryrun=True)
268
+
269
+ # Validate destruction without destroying
270
+ orchestrator.destroy(cluster_name, dryrun=True)
271
+ ```
272
+
273
+ ### Custom Cluster Names
274
+
275
+ By default, cluster names are generated automatically. To specify a custom name:
276
+
277
+ ```python
278
+ config = {
279
+ "cloud": "aws",
280
+ "k3s_role": "master",
281
+ "cluster_name": "production-cluster",
282
+ # ... other configuration ...
283
+ }
284
+
285
+ orchestrator.add_node(config)
286
+ ```
287
+
288
+ ---
289
+
290
+ ## Template Structure
291
+
292
+ Templates should be organised as follows:
293
+ - `templates/` - Base directory for templates
294
+ - `templates/{cloud}/` - Terraform modules for each cloud provider
295
+ - `templates/{role}_user_data.sh.tpl` - Node initialisation scripts
296
+ - `templates/{cloud}_provider.tf.j2` - Provider configuration templates
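+
+ For reference, the template files shipped in this release (listed in the diff above) map onto that layout roughly as follows:
+
+ ```
+ templates/
+ ├── aws/main.tf
+ ├── openstack/main.tf
+ ├── edge/main.tf
+ ├── aws_provider.tf
+ ├── openstack_provider.tf
+ ├── copy_manifest.tf
+ ├── master_user_data.sh.tpl
+ ├── worker_user_data.sh.tpl
+ └── ha_user_data.sh.tpl
+ ```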
297
+
298
+ ---
299
+
300
+ ## DEMO
301
+ Test scripts are provided to demonstrate the functionality of the cluster builder; refer to them to understand how the system works and to test various configurations.
302
+
303
+ For detailed service deployment examples and to explore the test scripts, refer to the [test scripts](docs/test-scripts.md) document.
304
+
305
+ ---
@@ -4,7 +4,8 @@ Cluster configuration management.
4
4
 
5
5
  import os
6
6
  import logging
7
-
7
+ import secrets
8
+ import string
8
9
  from names_generator import generate_name
9
10
 
10
11
  from cluster_builder.infrastructure import TemplateManager
@@ -50,9 +51,25 @@ class ClusterConfig:
50
51
  A randomly generated name
51
52
  """
52
53
  name = generate_name()
54
+ name = name.replace("_", "-")
53
55
  logger.debug(f"Generated random name: {name}")
54
56
  return name
55
57
 
58
+ def generate_k3s_token(self, length: int = 16) -> str:
59
+ """
60
+ Generate a secure random alphanumeric token for K3s.
61
+
62
+ Args:
63
+ length: Length of the token (default: 16)
64
+
65
+ Returns:
66
+ A secure, randomly generated alphanumeric token
67
+ """
68
+ chars = string.ascii_letters + string.digits
69
+ token = ''.join(secrets.choice(chars) for _ in range(length))
70
+ logger.debug(f"Generated K3s token: {token}")
71
+ return token
72
+
56
73
  def prepare(self, config: dict[str, any]) -> tuple[str, dict[str, any]]:
57
74
  """
58
75
  Prepare the configuration and template files for deployment.
@@ -84,7 +101,7 @@ class ClusterConfig:
84
101
 
85
102
  cloud = prepared_config["cloud"]
86
103
  role = prepared_config["k3s_role"]
87
- logger.info(f"Preparing configuration for cloud={cloud}, role={role}")
104
+ logger.debug(f"Preparing configuration for cloud={cloud}, role={role}")
88
105
 
89
106
  # Set module source path
90
107
  prepared_config["module_source"] = self.template_manager.get_module_source_path(
@@ -92,14 +109,22 @@ class ClusterConfig:
92
109
  )
93
110
  logger.debug(f"Using module source: {prepared_config['module_source']}")
94
111
 
112
+ # create k3s-token if not provided
113
+ if "k3s_token" not in prepared_config:
114
+ logger.debug("Generating k3s token for cluster")
115
+ k3s_token = self.generate_k3s_token()
116
+ prepared_config["k3s_token"] = k3s_token
117
+ else:
118
+ logger.debug(f"Using provided K3s token: {prepared_config['k3s_token']}")
119
+
95
120
  # Generate a cluster name if not provided
96
121
  if "cluster_name" not in prepared_config:
97
122
  cluster_name = self.generate_random_name()
98
123
  prepared_config["cluster_name"] = cluster_name
99
- logger.info(f"Generated cluster name: {cluster_name}")
124
+ logger.info(f"Creating new cluster: {cluster_name}")
100
125
  else:
101
126
  logger.info(
102
- f"Using provided cluster name: {prepared_config['cluster_name']}"
127
+ f"Adding node to existing cluster: {prepared_config['cluster_name']}"
103
128
  )
104
129
 
105
130
  cluster_dir = self.get_cluster_output_dir(prepared_config["cluster_name"])
@@ -107,7 +132,7 @@ class ClusterConfig:
107
132
 
108
133
  # Generate a resource name
109
134
  random_name = self.generate_random_name()
110
- prepared_config["resource_name"] = f"{cloud}_{random_name}"
135
+ prepared_config["resource_name"] = f"{cloud}-{random_name}"
111
136
  logger.debug(f"Resource name: {prepared_config['resource_name']}")
112
137
 
113
138
  # Create the cluster directory