cluster-builder 0.2.1__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cluster-builder has been flagged as possibly problematic.
- cluster_builder-0.3.1/PKG-INFO +321 -0
- cluster_builder-0.3.1/README.md +305 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/config/cluster.py +30 -5
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/config/postgres.py +4 -1
- cluster_builder-0.3.1/cluster_builder/infrastructure/executor.py +88 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/infrastructure/templates.py +4 -6
- cluster_builder-0.3.1/cluster_builder/swarmchestrate.py +587 -0
- cluster_builder-0.3.1/cluster_builder/templates/aws/main.tf +156 -0
- cluster_builder-0.3.1/cluster_builder/templates/aws_provider.tf +22 -0
- cluster_builder-0.3.1/cluster_builder/templates/copy_manifest.tf +36 -0
- cluster_builder-0.3.1/cluster_builder/templates/edge/main.tf +98 -0
- cluster_builder-0.3.1/cluster_builder/templates/ha_user_data.sh.tpl +33 -0
- cluster_builder-0.3.1/cluster_builder/templates/master_user_data.sh.tpl +37 -0
- cluster_builder-0.3.1/cluster_builder/templates/openstack/main.tf +218 -0
- cluster_builder-0.3.1/cluster_builder/templates/openstack_provider.tf +70 -0
- cluster_builder-0.3.1/cluster_builder/templates/worker_user_data.sh.tpl +34 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/utils/hcl.py +91 -15
- cluster_builder-0.3.1/cluster_builder.egg-info/PKG-INFO +321 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder.egg-info/SOURCES.txt +9 -0
- cluster_builder-0.3.1/cluster_builder.egg-info/requires.txt +6 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/pyproject.toml +14 -5
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/tests/test_hcl.py +33 -20
- cluster_builder-0.2.1/PKG-INFO +0 -264
- cluster_builder-0.2.1/README.md +0 -250
- cluster_builder-0.2.1/cluster_builder/infrastructure/executor.py +0 -88
- cluster_builder-0.2.1/cluster_builder/swarmchestrate.py +0 -373
- cluster_builder-0.2.1/cluster_builder.egg-info/PKG-INFO +0 -264
- cluster_builder-0.2.1/cluster_builder.egg-info/requires.txt +0 -4
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/LICENSE +0 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/__init__.py +0 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/config/__init__.py +0 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/infrastructure/__init__.py +0 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/utils/__init__.py +0 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder/utils/logging.py +0 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder.egg-info/dependency_links.txt +0 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/cluster_builder.egg-info/top_level.txt +0 -0
- {cluster_builder-0.2.1 → cluster_builder-0.3.1}/setup.cfg +0 -0
cluster_builder-0.3.1/PKG-INFO
@@ -0,0 +1,321 @@
+Metadata-Version: 2.4
+Name: cluster-builder
+Version: 0.3.1
+Summary: Swarmchestrate cluster builder
+Author-email: Gunjan <G.Kotak@westminster.ac.uk>, Jay <J.Deslauriers@westminster.ac.uk>
+License: Apache2
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: names_generator==0.2.0
+Requires-Dist: python-hcl2==7.2
+Requires-Dist: lark-parser==0.12.0
+Requires-Dist: python-dotenv==1.1.1
+Requires-Dist: psycopg2-binary==2.9.10
+Requires-Dist: yaspin==3.1.0
+Dynamic: license-file
+
+# Swarmchestrate - Cluster Builder
+
+This repository contains the codebase for **[cluster-builder]**, which builds K3s clusters for Swarmchestrate using OpenTofu.
+
+Key features:
+- **Create**: Provisions infrastructure using OpenTofu and installs K3s.
+- **Add**: Add worker or HA nodes to existing clusters.
+- **Remove**: Selectively remove nodes from existing clusters.
+- **Delete**: Destroys the provisioned infrastructure when no longer required.
+
+---
+
+## Prerequisites
+
+Before proceeding, ensure the following prerequisites are installed:
+
+1. **Git**: For cloning the repository.
+2. **Python**: Version 3.9 or higher.
+3. **pip**: Python package manager.
+4. **OpenTofu**: Version 1.6 or higher for infrastructure provisioning.
+5. **Make**: To run the provided `Makefile`.
+6. **PostgreSQL**: For storing OpenTofu state.
+7. (Optional) **Docker**: To create a dev Postgres instance.
+8. For detailed instructions on **edge device requirements**, refer to the [Edge Device Requirements](docs/edge-requirements.md) document.
+
+---
+
+## Getting Started
+
+### 1. Clone the Repository
+
+To get started, clone this repository:
+
+```bash
+git clone https://github.com/Swarmchestrate/cluster-builder.git
+```
+
+### 2. Navigate to the Project Directory
+
+```bash
+cd cluster-builder
+```
+
+### 3. Install Dependencies and Tools
+
+Run the Makefile to install all necessary dependencies, including OpenTofu:
+
+```bash
+make install
+```
+
+This command will:
+- Install Python dependencies listed in requirements.txt.
+- Download and configure OpenTofu for infrastructure management.
+
+```bash
+make db
+```
+
+This command will:
+- Spin up an empty dev Postgres DB (in Docker) for storing state
+
+The Makefile provides default database details, which you can update or use as-is: container name `pg-db`, with `-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=adminpass -e POSTGRES_DB=swarmchestrate`.
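For context, the `make db` target described above can be approximated with a plain `docker run`. This is an editor-added sketch only: the container name and environment values come from the Makefile defaults quoted above, while the image tag and port mapping are assumptions.

```bash
# Rough equivalent of `make db` (assumed image tag and port mapping)
docker run -d \
  --name pg-db \
  -e POSTGRES_USER=admin \
  -e POSTGRES_PASSWORD=adminpass \
  -e POSTGRES_DB=swarmchestrate \
  -p 5432:5432 \
  postgres:16
```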
+
+For database setup as a service, refer to the [database setup as service](docs/database_setup.md) document.
+
+### 4. Populate the .env file with access config
+The .env file is used to store environment variables required by the application. It contains configuration details for connecting to your cloud providers, the PostgreSQL database, and any other necessary resources.
+
+#### 4.1. Rename or copy the example file to **.env**
+
+```bash
+cp .env_example .env
+```
+
+#### 4.2. Open the **.env** file and add the necessary configuration for your cloud providers and PostgreSQL:
+
+```ini
+## PG Configuration
+POSTGRES_USER=postgres
+POSTGRES_PASSWORD=secret
+POSTGRES_HOST=db.example.com
+POSTGRES_DATABASE=terraform_state
+POSTGRES_SSLMODE=prefer
+
+## AWS Auth
+TF_VAR_aws_region=us-west-2
+TF_VAR_aws_access_key=AKIAXXXXXXXXXXXXXXXX
+TF_VAR_aws_secret_key=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+## OpenStack Auth - AppCreds Mode
+TF_VAR_openstack_auth_method=appcreds
+TF_VAR_openstack_auth_url=https://openstack.example.com:5000
+TF_VAR_openstack_application_credential_id=fdXXXXXXXXXXXXXXXX
+TF_VAR_openstack_application_credential_secret=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+TF_VAR_openstack_region=RegionOne
+
+## OpenStack Auth - User/Pass Mode
+# TF_VAR_openstack_auth_method=userpass
+# TF_VAR_openstack_auth_url=https://openstack.example.com:5000
+# TF_VAR_openstack_region=RegionOne
+# TF_VAR_openstack_user_name=myuser
+# TF_VAR_openstack_password=mypassword
+# TF_VAR_openstack_project_id=project-id-123
+# TF_VAR_openstack_user_domain_name=Default
+```
+
+---
+
+## Basic Usage
+
+### Initialisation
+
+```python
+from cluster_builder import Swarmchestrate
+
+# Initialise the orchestrator
+orchestrator = Swarmchestrate(
+    template_dir="/path/to/templates",
+    output_dir="/path/to/output"
+)
+```
+
+### Creating a New Cluster
+
+To create a new k3s cluster, use the **add_node** method with the **master** role:
+
+```python
+# Configuration for a new cluster
+config = {
+    "cloud": "aws",  # Can be 'aws', 'openstack', or 'edge'
+    "k3s_role": "master",  # Role can be 'master', 'worker', or 'ha'
+    "ha": False,  # Set to True for high availability (HA) deployments
+    "instance_type": "t2.small",  # AWS instance type
+    "ssh_key_name": "g",  # SSH key name for AWS or OpenStack
+    "ssh_user": "ec2-user",  # SSH user for the instance
+    "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem",  # Path to SSH private key
+    "ami": "ami-0c0493bbac867d427",  # AMI ID for AWS (specific to region)
+    "tcp_ports": [10020],  # Optional list of TCP ports to open
+    "udp_ports": [1003]  # Optional list of UDP ports to open
+}
+
+# Create the cluster (returns the cluster name)
+cluster_name = orchestrator.add_node(config)
+print(f"Created cluster: {cluster_name}")
+```
+
+### Adding Nodes to an Existing Cluster
+
+To add worker or high-availability nodes to an existing cluster:
+
+```python
+# Configuration for adding a worker node
+worker_config = {
+    "cloud": "aws",  # Cloud provider (can be 'aws', 'openstack', or 'edge')
+    "k3s_role": "worker",  # Role can be 'worker' or 'ha'
+    "ha": False,  # Set to True for high availability (HA) deployments
+    "instance_type": "t2.small",  # AWS instance type
+    "ssh_key_name": "g",  # SSH key name
+    "ssh_user": "ec2-user",  # SSH user for the instance
+    "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem",  # Path to SSH private key
+    "ami": "ami-0c0493bbac867d427",  # AMI ID for AWS
+    # Optional parameters:
+    # "master_ip": "12.13.14.15",  # IP address of the master node (required for worker/HA roles)
+    # "cluster_name": "elastic_mcnulty",  # Name of the cluster
+    # "security_group_id": "sg-xxxxxxxxxxxxxxx",  # Security group ID for AWS or OpenStack
+    # "tcp_ports": [80, 443],  # List of TCP ports to open
+    # "udp_ports": [53]  # List of UDP ports to open
+}
+
+# Add the worker node
+cluster_name = orchestrator.add_node(worker_config)
+print(f"Added worker node to cluster: {cluster_name}")
+```
+
+### Removing a Specific Node
+
+To remove a specific node from a cluster:
+
+```python
+# Remove a node by its resource name
+orchestrator.remove_node(
+    cluster_name="your-cluster-name",
+    resource_name="aws_eloquent_feynman"  # The resource identifier of the node
+)
+```
+
+The **remove_node** method:
+1. Destroys the node's infrastructure resources
+2. Removes the node's configuration from the cluster
+
+---
+
+### Destroying an Entire Cluster
+
+To completely destroy a cluster and all its nodes:
+
+```python
+# Destroy the entire cluster
+orchestrator.destroy(
+    cluster_name="your-cluster-name"
+)
+```
+
+The **destroy** method:
+1. Destroys all infrastructure resources associated with the cluster
+2. Removes the cluster directory and configuration files
+
+Note for **Edge Devices**:
+Since the edge device is already provisioned, the `destroy` method will not remove K3s directly from the edge device. You will need to manually uninstall K3s from your edge device after the cluster is destroyed.
+
+---
+
+### Important Configuration Requirements
+#### High Availability Flag (ha):
+
+- For k3s_role="worker" or k3s_role="ha", you must specify a master_ip (the IP address of the master node).
+
+- For k3s_role="master", you must not specify a master_ip.
+
+- The ha flag should be set to True for high availability deployment (usually when adding an HA or worker node to an existing master); see the example after this list.
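Putting those requirements together, a sketch of an HA node configuration might look like the following. This is an illustration only; the values are placeholders reused from the examples above, not part of the released package.

```python
# Sketch: joining an additional HA server node to an existing cluster
ha_config = {
    "cloud": "aws",
    "k3s_role": "ha",                  # joining as an additional server node
    "ha": True,                        # high availability deployment
    "master_ip": "12.13.14.15",        # required for worker/ha roles
    "cluster_name": "elastic_mcnulty", # existing cluster to join
    "instance_type": "t2.small",
    "ssh_key_name": "g",
    "ssh_user": "ec2-user",
    "ssh_private_key_path": "/workspaces/cluster-builder/scripts/g.pem",
    "ami": "ami-0c0493bbac867d427",
}

cluster_name = orchestrator.add_node(ha_config)
```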
+
+#### SSH Credentials:
+
+- For all roles (k3s_role="master", k3s_role="worker", k3s_role="ha"), you must specify both ssh_user and ssh_private_key_path, except for edge.
+
+- The ssh_private_key_path should be the path to your SSH private key file. Ensure that the SSH key is copied to the specified path before running the script.
+
+- The ssh_key_name and the ssh_private_key_path are different; ensure that your SSH key is placed correctly at the provided ssh_private_key_path.
+
+#### Ports:
+You can specify custom ports for your nodes in the tcp_ports and udp_ports fields. However, certain ports are required for Kubernetes deployment (even if not specified explicitly):
+
+**TCP Ports:**
+
+- 2379-2380: For etcd communication
+- 6443: K3s API server
+- 10250: Kubelet metrics
+- 51820-51821: WireGuard (for encrypted networking)
+- 22: SSH access
+- 80, 443: HTTP/HTTPS access
+- 53: DNS (CoreDNS)
+- 5432: PostgreSQL access (master node)
+
+**UDP Ports:**
+
+- 8472: VXLAN for Flannel
+- 53: DNS
+
+#### OpenStack:
+When provisioning on OpenStack, you should provide a value for 'floating_ip_pool', the pool from which floating IPs can be allocated for the instance. If not specified, OpenTofu will not assign a floating IP.
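For example, a sketch of an OpenStack node configuration that requests a floating IP; the pool name `public` is a placeholder, and other OpenStack-specific fields (image, flavour, network) are omitted here.

```python
# Sketch: requesting a floating IP on OpenStack (placeholder pool name)
openstack_config = {
    "cloud": "openstack",
    "k3s_role": "master",
    "floating_ip_pool": "public",  # pool from which a floating IP is allocated
    # ... other configuration ...
}

cluster_name = orchestrator.add_node(openstack_config)
```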
+
+---
+
+## Advanced Usage
+
+### Dry Run Mode
+
+All operations support a **dryrun** parameter, which validates the configuration
+without making changes. A node created with dryrun should be removed with dryrun.
+
+```python
+# Validate configuration without deploying
+orchestrator.add_node(config, dryrun=True)
+
+# Validate removal without destroying
+orchestrator.remove_node(cluster_name, resource_name, dryrun=True)
+
+# Validate destruction without destroying
+orchestrator.destroy(cluster_name, dryrun=True)
+```
+
+### Custom Cluster Names
+
+By default, cluster names are generated automatically. To specify a custom name:
+
+```python
+config = {
+    "cloud": "aws",
+    "k3s_role": "master",
+    "cluster_name": "production-cluster",
+    # ... other configuration ...
+}
+
+orchestrator.add_node(config)
+```
+
+---
+
+## Template Structure
+
+Templates should be organised as follows:
+- `templates/` - Base directory for templates
+- `templates/{cloud}/` - Terraform modules for each cloud provider
+- `templates/{role}_user_data.sh.tpl` - Node initialisation scripts
+- `templates/{cloud}_provider.tf.j2` - Provider configuration templates
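For reference, the template files shipped in this release (taken from the file listing at the top of this diff) fit that layout as follows:

```
cluster_builder/templates/
├── aws/main.tf
├── edge/main.tf
├── openstack/main.tf
├── aws_provider.tf
├── openstack_provider.tf
├── copy_manifest.tf
├── master_user_data.sh.tpl
├── ha_user_data.sh.tpl
└── worker_user_data.sh.tpl
```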
+
+---
+
+## DEMO
+Some test scripts have been created to demonstrate the functionality of the cluster builder. They can be referred to for understanding how the system works and for testing various configurations.
+
+For detailed service deployment examples and to explore the test scripts, refer to the [test scripts](docs/test-scripts.md) document.
+
+---
cluster_builder-0.3.1/README.md
@@ -0,0 +1,305 @@
(The 305 added lines are identical to the README content embedded in the PKG-INFO shown above.)
cluster_builder/config/cluster.py
@@ -4,7 +4,8 @@ Cluster configuration management.
 
 import os
 import logging
-
+import secrets
+import string
 from names_generator import generate_name
 
 from cluster_builder.infrastructure import TemplateManager
@@ -50,9 +51,25 @@ class ClusterConfig:
         A randomly generated name
         """
         name = generate_name()
+        name = name.replace("_", "-")
         logger.debug(f"Generated random name: {name}")
         return name
 
+    def generate_k3s_token(self, length: int = 16) -> str:
+        """
+        Generate a secure random alphanumeric token for K3s.
+
+        Args:
+            length: Length of the token (default: 16)
+
+        Returns:
+            A secure, randomly generated alphanumeric token
+        """
+        chars = string.ascii_letters + string.digits
+        token = ''.join(secrets.choice(chars) for _ in range(length))
+        logger.debug(f"Generated K3s token: {token}")
+        return token
+
     def prepare(self, config: dict[str, any]) -> tuple[str, dict[str, any]]:
         """
         Prepare the configuration and template files for deployment.
@@ -84,7 +101,7 @@ class ClusterConfig:
 
         cloud = prepared_config["cloud"]
         role = prepared_config["k3s_role"]
-        logger.
+        logger.debug(f"Preparing configuration for cloud={cloud}, role={role}")
 
         # Set module source path
         prepared_config["module_source"] = self.template_manager.get_module_source_path(
@@ -92,14 +109,22 @@ class ClusterConfig:
         )
         logger.debug(f"Using module source: {prepared_config['module_source']}")
 
+        # create k3s-token if not provided
+        if "k3s_token" not in prepared_config:
+            logger.debug("Generating k3s token for cluster")
+            k3s_token = self.generate_k3s_token()
+            prepared_config["k3s_token"] = k3s_token
+        else:
+            logger.debug(f"Using provided K3s token: {prepared_config['k3s_token']}")
+
         # Generate a cluster name if not provided
         if "cluster_name" not in prepared_config:
             cluster_name = self.generate_random_name()
             prepared_config["cluster_name"] = cluster_name
-            logger.info(f"
+            logger.info(f"Creating new cluster: {cluster_name}")
         else:
             logger.info(
-                f"
+                f"Adding node to existing cluster: {prepared_config['cluster_name']}"
             )
 
         cluster_dir = self.get_cluster_output_dir(prepared_config["cluster_name"])
@@ -107,7 +132,7 @@ class ClusterConfig:
 
         # Generate a resource name
         random_name = self.generate_random_name()
-        prepared_config["resource_name"] = f"{cloud}
+        prepared_config["resource_name"] = f"{cloud}-{random_name}"
        logger.debug(f"Resource name: {prepared_config['resource_name']}")
 
         # Create the cluster directory