sbcli-pre 1.2.5__zip → 1.2.7__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/PKG-INFO +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/env_var +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/PKG-INFO +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/SOURCES.txt +5 -3
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_cli/cli.py +138 -136
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/cluster_ops.py +138 -235
- sbcli_pre-1.2.7/simplyblock_core/constants.py +91 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/caching_node_controller.py +8 -6
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/cluster_events.py +9 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/device_controller.py +56 -63
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/events_controller.py +5 -3
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/health_controller.py +30 -40
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/lvol_controller.py +75 -39
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/pool_controller.py +8 -4
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/snapshot_controller.py +36 -3
- sbcli_pre-1.2.7/simplyblock_core/controllers/tasks_controller.py +103 -0
- sbcli_pre-1.2.7/simplyblock_core/controllers/tasks_events.py +37 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/distr_controller.py +13 -9
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/kv_store.py +62 -20
- sbcli_pre-1.2.7/simplyblock_core/mgmt_node_ops.py +205 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/events.py +9 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/job_schedule.py +6 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/nvme_device.py +42 -4
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/storage_node.py +14 -2
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/rpc_client.py +55 -10
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/__init__.py +0 -4
- sbcli_pre-1.2.5/simplyblock_core/scripts/alerting/alert_resources.yaml → sbcli_pre-1.2.7/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 +54 -5
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/cluster.json +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/deploy_stack.sh +9 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +32 -15
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/docker-compose-swarm.yml +17 -2
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/haproxy.cfg +15 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/install_deps.sh +3 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/stack_deploy_wait.sh +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/capacity_and_stats_collector.py +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/device_monitor.py +5 -46
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/distr_event_collector.py +10 -11
- sbcli_pre-1.2.7/simplyblock_core/services/health_check_service.py +134 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/lvol_monitor.py +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/lvol_stat_collector.py +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/port_stat_collector.py +0 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/storage_node_monitor.py +49 -44
- sbcli_pre-1.2.7/simplyblock_core/services/tasks_runner_migration.py +61 -0
- sbcli_pre-1.2.5/simplyblock_core/services/job_tasks.py → sbcli_pre-1.2.7/simplyblock_core/services/tasks_runner_restart.py +95 -46
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/snode_client.py +12 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/storage_node_ops.py +630 -358
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/utils.py +126 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/snode_ops.py +103 -25
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_cluster.py +20 -43
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_device.py +10 -7
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_lvol.py +9 -5
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_pool.py +14 -5
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_storage_node.py +15 -15
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/node_utils.py +0 -2
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/utils.py +8 -0
- sbcli_pre-1.2.5/simplyblock_core/constants.py +0 -65
- sbcli_pre-1.2.5/simplyblock_core/mgmt_node_ops.py +0 -80
- sbcli_pre-1.2.5/simplyblock_core/scripts/apply_dashboard.sh +0 -22
- sbcli_pre-1.2.5/simplyblock_core/services/health_check_service.py +0 -136
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/README.md +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/pyproject.toml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/dependency_links.txt +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/entry_points.txt +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/requires.txt +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/top_level.txt +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/setup.cfg +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/setup.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_cli/main.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/cnode_client.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/compute_node_ops.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/device_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/lvol_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/mgmt_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/pool_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/snapshot_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/storage_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/base_model.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/caching_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/cluster.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/compute_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/deployer.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/global_settings.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/iface.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/lvol_model.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/mgmt_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/pool.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/port_stat.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/snapshot.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/stats.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/pci_utils.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/config_docker.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/devices.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/pools.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/datasource.yml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/db_config_double.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/db_config_single.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/prometheus.yml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/run_ssh.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/set_db_config.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/caching_node_monitor.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/cap_monitor.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/install_service.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/log_agg_service.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/remove_service.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/service_template.service +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/shell_utils.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/app.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/auth_middleware.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_basic.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_deployer.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/caching_node_app.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/caching_node_app_k8s.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/node_webapp.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/snode_app.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/delete.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy_cnode.yaml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy_spdk.yaml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/is_up.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/list_deps.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/rpac.yaml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/tst.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
{sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/cluster_ops.py
@@ -1,15 +1,19 @@
 # coding=utf-8
 import json
 import logging
-import math
 import os
+import re
+import tempfile
+import shutil
+import subprocess
 import time
 import uuid
 
 import docker
 import requests
+from jinja2 import Environment, FileSystemLoader
 
-from simplyblock_core import utils, scripts, constants, mgmt_node_ops, storage_node_ops
+from simplyblock_core import utils, scripts, constants, mgmt_node_ops, storage_node_ops
 from simplyblock_core.controllers import cluster_events, device_controller
 from simplyblock_core.kv_store import DBController
 from simplyblock_core.models.cluster import Cluster
@@ -17,10 +21,33 @@ from simplyblock_core.models.nvme_device import NVMeDevice
 from simplyblock_core.models.storage_node import StorageNode
 
 logger = logging.getLogger()
+TOP_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+
+def _add_grafana_dashboards(username, password, cluster_ip):
+    url = f"http://{username}:{password}@{cluster_ip}/grafana/api/dashboards/import"
+    headers = {'Content-Type': 'application/json'}
+    dirpath, _, filenames = next(os.walk(os.path.join(constants.INSTALL_DIR, "scripts", "dashboards")))
+    ret = True
+    for filename in filenames:
+        with open(os.path.join(dirpath, filename), 'r') as f:
+            st = f.read()
+            # st = st.replace("$Cluster", cluster_id)
+            st = json.loads(st)
+            payload = json.dumps(st)
+            response = requests.post(url, headers=headers, data=payload)
+            logger.debug(response.status_code)
+            logger.debug(response.text)
+            if response.status_code == 200:
+                resp = response.json()
+                logger.info(f"Dashboard: {resp['title']}, imported: {resp['imported']}")
+            else:
+                logger.error(f"Error importing dashboard, status code:{response.status_code} text:{response.text}")
+                ret = False
+    return ret
 
 
 def _add_graylog_input(cluster_ip, password):
-    url = f"http://{cluster_ip}
+    url = f"http://{cluster_ip}/graylog/api/system/inputs"
     payload = json.dumps({
         "title": "spdk log input",
         "type": "org.graylog2.inputs.gelf.udp.GELFUDPInput",
@@ -47,7 +74,8 @@ def _add_graylog_input(cluster_ip, password):
 
 
 def create_cluster(blk_size, page_size_in_blocks, cli_pass,
-                   cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period
+                   cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period,
+                   contact_point, grafana_endpoint):
     logger.info("Installing dependencies...")
     ret = scripts.install_deps()
     logger.info("Installing dependencies > Done")
@@ -102,6 +130,44 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
     if prov_cap_crit and prov_cap_crit > 0:
         c.prov_cap_crit = prov_cap_crit
 
+    alerts_template_folder = os.path.join(TOP_DIR, "simplyblock_core/scripts/alerting/")
+    alert_resources_file = "alert_resources.yaml"
+
+    env = Environment(loader=FileSystemLoader(alerts_template_folder), trim_blocks=True, lstrip_blocks=True)
+    template = env.get_template(f'{alert_resources_file}.j2')
+
+    slack_pattern = re.compile(r"https://hooks\.slack\.com/services/\S+")
+    email_pattern = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
+
+    if slack_pattern.match(contact_point):
+        ALERT_TYPE = "slack"
+    elif email_pattern.match(contact_point):
+        ALERT_TYPE = "email"
+    else:
+        ALERT_TYPE = "slack"
+        contact_point = 'https://hooks.slack.com/services/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'
+
+    values = {
+        'CONTACT_POINT': contact_point,
+        'GRAFANA_ENDPOINT': grafana_endpoint,
+        'ALERT_TYPE': ALERT_TYPE,
+    }
+
+    temp_dir = tempfile.mkdtemp()
+
+    temp_file_path = os.path.join(temp_dir, alert_resources_file)
+    with open(temp_file_path, 'w') as file:
+        file.write(template.render(values))
+
+    destination_file_path = os.path.join(alerts_template_folder, alert_resources_file)
+    try:
+        subprocess.run(['sudo', '-v'], check=True)  # sudo -v checks if the current user has sudo permissions
+        subprocess.run(['sudo', 'mv', temp_file_path, destination_file_path], check=True)
+        print(f"File moved to {destination_file_path} successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred: {e}")
+    shutil.rmtree(temp_dir)
+
     logger.info("Deploying swarm stack ...")
     ret = scripts.deploy_stack(cli_pass, DEV_IP, constants.SIMPLY_BLOCK_DOCKER_IMAGE, c.secret, c.uuid, log_del_interval, metrics_retention_period)
     logger.info("Deploying swarm stack > Done")
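The hunk above is where 1.2.7 stops shipping a static alerting config and instead renders it at cluster-create time: alert_resources.yaml.j2 (see the rename in the file list) is filled in with CONTACT_POINT, GRAFANA_ENDPOINT and ALERT_TYPE, written to a temp directory, and moved into place with sudo. A minimal, self-contained sketch of that substitution step follows; the inline template body is hypothetical and only illustrates the mechanism, it is not the actual alert_resources.yaml.j2 shipped in the package.

from jinja2 import Template

# Hypothetical template body; the real alert_resources.yaml.j2 is not reproduced in this diff.
template = Template(
    "contact_point: {{ CONTACT_POINT }}\n"
    "grafana_endpoint: {{ GRAFANA_ENDPOINT }}\n"
    "alert_type: {{ ALERT_TYPE }}\n"
)

rendered = template.render(
    CONTACT_POINT="https://hooks.slack.com/services/XXX/YYY/ZZZ",  # placeholder webhook
    GRAFANA_ENDPOINT="http://10.0.0.10/grafana",                   # placeholder endpoint
    ALERT_TYPE="slack",
)
print(rendered)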
@@ -122,8 +188,8 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
     mgmt_node_ops.add_mgmt_node(DEV_IP, c.uuid)
 
     logger.info("Applying dashboard...")
-    ret =
-    logger.info("Applying dashboard >
+    ret = _add_grafana_dashboards("admin", c.secret, DEV_IP)
+    logger.info(f"Applying dashboard > {ret}")
 
     logger.info("New Cluster has been created")
     logger.info(c.uuid)
@@ -176,203 +242,39 @@ def deploy_spdk(node_docker, spdk_cpu_mask, spdk_mem):
             break
 
 
-def
-
-    if role not in ["management", "storage", "storage-alloc"]:
-        logger.error(f"Unknown role: {role}")
-        return False
-
-    try:
-        resp = requests.get(f"http://{cluster_ip}/cluster/{cluster_id}")
-        resp_json = resp.json()
-        cluster_data = resp_json['results'][0]
-        logger.info(f"Cluster found! NQN:{cluster_data['nqn']}")
-        logger.debug(cluster_data)
-    except Exception as e:
-        logger.error("Error getting cluster data!")
-        logger.error(e)
-        return ""
-
-    logger.info("Installing dependencies...")
-    ret = scripts.install_deps()
-    logger.info("Installing dependencies > Done")
-
-    if not ifname:
-        ifname = "eth0"
-
-    DEV_IP = utils.get_iface_ip(ifname)
-    if not DEV_IP:
-        logger.error(f"Error getting interface ip: {ifname}")
-        return False
-
-    logger.info(f"Node IP: {DEV_IP}")
-    ret = scripts.configure_docker(DEV_IP)
-
-    db_connection = cluster_data['db_connection']
-    ret = scripts.set_db_config(db_connection)
-
-    if role == "storage":
-        logger.info("Deploying SPDK")
-        node_cpu_count = os.cpu_count()
-        if spdk_cpu_mask:
-            requested_cpu_count = len(format(int(spdk_cpu_mask, 16), 'b'))
-            if requested_cpu_count > node_cpu_count:
-                logger.error(f"The requested cpu count: {requested_cpu_count} "
-                             f"is larger than the node's cpu count: {node_cpu_count}")
-                return False
-        else:
-            spdk_cpu_mask = hex(int(math.pow(2, node_cpu_count))-1)
-        if spdk_mem:
-            spdk_mem = int(spdk_mem/(1024*1024))
-        else:
-            spdk_mem = 4096
-        node_docker = docker.DockerClient(base_url=f"tcp://{DEV_IP}:2375", version="auto", timeout=60*5)
-        deploy_spdk(node_docker, spdk_cpu_mask, spdk_mem)
-        time.sleep(5)
-
-    logger.info("Joining docker swarm...")
+def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit):
     db_controller = DBController()
-
-    if not
-        logger.error("No
-
+    clusters = db_controller.get_clusters()
+    if not clusters:
+        logger.error("No previous clusters found!")
+        return False
 
-
-
-
+    default_cluster = clusters[0]
+    logger.info("Adding new cluster")
+    cluster = Cluster()
+    cluster.uuid = str(uuid.uuid4())
+    cluster.blk_size = blk_size
+    cluster.page_size_in_blocks = page_size_in_blocks
+    cluster.ha_type = default_cluster.ha_type
+    cluster.nqn = f"{constants.CLUSTER_NQN}:{cluster.uuid}"
+    cluster.cli_pass = default_cluster.cli_pass
+    cluster.secret = default_cluster.secret
+    cluster.db_connection = default_cluster.db_connection
+    if cap_warn and cap_warn > 0:
+        cluster.cap_warn = cap_warn
+    if cap_crit and cap_crit > 0:
+        cluster.cap_crit = cap_crit
+    if prov_cap_warn and prov_cap_warn > 0:
+        cluster.prov_cap_warn = prov_cap_warn
+    if prov_cap_crit and prov_cap_crit > 0:
+        cluster.prov_cap_crit = prov_cap_crit
 
-
-
-
-
-
-    node_docker = docker.DockerClient(base_url=f"tcp://{DEV_IP}:2375", version="auto")
-    if node_docker.info()["Swarm"]["LocalNodeState"] == "active":
-        logger.info("Node is part of another swarm, leaving swarm")
-        try:
-            cluster_docker.nodes.get(node_docker.info()["Swarm"]["NodeID"]).remove(force=True)
-        except:
-            pass
-        node_docker.swarm.leave(force=True)
-        time.sleep(5)
-    node_docker.swarm.join([f"{docker_ip}:2377"], join_token)
-
-    retries = 10
-    while retries > 0:
-        if node_docker.info()["Swarm"]["LocalNodeState"] == "active":
-            break
-        logger.info("Waiting for node to be active...")
-        retries -= 1
-        time.sleep(2)
-    logger.info("Joining docker swarm > Done")
-    time.sleep(5)
+    cluster.status = Cluster.STATUS_ACTIVE
+    cluster.updated_at = int(time.time())
+    cluster.write_to_db(db_controller.kv_store)
+    cluster_events.cluster_create(cluster)
 
-
-        raise e
-
-    if role == 'management':
-        mgmt_node_ops.add_mgmt_node(DEV_IP, cluster_id)
-        cluster_obj = db_controller.get_cluster_by_id(cluster_id)
-        nodes = db_controller.get_mgmt_nodes(cluster_id=cluster_id)
-        if len(nodes) >= 3:
-            logger.info("Waiting for FDB container to be active...")
-            fdb_cont = None
-            retries = 30
-            while retries > 0 and fdb_cont is None:
-                logger.info("Looking for FDB container...")
-                for cont in node_docker.containers.list(all=True):
-                    logger.debug(cont.attrs['Name'])
-                    if cont.attrs['Name'].startswith("/app_fdb"):
-                        fdb_cont = cont
-                        break
-                if fdb_cont:
-                    logger.info("FDB container found")
-                    break
-                else:
-                    retries -= 1
-                    time.sleep(5)
-
-            if not fdb_cont:
-                logger.warning("FDB container was not found")
-            else:
-                retries = 10
-                while retries > 0:
-                    info = node_docker.containers.get(fdb_cont.attrs['Id'])
-                    status = info.attrs['State']["Status"]
-                    is_running = info.attrs['State']["Running"]
-                    if not is_running:
-                        logger.info("Container is not running, waiting...")
-                        time.sleep(3)
-                        retries -= 1
-                    else:
-                        logger.info(f"Container status: {status}, Is Running: {is_running}")
-                        break
-
-            logger.info("Configuring Double DB...")
-            time.sleep(3)
-            out = scripts.set_db_config_double()
-            cluster_obj.ha_type = "ha"
-            cluster_obj.write_to_db(db_controller.kv_store)
-            logger.info("Configuring DB > Done")
-
-    elif role == "storage":
-        # add storage node
-        fdb_cont = None
-        retries = 30
-        while retries > 0 and fdb_cont is None:
-            logger.info("Looking for SpdkAppProxy container...")
-            for cont in node_docker.containers.list(all=True):
-                logger.debug(cont.attrs['Name'])
-                if cont.attrs['Name'].startswith("/app_SpdkAppProxy"):
-                    fdb_cont = cont
-                    break
-            if fdb_cont:
-                logger.info("SpdkAppProxy container found")
-                break
-            else:
-                retries -= 1
-                time.sleep(5)
-
-        if not fdb_cont:
-            logger.warning("SpdkAppProxy container was not found")
-        else:
-            retries = 10
-            while retries > 0:
-                info = node_docker.containers.get(fdb_cont.attrs['Id'])
-                status = info.attrs['State']["Status"]
-                is_running = info.attrs['State']["Running"]
-                if not is_running:
-                    logger.info("Container is not running, waiting...")
-                    time.sleep(3)
-                    retries -= 1
-                else:
-                    logger.info(f"Container status: {status}, Is Running: {is_running}")
-                    break
-        storage_node_ops.add_storage_node(cluster_id, ifname, data_nics)
-
-    logger.info("Node joined the cluster")
-
-
-def add_cluster(blk_size, page_size_in_blocks, model_ids, tls,
-                auth_hosts_only, dhchap, nqn, iscsi, cli_pass):
-    db_controller = DBController()
-    logger.info("Adding new cluster")
-    c = Cluster()
-    c.uuid = str(uuid.uuid4())
-    c.blk_size = blk_size
-    c.page_size_in_blocks = page_size_in_blocks
-    c.model_ids = model_ids
-    c.tls = tls
-    c.auth_hosts_only = auth_hosts_only
-    c.nqn = nqn
-    c.iscsi = iscsi
-    c.dhchap = dhchap
-    c.cli_pass = cli_pass
-    c.status = Cluster.STATUS_ACTIVE
-    c.updated_at = int(time.time())
-    c.write_to_db(db_controller.kv_store)
-    logger.info("New Cluster has been created")
-    logger.info(c.uuid)
+    return cluster.get_id()
 
 
 def show_cluster(cl_id, is_json=False):
@@ -382,7 +284,7 @@ def show_cluster(cl_id, is_json=False):
         logger.error(f"Cluster not found {cl_id}")
         return False
 
-    st = db_controller.
+    st = db_controller.get_storage_nodes_by_cluster_id(cl_id)
     data = []
     for node in st:
         for dev in node.nvme_devices:
@@ -443,7 +345,7 @@ def cluster_set_read_only(cl_id):
 
     ret = set_cluster_status(cl_id, Cluster.STATUS_READONLY)
     if ret:
-        st = db_controller.
+        st = db_controller.get_storage_nodes_by_cluster_id(cl_id)
         for node in st:
             for dev in node.nvme_devices:
                 if dev.status == NVMeDevice.STATUS_ONLINE:
@@ -463,7 +365,7 @@ def cluster_set_active(cl_id):
 
     ret = set_cluster_status(cl_id, Cluster.STATUS_ACTIVE)
     if ret:
-        st = db_controller.
+        st = db_controller.get_storage_nodes_by_cluster_id(cl_id)
         for node in st:
             for dev in node.nvme_devices:
                 if dev.status == NVMeDevice.STATUS_READONLY:
@@ -474,11 +376,11 @@ def cluster_set_active(cl_id):
 def list():
     db_controller = DBController()
     cls = db_controller.get_clusters()
-    st = db_controller.get_storage_nodes()
     mt = db_controller.get_mgmt_nodes()
 
     data = []
     for cl in cls:
+        st = db_controller.get_storage_nodes_by_cluster_id(cl.get_id())
         data.append({
             "UUID": cl.id,
             "NQN": cl.nqn,
@@ -623,7 +525,7 @@ def get_logs(cluster_id, is_json=False):
         vuid = record.object_dict['vuid']
 
         out.append({
-            "Date":
+            "Date": record.get_date_string(),
             "NodeId": record.node_id,
             "Event": record.event,
             "Level": record.event_level,
@@ -655,12 +557,12 @@ def update_cluster(cl_id):
         logger.error(f"Cluster not found {cl_id}")
         return False
 
-    try:
-
-
-
-    except Exception as e:
-
+    # try:
+    #     out, _, ret_code = shell_utils.run_command("pip install sbcli-dev --upgrade")
+    #     if ret_code == 0:
+    #         logger.info("sbcli-dev is upgraded")
+    # except Exception as e:
+    #     logger.error(e)
 
     try:
         logger.info("Updating mgmt cluster")
@@ -675,7 +577,7 @@ def update_cluster(cl_id):
     except Exception as e:
         print(e)
 
-    for node in db_controller.
+    for node in db_controller.get_storage_nodes_by_cluster_id(cl_id):
         node_docker = docker.DockerClient(base_url=f"tcp://{node.mgmt_ip}:2375", version="auto")
         logger.info(f"Pulling image {constants.SIMPLY_BLOCK_SPDK_ULTRA_IMAGE}")
         node_docker.images.pull(constants.SIMPLY_BLOCK_SPDK_ULTRA_IMAGE)
@@ -688,28 +590,6 @@ def update_cluster(cl_id):
     return True
 
 
-def list_tasks(cluster_id):
-    db_controller = DBController()
-    cluster = db_controller.get_cluster_by_id(cluster_id)
-    if not cluster:
-        logger.error("Cluster not found: %s", cluster_id)
-        return False
-
-    data = []
-    tasks = db_controller.get_job_tasks(cluster_id)
-    for task in tasks:
-        data.append({
-            "UUID": task.uuid,
-            "Device": task.device_id,
-            "Function": task.function_name,
-            "Retry": f"{task.retry}/{constants.TASK_EXEC_RETRY_COUNT}",
-            "Status": task.status,
-            "Result": task.function_result,
-            "Date": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(task.date)),
-        })
-    return utils.print_table(data)
-
-
 def cluster_grace_startup(cl_id):
     db_controller = DBController()
     cluster = db_controller.get_cluster_by_id(cl_id)
@@ -719,7 +599,7 @@ def cluster_grace_startup(cl_id):
     logger.info(f"Unsuspending cluster: {cl_id}")
     unsuspend_cluster(cl_id)
 
-    st = db_controller.
+    st = db_controller.get_storage_nodes_by_cluster_id(cl_id)
     for node in st:
         logger.info(f"Restarting node: {node.get_id()}")
         storage_node_ops.restart_storage_node(node.get_id())
@@ -738,7 +618,7 @@ def cluster_grace_shutdown(cl_id):
         logger.error(f"Cluster not found {cl_id}")
         return False
 
-    st = db_controller.
+    st = db_controller.get_storage_nodes_by_cluster_id(cl_id)
     for node in st:
         logger.info(f"Suspending node: {node.get_id()}")
         storage_node_ops.suspend_storage_node(node.get_id())
@@ -748,3 +628,26 @@ def cluster_grace_shutdown(cl_id):
     logger.info(f"Suspending cluster: {cl_id}")
     suspend_cluster(cl_id)
     return True
+
+
+def delete_cluster(cl_id):
+    db_controller = DBController()
+    cluster = db_controller.get_cluster_by_id(cl_id)
+    if not cluster:
+        logger.error(f"Cluster not found {cl_id}")
+        return False
+
+    nodes = db_controller.get_storage_nodes_by_cluster_id(cl_id)
+    if nodes:
+        logger.error("Can only remove Empty cluster, Storage nodes found")
+        return False
+
+    pools = db_controller.get_pools(cl_id)
+    if pools:
+        logger.error("Can only remove Empty cluster, Pools found")
+        return False
+
+    logger.info(f"Deleting Cluster {cl_id}")
+    cluster_events.cluster_delete(cluster)
+    cluster.remove(db_controller.kv_store)
+    logger.info("Done")
sbcli_pre-1.2.7/simplyblock_core/constants.py
@@ -0,0 +1,91 @@
+import logging
+import os
+
+KVD_DB_VERSION = 730
+KVD_DB_FILE_PATH = '/etc/foundationdb/fdb.cluster'
+KVD_DB_TIMEOUT_MS = 10000
+SPK_DIR = '/home/ec2-user/spdk'
+RPC_HTTP_PROXY_PORT = 8080
+LOG_LEVEL = logging.INFO
+LOG_WEB_DEBUG = True
+
+INSTALL_DIR = os.path.dirname(os.path.realpath(__file__))
+
+NODE_MONITOR_INTERVAL_SEC = 3
+DEVICE_MONITOR_INTERVAL_SEC = 5
+STAT_COLLECTOR_INTERVAL_SEC = 60*5  # 5 minutes
+LVOL_STAT_COLLECTOR_INTERVAL_SEC = 2
+LVOL_MONITOR_INTERVAL_SEC = 60
+DEV_MONITOR_INTERVAL_SEC = 10
+DEV_STAT_COLLECTOR_INTERVAL_SEC = 2
+PROT_STAT_COLLECTOR_INTERVAL_SEC = 2
+DISTR_EVENT_COLLECTOR_INTERVAL_SEC = 2
+DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS = 10
+CAP_MONITOR_INTERVAL_SEC = 30
+SSD_VENDOR_WHITE_LIST = ["1d0f:cd01", "1d0f:cd00"]
+
+PMEM_DIR = '/tmp/pmem'
+
+NVME_PROGRAM_FAIL_COUNT = 50
+NVME_ERASE_FAIL_COUNT = 50
+NVME_CRC_ERROR_COUNT = 50
+DEVICE_OVERLOAD_STDEV_VALUE = 50
+DEVICE_OVERLOAD_CAPACITY_THRESHOLD = 50
+
+CLUSTER_NQN = "nqn.2023-02.io.simplyblock"
+
+weights = {
+    "lvol": 50,
+    "cpu": 10,
+    "r_io": 10,
+    "w_io": 10,
+    "r_b": 10,
+    "w_b": 10
+}
+
+# To use 75% of hugepages to calculate ssd size to use for the ocf bdev
+CACHING_NODE_MEMORY_FACTOR = 0.75
+
+HEALTH_CHECK_INTERVAL_SEC = 60
+
+GRAYLOG_CHECK_INTERVAL_SEC = 60
+
+FDB_CHECK_INTERVAL_SEC = 60
+
+SIMPLY_BLOCK_DOCKER_IMAGE = "simplyblock/simplyblock:pre-release"
+SIMPLY_BLOCK_CLI_NAME = "sbcli"
+TASK_EXEC_INTERVAL_SEC = 30
+TASK_EXEC_RETRY_COUNT = 8
+
+SIMPLY_BLOCK_SPDK_CORE_IMAGE = "simplyblock/spdk-core:latest"
+SIMPLY_BLOCK_SPDK_ULTRA_IMAGE = "simplyblock/spdk:prerelease-latest"
+
+GELF_PORT = 12201
+
+MIN_HUGE_PAGE_MEMORY_FOR_LVOL = 209715200
+MIN_SYS_MEMORY_FOR_LVOL = 524288000
+EXTRA_SMALL_POOL_COUNT = 1024
+EXTRA_LARGE_POOL_COUNT = 128
+EXTRA_HUGE_PAGE_MEMORY = 2147483648
+EXTRA_SYS_MEMORY = 2147483648
+
+INSTANCE_STORAGE_DATA = {
+    'i4i.large': {'number_of_devices': 1, 'size_per_device_gb': 468},
+    'i4i.xlarge': {'number_of_devices': 1, 'size_per_device_gb': 937},
+    'i4i.2xlarge': {'number_of_devices': 1, 'size_per_device_gb': 1875},
+    'i4i.4xlarge': {'number_of_devices': 1, 'size_per_device_gb': 3750},
+    'i4i.8xlarge': {'number_of_devices': 2, 'size_per_device_gb': 3750},
+    'i4i.12xlarge': {'number_of_devices': 3, 'size_per_device_gb': 3750},
+    'i4i.16xlarge': {'number_of_devices': 4, 'size_per_device_gb': 3750},
+    'i4i.24xlarge': {'number_of_devices': 6, 'size_per_device_gb': 3750},
+    'i4i.32xlarge': {'number_of_devices': 8, 'size_per_device_gb': 3750},
+    'i4i.metal': {'number_of_devices': 8, 'size_per_device_gb': 3750},
+    'i3en.large': {'number_of_devices': 1, 'size_per_device_gb': 1250},
+    'i3en.xlarge': {'number_of_devices': 1, 'size_per_device_gb': 2500},
+    'i3en.2xlarge': {'number_of_devices': 2, 'size_per_device_gb': 2500},
+    'i3en.3xlarge': {'number_of_devices': 1, 'size_per_device_gb': 7500},
+    'i3en.6xlarge': {'number_of_devices': 2, 'size_per_device_gb': 7500},
+    'i3en.12xlarge': {'number_of_devices': 4, 'size_per_device_gb': 7500},
+    'i3en.24xlarge': {'number_of_devices': 8, 'size_per_device_gb': 7500},
+    'i3en.metal': {'number_of_devices': 8, 'size_per_device_gb': 7500},
+}
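The new constants.py above also introduces the INSTANCE_STORAGE_DATA table, which maps AWS instance types to the number and per-device size (in GB) of local NVMe drives they are expected to expose. A short illustrative lookup is sketched below; the helper function is an assumption for this note only and is not part of the package.

from simplyblock_core import constants

def expected_instance_storage_gb(instance_type):
    # Total expected local NVMe capacity for a known instance type, or None if unknown.
    # Hypothetical helper, not shipped with sbcli-pre.
    entry = constants.INSTANCE_STORAGE_DATA.get(instance_type)
    if not entry:
        return None
    return entry['number_of_devices'] * entry['size_per_device_gb']

# Example: an i3en.6xlarge is expected to expose 2 x 7500 GB = 15000 GB.
print(expected_instance_storage_gb('i3en.6xlarge'))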
{sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/caching_node_controller.py
@@ -66,7 +66,6 @@ def addNvmeDevices(cluster, rpc_client, devs, snode):
             'model_id': model_number,
             'serial_number': nvme_driver_data['ctrlr_data']['serial_number'],
             'nvme_bdev': nvme_bdev,
-            'alloc_bdev': nvme_bdev,
             'node_id': snode.get_id(),
             'cluster_id': snode.cluster_id,
             'status': 'online'
@@ -88,10 +87,10 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list, spdk_cpu_mask, spd
     snode_api = CNodeClient(node_ip)
 
     node_info, _ = snode_api.info()
-
-
+    system_id = node_info['system_id']
     hostname = node_info['hostname']
-
+    logger.info(f"Node found: {node_info['hostname']}")
+    snode = db_controller.get_caching_node_by_system_id(system_id)
     if snode:
         logger.error("Node already exists, try remove it first.")
         return False
@@ -119,9 +118,8 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list, spdk_cpu_mask, spd
     snode.uuid = str(uuid.uuid4())
     snode.status = CachingNode.STATUS_IN_CREATION
     # snode.baseboard_sn = node_info['system_id']
-    snode.system_uuid =
+    snode.system_uuid = system_id
     snode.hostname = hostname
-    # snode.host_nqn = subsystem_nqn
     snode.subsystem = subsystem_nqn
     snode.data_nics = data_nics
     snode.mgmt_ip = node_info['network_interface'][iface_name]['ip']
@@ -350,6 +348,10 @@ def connect(caching_node_id, lvol_id):
         logger.info(f"Already connected, dev path: {clvol.device_path}")
         return False
 
+    if cnode.cluster_id != pool.cluster_id:
+        logger.error("Caching node and LVol are in different clusters")
+        return False
+
     logger.info("Connecting to remote LVOL")
     mini_id = lvol.get_id().split("-")[0]
     rem_name = f"rem_{mini_id}"
{sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/cluster_events.py
@@ -60,3 +60,12 @@ def cluster_prov_cap_crit(cluster, util):
     msg = f"Cluster provisioned capacity reached: {util}%"
     _cluster_cap_event(cluster, msg, event_level=EventObj.LEVEL_CRITICAL)
 
+
+def cluster_delete(cluster):
+    ec.log_event_cluster(
+        cluster_id=cluster.get_id(),
+        domain=ec.DOMAIN_CLUSTER,
+        event=ec.EVENT_OBJ_DELETED,
+        db_object=cluster,
+        caused_by=ec.CAUSED_BY_CLI,
+        message=f"Cluster deleted {cluster.get_id()}")