sbcli-pre 1.2.4 (zip) → 1.2.5 (zip)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/PKG-INFO +20 -5
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/README.md +19 -4
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/env_var +1 -1
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/PKG-INFO +20 -5
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/SOURCES.txt +5 -5
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_cli/cli.py +115 -113
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/cluster_ops.py +238 -141
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/constants.py +7 -5
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/caching_node_controller.py +6 -8
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/cluster_events.py +0 -9
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/device_controller.py +63 -56
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/events_controller.py +3 -5
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/health_controller.py +40 -30
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/lvol_controller.py +38 -51
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/pool_controller.py +4 -8
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/snapshot_controller.py +3 -9
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/distr_controller.py +9 -13
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/kv_store.py +29 -47
- sbcli_pre-1.2.5/simplyblock_core/mgmt_node_ops.py +80 -0
- sbcli_pre-1.2.5/simplyblock_core/models/deployer.py +62 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/events.py +1 -9
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/job_schedule.py +0 -6
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/nvme_device.py +4 -42
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/storage_node.py +1 -9
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/rpc_client.py +10 -55
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/__init__.py +4 -0
- sbcli_pre-1.2.4/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 → sbcli_pre-1.2.5/simplyblock_core/scripts/alerting/alert_resources.yaml +5 -54
- sbcli_pre-1.2.5/simplyblock_core/scripts/apply_dashboard.sh +22 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/cluster.json +1 -1
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/deploy_stack.sh +0 -2
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +13 -22
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/docker-compose-swarm.yml +2 -17
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/haproxy.cfg +0 -15
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/install_deps.sh +0 -1
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/capacity_and_stats_collector.py +1 -1
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/device_monitor.py +46 -5
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/distr_event_collector.py +11 -10
- sbcli_pre-1.2.5/simplyblock_core/services/health_check_service.py +136 -0
- sbcli_pre-1.2.4/simplyblock_core/services/tasks_runner_restart.py → sbcli_pre-1.2.5/simplyblock_core/services/job_tasks.py +46 -95
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/lvol_monitor.py +1 -1
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/lvol_stat_collector.py +1 -1
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/port_stat_collector.py +1 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/storage_node_monitor.py +44 -49
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/snode_client.py +0 -12
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/storage_node_ops.py +336 -525
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/utils.py +1 -46
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/app.py +2 -1
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/snode_ops.py +25 -103
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_cluster.py +43 -20
- sbcli_pre-1.2.5/simplyblock_web/blueprints/web_api_deployer.py +394 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_device.py +7 -10
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_lvol.py +5 -9
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_pool.py +5 -14
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_storage_node.py +10 -3
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/node_utils.py +2 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/utils.py +0 -8
- sbcli_pre-1.2.4/simplyblock_core/controllers/tasks_controller.py +0 -103
- sbcli_pre-1.2.4/simplyblock_core/controllers/tasks_events.py +0 -37
- sbcli_pre-1.2.4/simplyblock_core/mgmt_node_ops.py +0 -205
- sbcli_pre-1.2.4/simplyblock_core/services/health_check_service.py +0 -134
- sbcli_pre-1.2.4/simplyblock_core/services/tasks_runner_migration.py +0 -61
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/pyproject.toml +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/dependency_links.txt +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/entry_points.txt +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/requires.txt +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/top_level.txt +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/setup.cfg +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/setup.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_cli/main.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/__init__.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/cnode_client.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/compute_node_ops.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/__init__.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/device_events.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/lvol_events.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/mgmt_events.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/pool_events.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/snapshot_events.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/storage_events.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/__init__.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/base_model.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/caching_node.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/cluster.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/compute_node.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/global_settings.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/iface.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/lvol_model.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/mgmt_node.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/pool.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/port_stat.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/snapshot.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/stats.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/pci_utils.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/config_docker.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/devices.json +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/pools.json +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/datasource.yml +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/db_config_double.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/db_config_single.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/prometheus.yml +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/run_ssh.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/set_db_config.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/stack_deploy_wait.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/__init__.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/caching_node_monitor.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/cap_monitor.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/install_service.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/log_agg_service.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/remove_service.sh +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/service_template.service +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/shell_utils.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/__init__.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/auth_middleware.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/__init__.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/node_api_basic.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/caching_node_app.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/caching_node_app_k8s.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/node_webapp.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/snode_app.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/delete.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/deploy.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/deploy_cnode.yaml +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/deploy_spdk.yaml +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/is_up.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/list_deps.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/rpac.yaml +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/tst.py +0 -0
- {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
@@ -14,11 +14,11 @@ import docker
 from simplyblock_core import constants, scripts, distr_controller
 from simplyblock_core import utils
 from simplyblock_core.controllers import lvol_controller, storage_events, snapshot_controller, device_events, \
-    device_controller
+    device_controller
 from simplyblock_core.kv_store import DBController
 from simplyblock_core import shell_utils
 from simplyblock_core.models.iface import IFace
-from simplyblock_core.models.nvme_device import NVMeDevice
+from simplyblock_core.models.nvme_device import NVMeDevice
 from simplyblock_core.models.storage_node import StorageNode
 from simplyblock_core.pci_utils import get_nvme_devices, bind_spdk_driver
 from simplyblock_core.rpc_client import RPCClient
@@ -81,50 +81,55 @@ def _get_if_ip_address(ifname):
 
 
 def addNvmeDevices(cluster, rpc_client, devs, snode):
+    sequential_number = 0
     devices = []
     ret = rpc_client.bdev_nvme_controller_list()
-
-
-
-
-    except:
-        pass
+    if ret:
+        ctr_map = {i["ctrlrs"][0]['trid']['traddr']: i["name"] for i in ret}
+    else:
+        ctr_map = {}
 
-    next_physical_label = get_next_physical_device_order()
     for index, pcie in enumerate(devs):
 
         if pcie in ctr_map:
-
+            nvme_bdev = ctr_map[pcie] + "n1"
         else:
-
-            ret, err = rpc_client.bdev_nvme_controller_attach(
+            name = "nvme_%s" % index
+            ret, err = rpc_client.bdev_nvme_controller_attach(name, pcie)
             time.sleep(2)
+            nvme_bdev = f"{name}n1"
 
-        nvme_bdev = f"{nvme_controller}n1"
-        rpc_client.bdev_examine(nvme_bdev)
-        time.sleep(5)
         ret = rpc_client.get_bdevs(nvme_bdev)
-
-
-
-
+        if ret:
+            nvme_dict = ret[0]
+            nvme_driver_data = nvme_dict['driver_specific']['nvme'][0]
+            model_number = nvme_driver_data['ctrlr_data']['model_number']
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            size = nvme_dict['block_size'] * nvme_dict['num_blocks']
+            device_partitions_count = int(size / (cluster.blk_size * cluster.page_size_in_blocks))
+            devices.append(
+                NVMeDevice({
+                    'uuid': str(uuid.uuid4()),
+                    'device_name': nvme_dict['name'],
+                    'sequential_number': sequential_number,
+                    'partitions_count': device_partitions_count,
+                    'capacity': size,
+                    'size': size,
+                    'pcie_address': nvme_driver_data['pci_address'],
+                    'model_id': model_number,
+                    'serial_number': nvme_driver_data['ctrlr_data']['serial_number'],
+                    'nvme_bdev': nvme_bdev,
+                    'alloc_bdev': nvme_bdev,
+                    'node_id': snode.get_id(),
+                    'cluster_id': snode.cluster_id,
+
+                    # 'nvmf_nqn': subsystem_nqn,
+                    # 'nvmf_ip': IP,
+                    # 'nvmf_port': 4420,
+
+                    'status': 'online'
+                }))
+            sequential_number += device_partitions_count
     return devices
 
 
@@ -192,10 +197,10 @@ def _run_nvme_smart_log_add(dev_name):
     return data
 
 
-def get_next_cluster_device_order(db_controller
+def get_next_cluster_device_order(db_controller):
     max_order = 0
     found = False
-    for node in db_controller.
+    for node in db_controller.get_storage_nodes():
         for dev in node.nvme_devices:
             found = True
             max_order = max(max_order, dev.cluster_device_order)
@@ -204,320 +209,92 @@ def get_next_cluster_device_order(db_controller, cluster_id):
     return 0
 
 
-def
+def _prepare_cluster_devices(snode, after_restart=False):
     db_controller = DBController()
-    max_order = 0
-    found = False
-    for node in db_controller.get_storage_nodes():
-        for dev in node.nvme_devices:
-            found = True
-            max_order = max(max_order, dev.physical_label)
-    if found:
-        return max_order + 1
-    return 0
-
-
-def _search_for_partitions(rpc_client, nvme_device):
-    partitioned_devices = []
-    for bdev in rpc_client.get_bdevs():
-        name = bdev['name']
-        if name.startswith(f"{nvme_device.nvme_bdev}p"):
-            new_dev = NVMeDevice(nvme_device.to_dict())
-            new_dev.uuid = str(uuid.uuid4())
-            new_dev.device_name = name
-            new_dev.nvme_bdev = name
-            new_dev.size = bdev['block_size'] * bdev['num_blocks']
-            partitioned_devices.append(new_dev)
-    return partitioned_devices
-
 
-def _create_jm_stack_on_raid(rpc_client, jm_nvme_bdevs, snode, after_restart):
-    raid_bdev = f"raid_jm_{snode.get_id()}"
-    ret = rpc_client.bdev_raid_create(raid_bdev, jm_nvme_bdevs)
-    if not ret:
-        logger.error(f"Failed to create raid_jm_{snode.get_id()}")
-        return False
-    alceml_name = f"alceml_jm_{snode.get_id()}"
-    pba_init_mode = 3
-    if after_restart:
-        pba_init_mode = 2
-    ret = rpc_client.bdev_alceml_create(alceml_name, raid_bdev, str(uuid.uuid4()), pba_init_mode=pba_init_mode)
-    if not ret:
-        logger.error(f"Failed to create alceml bdev: {alceml_name}")
-        return False
-
-    jm_bdev = f"jm_{snode.get_id()}"
-    ret = rpc_client.bdev_jm_create(jm_bdev, alceml_name)
-    if not ret:
-        logger.error(f"Failed to create {jm_bdev}")
-        return False
-    ret = rpc_client.get_bdevs(raid_bdev)
-
-    return JMDevice({
-        'uuid': str(uuid.uuid4()),
-        'device_name': jm_bdev,
-        'size': ret[0]["block_size"] * ret[0]["num_blocks"],
-        'status': JMDevice.STATUS_ONLINE,
-        'jm_nvme_bdev_list': jm_nvme_bdevs,
-        'raid_bdev': raid_bdev,
-        'alceml_bdev': alceml_name,
-        'jm_bdev': jm_bdev
-    })
-
-
-def _create_jm_stack_on_device(rpc_client, nvme, snode, after_restart):
-
-    alceml_id = nvme.get_id()
-    alceml_name = device_controller.get_alceml_name(alceml_id)
-    logger.info(f"adding {alceml_name}")
-
-    pba_init_mode = 3
-    if after_restart:
-        pba_init_mode = 2
-    ret = rpc_client.bdev_alceml_create(alceml_name, nvme.nvme_bdev, alceml_id, pba_init_mode=pba_init_mode)
-    if not ret:
-        logger.error(f"Failed to create alceml bdev: {alceml_name}")
-        return False
-
-    jm_bdev = f"jm_{snode.get_id()}"
-    ret = rpc_client.bdev_jm_create(jm_bdev, alceml_name)
-    if not ret:
-        logger.error(f"Failed to create {jm_bdev}")
-        return False
-
-    return JMDevice({
-        'uuid': alceml_id,
-        'device_name': jm_bdev,
-        'size': nvme.size,
-        'status': JMDevice.STATUS_ONLINE,
-        'alceml_bdev': alceml_name,
-        'nvme_bdev': nvme.nvme_bdev,
-        'jm_bdev': jm_bdev
-    })
-
-
-def _create_storage_device_stack(rpc_client, nvme, snode, after_restart):
-    test_name = f"{nvme.nvme_bdev}_test"
-    ret = rpc_client.bdev_passtest_create(test_name, nvme.nvme_bdev)
-    if not ret:
-        logger.error(f"Failed to create passtest bdev {test_name}")
-        return False
-    alceml_id = nvme.get_id()
-    alceml_name = device_controller.get_alceml_name(alceml_id)
-    logger.info(f"adding {alceml_name}")
-    pba_init_mode = 3
-    if after_restart:
-        pba_init_mode = 2
-    ret = rpc_client.bdev_alceml_create(alceml_name, test_name, alceml_id, pba_init_mode=pba_init_mode,
-                                        dev_cpu_mask=snode.dev_cpu_mask)
-    if not ret:
-        logger.error(f"Failed to create alceml bdev: {alceml_name}")
-        return False
-
-    # add pass through
-    pt_name = f"{alceml_name}_PT"
-    ret = rpc_client.bdev_PT_NoExcl_create(pt_name, alceml_name)
-    if not ret:
-        logger.error(f"Failed to create pt noexcl bdev: {pt_name}")
-        return False
-
-    subsystem_nqn = snode.subsystem + ":dev:" + alceml_id
-    logger.info("creating subsystem %s", subsystem_nqn)
-    ret = rpc_client.subsystem_create(subsystem_nqn, 'sbcli-cn', alceml_id)
-    IP = None
-    for iface in snode.data_nics:
-        if iface.ip4_address:
-            tr_type = iface.get_transport_type()
-            ret = rpc_client.transport_list()
-            found = False
-            if ret:
-                for ty in ret:
-                    if ty['trtype'] == tr_type:
-                        found = True
-            if found is False:
-                ret = rpc_client.transport_create(tr_type)
-            logger.info("adding listener for %s on IP %s" % (subsystem_nqn, iface.ip4_address))
-            ret = rpc_client.listeners_create(subsystem_nqn, tr_type, iface.ip4_address, "4420")
-            IP = iface.ip4_address
-            break
-    logger.info(f"add {pt_name} to subsystem")
-    ret = rpc_client.nvmf_subsystem_add_ns(subsystem_nqn, pt_name)
-    if not ret:
-        logger.error(f"Failed to add: {pt_name} to the subsystem: {subsystem_nqn}")
-        return False
-
-    nvme.testing_bdev = test_name
-    nvme.alceml_bdev = alceml_name
-    nvme.pt_bdev = pt_name
-    nvme.nvmf_nqn = subsystem_nqn
-    nvme.nvmf_ip = IP
-    nvme.nvmf_port = 4420
-    nvme.io_error = False
-    nvme.status = NVMeDevice.STATUS_ONLINE
-    return nvme
-
-
-def _create_device_partitions(rpc_client, nvme, snode):
-    nbd_device = rpc_client.nbd_start_disk(nvme.nvme_bdev)
-    time.sleep(3)
-    if not nbd_device:
-        logger.error(f"Failed to start nbd dev")
-        return False
-    snode_api = SNodeClient(snode.api_endpoint)
-    result, error = snode_api.make_gpt_partitions(
-        nbd_device, snode.jm_percent, snode.num_partitions_per_dev)
-    if error:
-        logger.error(f"Failed to make partitions")
-        logger.error(error)
-        return False
-    time.sleep(3)
-    rpc_client.nbd_stop_disk(nbd_device)
-    time.sleep(1)
-    rpc_client.bdev_nvme_detach_controller(nvme.nvme_controller)
-    time.sleep(1)
-    rpc_client.bdev_nvme_controller_attach(nvme.nvme_controller, nvme.pcie_address)
-    time.sleep(1)
-    rpc_client.bdev_examine(nvme.nvme_bdev)
-    time.sleep(1)
-    return True
-
-
-def _prepare_cluster_devices_partitions(snode, devices):
-    db_controller = DBController()
     rpc_client = RPCClient(
         snode.mgmt_ip, snode.rpc_port,
         snode.rpc_username, snode.rpc_password)
 
-
-    jm_devices = []
-    dev_order = get_next_cluster_device_order(db_controller, snode.cluster_id)
-    for index, nvme in enumerate(devices):
-        if nvme.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_UNAVAILABLE, NVMeDevice.STATUS_READONLY]:
-            logger.debug(f"Device is skipped: {nvme.get_id()}, status: {nvme.status}")
-            continue
-
-        # look for partitions
-        partitioned_devices = _search_for_partitions(rpc_client, nvme)
-        logger.debug("partitioned_devices")
-        logger.debug(partitioned_devices)
-        if len(partitioned_devices) == (1 + snode.num_partitions_per_dev):
-            logger.info("Partitioned devices found")
-        else:
-            logger.info(f"Creating partitions for {nvme.nvme_bdev}")
-            _create_device_partitions(rpc_client, nvme, snode)
-            partitioned_devices = _search_for_partitions(rpc_client, nvme)
-            if len(partitioned_devices) == (1 + snode.num_partitions_per_dev):
-                logger.info("Device partitions created")
-            else:
-                logger.error("Failed to create partitions")
-                return False
-
-        jm_devices.append(partitioned_devices.pop(0))
-
-        for dev in partitioned_devices:
-            new_device = _create_storage_device_stack(rpc_client, dev, snode, after_restart=False)
-            if not new_device:
-                logger.error("failed to create dev stack")
-                return False
-            new_device.cluster_device_order = dev_order
-            dev_order += 1
-            new_devices.append(new_device)
-            device_events.device_create(new_device)
-
-    snode.nvme_devices = new_devices
-
-    if jm_devices:
-        jm_nvme_bdevs = [dev.nvme_bdev for dev in jm_devices]
-        jm_device = _create_jm_stack_on_raid(rpc_client, jm_nvme_bdevs, snode, after_restart=False)
-        if not jm_device:
-            logger.error(f"Failed to create JM device")
-            return False
-        snode.jm_device = jm_device
-
-    return True
-
-
-def _prepare_cluster_devices_jm_on_dev(snode, devices):
-    db_controller = DBController()
-
-    jm_device = devices[0]
-    # Set device cluster order
-    dev_order = get_next_cluster_device_order(db_controller, snode.cluster_id)
-    for index, nvme in enumerate(devices):
-        nvme.cluster_device_order = dev_order
-        dev_order += 1
-        if nvme.size < jm_device.size:
-            jm_device = nvme
-        device_events.device_create(nvme)
-    jm_device.status = NVMeDevice.STATUS_JM
-
-    rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password)
-
-    new_devices = []
-    for index, nvme in enumerate(devices):
+    for index, nvme in enumerate(snode.nvme_devices):
         if nvme.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_UNAVAILABLE,
                                NVMeDevice.STATUS_JM, NVMeDevice.STATUS_READONLY]:
             logger.debug(f"Device is not online or unavailable: {nvme.get_id()}, status: {nvme.status}")
             continue
 
-
-
-
-
-
-            snode.jm_device = jm_device
-        else:
-            new_device = _create_storage_device_stack(rpc_client, nvme, snode, after_restart=False)
-            if not new_device:
-                logger.error("failed to create dev stack")
-                return False
-            new_device.cluster_device_order = dev_order
-            dev_order += 1
-            new_devices.append(new_device)
-            device_events.device_create(new_device)
-
-    snode.nvme_devices = new_devices
-    return True
-
-
-def _prepare_cluster_devices_on_restart(snode):
-    db_controller = DBController()
-
-    rpc_client = RPCClient(
-        snode.mgmt_ip, snode.rpc_port,
-        snode.rpc_username, snode.rpc_password)
-
-    for index, nvme in enumerate(snode.nvme_devices):
-        if nvme.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_UNAVAILABLE, NVMeDevice.STATUS_READONLY]:
-            logger.debug(f"Device is skipped: {nvme.get_id()}, status: {nvme.status}")
-            continue
-
-        dev = _create_storage_device_stack(rpc_client, nvme, snode, after_restart=True)
-        if not dev:
-            logger.error(f"Failed to create dev stack {nvme.get_id()}")
+        test_name = f"{nvme.nvme_bdev}_test"
+        # create testing bdev
+        ret = rpc_client.bdev_passtest_create(test_name, nvme.nvme_bdev)
+        if not ret:
+            logger.error(f"Failed to create bdev: {test_name}")
            return False
-
-
-
-
-
-
+        alceml_id = nvme.get_id()
+        alceml_name = device_controller.get_alceml_name(alceml_id)
+        logger.info(f"adding {alceml_name}")
+        pba_init_mode = 3
+        if after_restart:
+            pba_init_mode = 2
+        ret = rpc_client.bdev_alceml_create(alceml_name, test_name, alceml_id, pba_init_mode=pba_init_mode)
         if not ret:
-            logger.error(f"Failed to create
+            logger.error(f"Failed to create alceml bdev: {alceml_name}")
             return False
-        else:
 
-
+        # create jm
+        if nvme.jm_bdev:
+            ret = rpc_client.bdev_jm_create(nvme.jm_bdev, alceml_name)
+            if not ret:
+                logger.error(f"Failed to create JM bdev: {nvme.jm_bdev}")
+                return False
+            nvme.testing_bdev = test_name
+            nvme.alceml_bdev = alceml_name
+            nvme.io_error = True
+            nvme.status = NVMeDevice.STATUS_JM
+            continue
+
+        # add pass through
+        pt_name = f"{alceml_name}_PT"
+        ret = rpc_client.bdev_PT_NoExcl_create(pt_name, alceml_name)
         if not ret:
-            logger.error(f"Failed to create
+            logger.error(f"Failed to create pt noexcl bdev: {pt_name}")
             return False
 
-
-
+        subsystem_nqn = snode.subsystem + ":dev:" + alceml_id
+        logger.info("creating subsystem %s", subsystem_nqn)
+        ret = rpc_client.subsystem_create(subsystem_nqn, 'sbcli-cn', alceml_id)
+        IP = None
+        for iface in snode.data_nics:
+            if iface.ip4_address:
+                tr_type = iface.get_transport_type()
+                ret = rpc_client.transport_list()
+                found = False
+                if ret:
+                    for ty in ret:
+                        if ty['trtype'] == tr_type:
+                            found = True
+                if found is False:
+                    ret = rpc_client.transport_create(tr_type)
+                logger.info("adding listener for %s on IP %s" % (subsystem_nqn, iface.ip4_address))
+                ret = rpc_client.listeners_create(subsystem_nqn, tr_type, iface.ip4_address, "4420")
+                IP = iface.ip4_address
+                break
+        logger.info(f"add {pt_name} to subsystem")
+        ret = rpc_client.nvmf_subsystem_add_ns(subsystem_nqn, pt_name)
         if not ret:
-            logger.error(f"Failed to
+            logger.error(f"Failed to add: {pt_name} to the subsystem: {subsystem_nqn}")
             return False
 
+        nvme.testing_bdev = test_name
+        nvme.alceml_bdev = alceml_name
+        nvme.pt_bdev = pt_name
+        nvme.nvmf_nqn = subsystem_nqn
+        nvme.nvmf_ip = IP
+        nvme.nvmf_port = 4420
+        nvme.io_error = False
+        old_status = nvme.status
+        nvme.status = NVMeDevice.STATUS_ONLINE
+        device_events.device_status_change(nvme, nvme.status, old_status)
+        snode.write_to_db(db_controller.kv_store)
+
     return True
 
 
@@ -530,7 +307,7 @@ def _connect_to_remote_devs(this_node):
 
     remote_devices = []
     # connect to remote devs
-    snodes = db_controller.
+    snodes = db_controller.get_storage_nodes()
     for node_index, node in enumerate(snodes):
         if node.get_id() == this_node.get_id() or node.status == node.STATUS_OFFLINE:
             continue
@@ -549,10 +326,9 @@ def _connect_to_remote_devs(this_node):
     return remote_devices
 
 
-def add_node(cluster_id, node_ip, iface_name, data_nics_list,
+def add_node(cluster_id, node_ip, iface_name, data_nics_list, spdk_cpu_mask,
              spdk_mem, spdk_image=None, spdk_debug=False,
-             small_pool_count=0, large_pool_count=0, small_bufsize=0, large_bufsize=0,
-             num_partitions_per_dev=0, jm_percent=0):
+             small_pool_count=0, large_pool_count=0, small_bufsize=0, large_bufsize=0, jm_device_pcie=None):
     db_controller = DBController()
     kv_store = db_controller.kv_store
 
@@ -623,27 +399,6 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list,
         spdk_mem = huge_free
         logger.info(f"Using the free hugepages for spdk memory: {utils.humanbytes(huge_free)}")
 
-    # Tune cpu maks parameters
-    cpu_count = node_info["cpu_count"]
-    pollers_mask = ""
-    app_thread_mask = ""
-    dev_cpu_mask = ""
-    if cpu_count < 8:
-        mask = (1 << (cpu_count - 1)) - 1
-        mask <<= 1
-        spdk_cpu_mask = f'0x{mask:X}'
-        os_cores = [0]
-    else:
-        os_cores, nvme_pollers_cores, app_thread_core, dev_cpu_cores = \
-            utils.calculate_core_allocation(cpu_count)
-        spdk_cores = nvme_pollers_cores + app_thread_core + dev_cpu_cores
-
-        pollers_mask = utils.generate_mask(nvme_pollers_cores)
-        app_thread_mask = utils.generate_mask(app_thread_core)
-        spdk_cpu_mask = utils.generate_mask(spdk_cores)
-        dev_cpu_mask = utils.generate_mask(dev_cpu_cores)
-
-
     logger.info("Joining docker swarm...")
     cluster_docker = utils.get_docker_client(cluster_id)
     cluster_ip = cluster_docker.info()["Swarm"]["NodeAddr"]
@@ -721,19 +476,12 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list,
     snode.spdk_image = spdk_image or ""
     snode.spdk_debug = spdk_debug or 0
     snode.write_to_db(kv_store)
-    snode.app_thread_mask = app_thread_mask or ""
-    snode.pollers_mask = pollers_mask or ""
-    snode.dev_cpu_mask = dev_cpu_mask or ""
-    snode.os_cores = os_cores or []
 
     snode.iobuf_small_pool_count = small_pool_count or 0
     snode.iobuf_large_pool_count = large_pool_count or 0
     snode.iobuf_small_bufsize = small_bufsize or 0
     snode.iobuf_large_bufsize = large_bufsize or 0
 
-    snode.num_partitions_per_dev = num_partitions_per_dev
-    snode.jm_percent = jm_percent
-
     snode.write_to_db(kv_store)
 
     # creating RPCClient instance
@@ -751,41 +499,13 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list,
         logger.error("Failed to set iobuf options")
         return False
 
-    # 2-
-    ret = rpc_client.sock_impl_set_options()
-    if not ret:
-        logger.error("Failed socket implement set options")
-        return False
-
-    # 3- set nvme config
-    if snode.pollers_mask:
-        ret = rpc_client.nvmf_set_config(snode.pollers_mask)
-        if not ret:
-            logger.error("Failed to set pollers mask")
-            return False
-
-    # 4- start spdk framework
+    # 2- start spdk framework
     ret = rpc_client.framework_start_init()
     if not ret:
         logger.error("Failed to start framework")
         return False
 
-    #
-    if snode.app_thread_mask:
-        ret = rpc_client.thread_get_stats()
-        app_thread_process_id = 0
-        if ret.get("threads"):
-            for entry in ret["threads"]:
-                if entry['name'] == 'app_thread':
-                    app_thread_process_id = entry['id']
-                    break
-
-        ret = rpc_client.thread_set_cpumask(app_thread_process_id, snode.app_thread_mask)
-        if not ret:
-            logger.error("Failed to set app thread mask")
-            return False
-
-    # 6- set nvme bdev options
+    # 3- set nvme bdev options
     ret = rpc_client.bdev_nvme_set_options()
     if not ret:
         logger.error("Failed to set nvme options")
@@ -793,18 +513,36 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list,
 
     # get new node info after starting spdk
     node_info, _ = snode_api.info()
-
-    # discover devices
+    # adding devices
     nvme_devs = addNvmeDevices(cluster, rpc_client, node_info['spdk_pcie_list'], snode)
     if not nvme_devs:
         logger.error("No NVMe devices was found!")
         return False
 
+    snode.nvme_devices = nvme_devs
+
+    jm_device = snode.nvme_devices[0]
+    # Set device cluster order
+    dev_order = get_next_cluster_device_order(db_controller)
+    for index, nvme in enumerate(snode.nvme_devices):
+        nvme.cluster_device_order = dev_order
+        dev_order += 1
+        if jm_device_pcie:
+            if nvme.pcie_address == jm_device_pcie:
+                jm_device = nvme
+        elif nvme.size < jm_device.size:
+            jm_device = nvme
+        device_events.device_create(nvme)
+
+    # create jm
+    logger.info(f"Using device for JM: {jm_device.get_id()}")
+    jm_device.jm_bdev = f"jm_{snode.get_id()}"
+
+    # save object
+    snode.write_to_db(db_controller.kv_store)
+
     # prepare devices
-
-        ret = _prepare_cluster_devices_jm_on_dev(snode, nvme_devs)
-    else:
-        ret = _prepare_cluster_devices_partitions(snode, nvme_devs)
+    ret = _prepare_cluster_devices(snode)
     if not ret:
         logger.error("Failed to prepare cluster devices")
         return False
@@ -819,7 +557,7 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list,
 
     # make other nodes connect to the new devices
     logger.info("Make other nodes connect to the new devices")
-    snodes = db_controller.
+    snodes = db_controller.get_storage_nodes()
     for node_index, node in enumerate(snodes):
         if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE:
             continue
@@ -861,16 +599,150 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list,
     time.sleep(3)
 
     logger.info("Sending cluster event updates")
-    distr_controller.send_node_status_event(snode,
+    distr_controller.send_node_status_event(snode.get_id(), "online")
 
     for dev in snode.nvme_devices:
-        distr_controller.send_dev_status_event(dev,
+        distr_controller.send_dev_status_event(dev.cluster_device_order, "online")
 
     storage_events.snode_add(snode)
     logger.info("Done")
     return "Success"
 
 
+# Deprecated
+def add_storage_node(cluster_id, iface_name, data_nics):
+    db_controller = DBController()
+    kv_store = db_controller.kv_store
+
+    cluster = db_controller.get_cluster_by_id(cluster_id)
+    if not cluster:
+        logger.error("Cluster not found: %s", cluster_id)
+        return False
+
+    logger.info("Add Storage node")
+
+    hostname = utils.get_hostname()
+    snode = db_controller.get_storage_node_by_hostname(hostname)
+    if snode:
+        logger.error("Node already exists, try remove it first.")
+        exit(1)
+    else:
+        snode = StorageNode()
+        snode.uuid = str(uuid.uuid4())
+
+    mgmt_ip = _get_if_ip_address(iface_name)
+    system_id = utils.get_system_id()
+
+    BASE_NQN = cluster.nqn.split(":")[0]
+    subsystem_nqn = f"{BASE_NQN}:{hostname}"
+
+    if data_nics:
+        data_nics = _get_data_nics(data_nics)
+    else:
+        data_nics = _get_data_nics([iface_name])
+
+    rpc_user, rpc_pass = utils.generate_rpc_user_and_pass()
+
+    # creating storage node object
+    snode.status = StorageNode.STATUS_IN_CREATION
+    snode.baseboard_sn = utils.get_baseboard_sn()
+    snode.system_uuid = system_id
+    snode.hostname = hostname
+    snode.host_nqn = subsystem_nqn
+    snode.subsystem = subsystem_nqn
+    snode.data_nics = data_nics
+    snode.mgmt_ip = mgmt_ip
+    snode.rpc_port = constants.RPC_HTTP_PROXY_PORT
+    snode.rpc_username = rpc_user
+    snode.rpc_password = rpc_pass
+    snode.cluster_id = cluster_id
+    snode.write_to_db(kv_store)
+
+    # creating RPCClient instance
+    rpc_client = RPCClient(
+        snode.mgmt_ip,
+        snode.rpc_port,
+        snode.rpc_username,
+        snode.rpc_password)
+
+    logger.info("Getting nvme devices")
+    devs = get_nvme_devices()
+    logger.debug(devs)
+    pcies = [d[0] for d in devs]
+    nvme_devs = addNvmeDevices(cluster, rpc_client, pcies, snode)
+    if not nvme_devs:
+        logger.error("No NVMe devices was found!")
+
+    logger.debug(nvme_devs)
+    snode.nvme_devices = nvme_devs
+
+    # Set device cluster order
+    dev_order = get_next_cluster_device_order(db_controller)
+    for index, nvme in enumerate(snode.nvme_devices):
+        nvme.cluster_device_order = dev_order
+        dev_order += 1
+    snode.write_to_db(db_controller.kv_store)
+
+    # prepare devices
+    _prepare_cluster_devices(snode)
+
+    logger.info("Connecting to remote devices")
+    remote_devices = _connect_to_remote_devs(snode)
+    snode.remote_devices = remote_devices
+
+    logger.info("Setting node status to Active")
+    snode.status = StorageNode.STATUS_ONLINE
+    snode.write_to_db(kv_store)
+
+    # make other nodes connect to the new devices
+    logger.info("Make other nodes connect to the new devices")
+    snodes = db_controller.get_storage_nodes()
+    for node_index, node in enumerate(snodes):
+        if node.get_id() == snode.get_id():
+            continue
+        logger.info(f"Connecting to node: {node.get_id()}")
+        rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password)
+        count = 0
+        for dev in snode.nvme_devices:
+            name = f"remote_{dev.alceml_bdev}"
+            ret = rpc_client.bdev_nvme_attach_controller_tcp(name, dev.nvmf_nqn, dev.nvmf_ip, dev.nvmf_port)
+            if not ret:
+                logger.error(f"Failed to connect to device: {name}")
+                continue
+
+            dev.remote_bdev = f"{name}n1"
+            idx = -1
+            for i, d in enumerate(node.remote_devices):
+                if d.get_id() == dev.get_id():
+                    idx = i
+                    break
+            if idx >= 0:
+                node.remote_devices[idx] = dev
+            else:
+                node.remote_devices.append(dev)
+            count += 1
+        node.write_to_db(kv_store)
+        logger.info(f"connected to devices count: {count}")
+
+    logger.info("Sending cluster map")
+    ret = distr_controller.send_cluster_map_to_node(snode)
+    if not ret:
+        return False, "Failed to send cluster map"
+    ret = distr_controller.send_cluster_map_add_node(snode)
+    if not ret:
+        return False, "Failed to send cluster map add node"
+    time.sleep(3)
+
+    logger.info("Sending cluster event updates")
+    distr_controller.send_node_status_event(snode.get_id(), "online")
+
+    for dev in snode.nvme_devices:
+        distr_controller.send_dev_status_event(dev.cluster_device_order, "online")
+
+    logger.info("Done")
+    return "Success"
+
+
 def delete_storage_node(node_id):
     db_controller = DBController()
     snode = db_controller.get_storage_node_by_id(node_id)
@@ -884,7 +756,7 @@ def delete_storage_node(node_id):
 
     snode.remove(db_controller.kv_store)
 
-    for lvol in db_controller.get_lvols(
+    for lvol in db_controller.get_lvols():
         logger.info(f"Sending cluster map to LVol: {lvol.get_id()}")
         lvol_controller.send_cluster_map(lvol.get_id())
 
@@ -892,7 +764,7 @@ def delete_storage_node(node_id):
     logger.info("done")
 
 
-def remove_storage_node(node_id, force_remove=False
+def remove_storage_node(node_id, force_remove=False):
     db_controller = DBController()
     snode = db_controller.get_storage_node_by_id(node_id)
     if not snode:
@@ -939,7 +811,7 @@ def remove_storage_node(node_id, force_remove=False, force_migrate=False):
             distr_controller.disconnect_device(dev)
             old_status = dev.status
             dev.status = NVMeDevice.STATUS_FAILED
-            distr_controller.send_dev_status_event(dev, NVMeDevice.STATUS_FAILED)
+            distr_controller.send_dev_status_event(dev.cluster_device_order, NVMeDevice.STATUS_FAILED)
             device_events.device_status_change(dev, NVMeDevice.STATUS_FAILED, old_status)
 
     logger.info("Removing storage node")
@@ -953,29 +825,24 @@ def remove_storage_node(node_id, force_remove=False, force_migrate=False):
             pass
 
     try:
-        snode_api = SNodeClient(snode.api_endpoint
+        snode_api = SNodeClient(snode.api_endpoint)
         snode_api.spdk_process_kill()
         snode_api.leave_swarm()
-        pci_address = []
-        for dev in snode.nvme_devices:
-            if dev.pcie_address not in pci_address:
-                ret = snode_api.delete_dev_gpt_partitions(dev.pcie_address)
-                logger.debug(ret)
-                pci_address.append(dev.pcie_address)
     except Exception as e:
-        logger.
+        logger.warning(f"Failed to remove SPDK process: {e}")
 
     old_status = snode.status
     snode.status = StorageNode.STATUS_REMOVED
     snode.write_to_db(db_controller.kv_store)
     logger.info("Sending node event update")
-    distr_controller.send_node_status_event(snode, snode.status)
+    distr_controller.send_node_status_event(snode.get_id(), snode.status)
     storage_events.snode_status_change(snode, StorageNode.STATUS_REMOVED, old_status)
     logger.info("done")
 
 
 def restart_storage_node(
         node_id,
+        spdk_cpu_mask=None,
         spdk_mem=None,
         spdk_image=None,
         set_spdk_debug=None,
@@ -1001,7 +868,7 @@ def restart_storage_node(
     snode.status = StorageNode.STATUS_RESTARTING
     snode.write_to_db(kv_store)
     logger.info("Sending node event update")
-    distr_controller.send_node_status_event(snode, snode.status)
+    distr_controller.send_node_status_event(snode.get_id(), snode.status)
     storage_events.snode_status_change(snode, snode.status, old_status)
 
     logger.info(f"Restarting Storage node: {snode.mgmt_ip}")
@@ -1011,6 +878,10 @@ def restart_storage_node(
     logger.info(f"Node info: {node_info}")
 
     logger.info("Restarting SPDK")
+    cpu = snode.spdk_cpu_mask
+    if spdk_cpu_mask:
+        cpu = spdk_cpu_mask
+        snode.spdk_cpu_mask = cpu
     mem = snode.spdk_mem
     if spdk_mem:
         mem = spdk_mem
@@ -1026,7 +897,7 @@ def restart_storage_node(
 
     cluster_docker = utils.get_docker_client(snode.cluster_id)
     cluster_ip = cluster_docker.info()["Swarm"]["NodeAddr"]
-    results, err = snode_api.spdk_process_start(
+    results, err = snode_api.spdk_process_start(cpu, mem, img, spdk_debug, cluster_ip)
 
     if not results:
         logger.error(f"Failed to start spdk: {err}")
@@ -1060,41 +931,13 @@ def restart_storage_node(
         logger.error("Failed to set iobuf options")
         return False
 
-    # 2-
-    ret = rpc_client.sock_impl_set_options()
-    if not ret:
-        logger.error("Failed socket implement set options")
-        return False
-
-    # 3- set nvme config
-    if snode.pollers_mask:
-        ret = rpc_client.nvmf_set_config(snode.pollers_mask)
-        if not ret:
-            logger.error("Failed to set pollers mask")
-            return False
-
-    # 4- start spdk framework
+    # 2- start spdk framework
     ret = rpc_client.framework_start_init()
     if not ret:
         logger.error("Failed to start framework")
         return False
 
-    #
-    if snode.app_thread_mask:
-        ret = rpc_client.thread_get_stats()
-        app_thread_process_id = 0
-        if ret.get("threads"):
-            for entry in ret["threads"]:
-                if entry['name'] == 'app_thread':
-                    app_thread_process_id = entry['id']
-                    break
-
-        ret = rpc_client.thread_set_cpumask(app_thread_process_id, snode.app_thread_mask)
-        if not ret:
-            logger.error("Failed to set app thread mask")
-            return False
-
-    # 6- set nvme bdev options
+    # 3- set nvme bdev options
     ret = rpc_client.bdev_nvme_set_options()
     if not ret:
         logger.error("Failed to set nvme options")
@@ -1127,23 +970,22 @@ def restart_storage_node(
         else:
             logger.info(f"Device not found: {db_dev.get_id()}")
             db_dev.status = NVMeDevice.STATUS_REMOVED
-            distr_controller.send_dev_status_event(db_dev,
+            distr_controller.send_dev_status_event(db_dev.cluster_device_order, "offline")
 
-
-
-
-
-
-
-            # snode.nvme_devices.append(dev)
+    for dev in nvme_devs:
+        if dev.serial_number not in known_devices_sn:
+            logger.info(f"New device found: {dev.get_id()}")
+            dev.status = 'new'
+            new_devices.append(dev)
+            snode.nvme_devices.append(dev)
 
-
-
-
-
+    dev_order = get_next_cluster_device_order(db_controller)
+    for index, nvme in enumerate(new_devices):
+        nvme.cluster_device_order = dev_order
+        dev_order += 1
 
     # prepare devices
-    ret =
+    ret = _prepare_cluster_devices(snode, after_restart=True)
     if not ret:
         logger.error("Failed to prepare cluster devices")
         return False
@@ -1154,7 +996,7 @@ def restart_storage_node(
 
     # make other nodes connect to the new devices
     logger.info("Make other nodes connect to the node devices")
-    snodes = db_controller.
+    snodes = db_controller.get_storage_nodes()
     for node_index, node in enumerate(snodes):
         if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE:
             continue
@@ -1192,23 +1034,20 @@ def restart_storage_node(
     storage_events.snode_status_change(snode, snode.status, old_status)
 
     logger.info("Sending node event update")
-    distr_controller.send_node_status_event(snode,
+    distr_controller.send_node_status_event(snode.get_id(), NVMeDevice.STATUS_ONLINE)
 
     logger.info("Sending devices event updates")
-    logger.info("Starting migration tasks")
     for dev in snode.nvme_devices:
         if dev.status != NVMeDevice.STATUS_ONLINE:
-            logger.
+            logger.debug(f"Device is not online: {dev.get_id()}, status: {dev.status}")
             continue
+        distr_controller.send_dev_status_event(dev.cluster_device_order, NVMeDevice.STATUS_ONLINE)
 
-
-
-
-
-
-    # if not ret:
-    #     return False, "Failed to send cluster map"
-    # time.sleep(3)
+    logger.info("Sending cluster map to current node")
+    ret = distr_controller.send_cluster_map_to_node(snode)
+    if not ret:
+        return False, "Failed to send cluster map"
+    time.sleep(3)
 
     for lvol_id in snode.lvols:
         lvol = lvol_controller.recreate_lvol(lvol_id, snode)
@@ -1223,12 +1062,9 @@ def restart_storage_node(
     return "Success"
 
 
-def list_storage_nodes(
-    db_controller = DBController()
-
-        nodes = db_controller.get_storage_nodes_by_cluster_id(cluster_id)
-    else:
-        nodes = db_controller.get_storage_nodes()
+def list_storage_nodes(kv_store, is_json):
+    db_controller = DBController(kv_store)
+    nodes = db_controller.get_storage_nodes()
     data = []
     output = ""
 
@@ -1275,43 +1111,26 @@ def list_storage_devices(kv_store, node_id, sort, is_json):
         logger.error("This storage node is not part of the cluster")
         return False
 
-
-    jm_devices = []
-    remote_devices = []
+    data = []
     for device in snode.nvme_devices:
         logger.debug(device)
         logger.debug("*" * 20)
-
+        data.append({
             "UUID": device.uuid,
             "Name": device.device_name,
+            "Hostname": snode.hostname,
             "Size": utils.humanbytes(device.size),
+            # "Sequential Number": device.sequential_number,
+            # "Partitions Count": device.partitions_count,
+            # "Model ID": device.model_id,
             "Serial Number": device.serial_number,
             "PCIe": device.pcie_address,
             "Status": device.status,
             "IO Err": device.io_error,
-            "Health": device.health_check
-        })
+            "Health": device.health_check,
 
-    if snode.jm_device:
-        jm_devices.append({
-            "UUID": snode.jm_device.uuid,
-            "Name": snode.jm_device.device_name,
-            "Size": utils.humanbytes(snode.jm_device.size),
-            "Status": snode.jm_device.status,
-            "IO Err": snode.jm_device.io_error,
-            "Health": snode.jm_device.health_check
         })
 
-    for device in snode.remote_devices:
-        logger.debug(device)
-        logger.debug("*" * 20)
-        remote_devices.append({
-            "UUID": device.uuid,
-            "Name": device.device_name,
-            "Size": utils.humanbytes(device.size),
-            "Serial Number": device.serial_number,
-            "Node ID": device.node_id,
-        })
     if sort and sort in ['node-seq', 'dev-seq', 'serial']:
         if sort == 'serial':
             sort_key = "Serial Number"
@@ -1320,20 +1139,13 @@ def list_storage_devices(kv_store, node_id, sort, is_json):
         elif sort == 'node-seq':
             # TODO: check this key
             sort_key = "Sequential Number"
-
+        sorted_data = sorted(data, key=lambda d: d[sort_key])
+        data = sorted_data
 
-    data = {
-        "Storage Devices": storage_devices,
-        "JM Devices": jm_devices,
-        "Remote Devices": remote_devices,
-    }
     if is_json:
         return json.dumps(data, indent=2)
     else:
-
-        for d in data:
-            out += f"{d}\n{utils.print_table(data[d])}\n\n"
-        return out
+        return utils.print_table(data)
 
 
 def shutdown_storage_node(node_id, force=False):
@@ -1374,7 +1186,7 @@ def shutdown_storage_node(node_id, force=False):
     for dev in snode.nvme_devices:
         if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY]:
             device_controller.device_set_unavailable(dev.get_id())
-    distr_controller.send_node_status_event(snode,
+    distr_controller.send_node_status_event(snode.get_id(), "in_shutdown")
 
     # shutdown node
     # make other nodes disconnect from this node
|
|
1394
1206
|
snode_api = SNodeClient(snode.api_endpoint)
|
1395
1207
|
results, err = snode_api.spdk_process_kill()
|
1396
1208
|
|
1397
|
-
distr_controller.send_node_status_event(snode, StorageNode.STATUS_OFFLINE)
|
1209
|
+
distr_controller.send_node_status_event(snode.get_id(), StorageNode.STATUS_OFFLINE)
|
1398
1210
|
|
1399
1211
|
logger.info("Setting node status to offline")
|
1400
1212
|
snode = db_controller.get_storage_node_by_id(node_id)
|
@@ -1421,24 +1233,22 @@ def suspend_storage_node(node_id, force=False):
|
|
1421
1233
|
return False
|
1422
1234
|
|
1423
1235
|
cluster = db_controller.get_cluster_by_id(snode.cluster_id)
|
1424
|
-
snodes = db_controller.
|
1236
|
+
snodes = db_controller.get_storage_nodes()
|
1425
1237
|
online_nodes = 0
|
1426
1238
|
for node in snodes:
|
1427
1239
|
if node.status == node.STATUS_ONLINE:
|
1428
1240
|
online_nodes += 1
|
1429
|
-
|
1430
|
-
|
1431
|
-
if
|
1432
|
-
logger.warning(f"Cluster mode is HA but online storage nodes are less than 3")
|
1433
|
-
if force is False:
|
1434
|
-
return False
|
1435
|
-
|
1436
|
-
if cluster.status == cluster.STATUS_DEGRADED and force is False:
|
1437
|
-
logger.warning(f"Cluster status is degraded, use --force but this will suspend the cluster")
|
1241
|
+
if cluster.ha_type == "ha" and online_nodes <= 3 and cluster.status == cluster.STATUS_ACTIVE:
|
1242
|
+
logger.warning(f"Cluster mode is HA but online storage nodes are less than 3")
|
1243
|
+
if force is False:
|
1438
1244
|
return False
|
1439
1245
|
|
1246
|
+
if cluster.ha_type == "ha" and cluster.status == cluster.STATUS_DEGRADED and force is False:
|
1247
|
+
logger.warning(f"Cluster status is degraded, use --force but this will suspend the cluster")
|
1248
|
+
return False
|
1249
|
+
|
1440
1250
|
logger.info("Suspending node")
|
1441
|
-
distr_controller.send_node_status_event(snode,
|
1251
|
+
distr_controller.send_node_status_event(snode.get_id(), "suspended")
|
1442
1252
|
for dev in snode.nvme_devices:
|
1443
1253
|
if dev.status == NVMeDevice.STATUS_ONLINE:
|
1444
1254
|
device_controller.device_set_unavailable(dev.get_id())
|
@@ -1482,7 +1292,7 @@ def resume_storage_node(node_id):
|
|
1482
1292
|
logger.info("Resuming node")
|
1483
1293
|
|
1484
1294
|
logger.info("Sending cluster event updates")
|
1485
|
-
distr_controller.send_node_status_event(snode,
|
1295
|
+
distr_controller.send_node_status_event(snode.get_id(), "online")
|
1486
1296
|
|
1487
1297
|
for dev in snode.nvme_devices:
|
1488
1298
|
if dev.status == NVMeDevice.STATUS_UNAVAILABLE:
|
@@ -1858,6 +1668,7 @@ def deploy_cleaner():
     return True
 
 
+
 def get_host_secret(node_id):
     db_controller = DBController()
     node = db_controller.get_storage_node_by_id(node_id)
@@ -2020,7 +1831,7 @@ def set_node_status(node_id, status):
     snode.updated_at = str(datetime.datetime.now())
     snode.write_to_db(db_controller.kv_store)
     storage_events.snode_status_change(snode, snode.status, old_status, caused_by="monitor")
-    distr_controller.send_node_status_event(snode, status)
+    distr_controller.send_node_status_event(snode.get_id(), status)
 
     if snode.status == StorageNode.STATUS_ONLINE:
         logger.info("Connecting to remote devices")