sbcli-pre 1.2.5__zip → 1.2.7__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/PKG-INFO +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/env_var +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/PKG-INFO +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/SOURCES.txt +5 -3
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_cli/cli.py +138 -136
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/cluster_ops.py +138 -235
- sbcli_pre-1.2.7/simplyblock_core/constants.py +91 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/caching_node_controller.py +8 -6
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/cluster_events.py +9 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/device_controller.py +56 -63
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/events_controller.py +5 -3
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/health_controller.py +30 -40
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/lvol_controller.py +75 -39
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/pool_controller.py +8 -4
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/snapshot_controller.py +36 -3
- sbcli_pre-1.2.7/simplyblock_core/controllers/tasks_controller.py +103 -0
- sbcli_pre-1.2.7/simplyblock_core/controllers/tasks_events.py +37 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/distr_controller.py +13 -9
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/kv_store.py +62 -20
- sbcli_pre-1.2.7/simplyblock_core/mgmt_node_ops.py +205 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/events.py +9 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/job_schedule.py +6 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/nvme_device.py +42 -4
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/storage_node.py +14 -2
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/rpc_client.py +55 -10
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/__init__.py +0 -4
- sbcli_pre-1.2.5/simplyblock_core/scripts/alerting/alert_resources.yaml → sbcli_pre-1.2.7/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 +54 -5
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/cluster.json +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/deploy_stack.sh +9 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +32 -15
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/docker-compose-swarm.yml +17 -2
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/haproxy.cfg +15 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/install_deps.sh +3 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/stack_deploy_wait.sh +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/capacity_and_stats_collector.py +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/device_monitor.py +5 -46
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/distr_event_collector.py +10 -11
- sbcli_pre-1.2.7/simplyblock_core/services/health_check_service.py +134 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/lvol_monitor.py +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/lvol_stat_collector.py +1 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/port_stat_collector.py +0 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/storage_node_monitor.py +49 -44
- sbcli_pre-1.2.7/simplyblock_core/services/tasks_runner_migration.py +61 -0
- sbcli_pre-1.2.5/simplyblock_core/services/job_tasks.py → sbcli_pre-1.2.7/simplyblock_core/services/tasks_runner_restart.py +95 -46
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/snode_client.py +12 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/storage_node_ops.py +630 -358
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/utils.py +126 -1
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/snode_ops.py +103 -25
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_cluster.py +20 -43
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_device.py +10 -7
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_lvol.py +9 -5
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_pool.py +14 -5
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_storage_node.py +15 -15
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/node_utils.py +0 -2
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/utils.py +8 -0
- sbcli_pre-1.2.5/simplyblock_core/constants.py +0 -65
- sbcli_pre-1.2.5/simplyblock_core/mgmt_node_ops.py +0 -80
- sbcli_pre-1.2.5/simplyblock_core/scripts/apply_dashboard.sh +0 -22
- sbcli_pre-1.2.5/simplyblock_core/services/health_check_service.py +0 -136
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/README.md +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/pyproject.toml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/dependency_links.txt +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/entry_points.txt +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/requires.txt +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/top_level.txt +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/setup.cfg +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/setup.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_cli/main.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/cnode_client.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/compute_node_ops.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/device_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/lvol_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/mgmt_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/pool_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/snapshot_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/storage_events.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/base_model.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/caching_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/cluster.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/compute_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/deployer.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/global_settings.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/iface.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/lvol_model.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/mgmt_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/pool.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/port_stat.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/snapshot.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/stats.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/pci_utils.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/config_docker.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/devices.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/pools.json +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/datasource.yml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/db_config_double.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/db_config_single.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/prometheus.yml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/run_ssh.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/set_db_config.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/caching_node_monitor.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/cap_monitor.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/install_service.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/log_agg_service.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/remove_service.sh +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/service_template.service +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/shell_utils.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/app.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/auth_middleware.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/__init__.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_basic.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_deployer.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/caching_node_app.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/caching_node_app_k8s.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/node_webapp.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/snode_app.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/delete.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy_cnode.yaml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy_spdk.yaml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/is_up.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/list_deps.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/rpac.yaml +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/tst.py +0 -0
- {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
@@ -1,8 +1,8 @@
|
|
1
1
|
import time
|
2
2
|
import logging
|
3
3
|
|
4
|
-
from simplyblock_core import distr_controller, utils
|
5
|
-
from simplyblock_core.controllers import device_events, lvol_controller
|
4
|
+
from simplyblock_core import distr_controller, utils, storage_node_ops
|
5
|
+
from simplyblock_core.controllers import device_events, lvol_controller, tasks_controller
|
6
6
|
from simplyblock_core.kv_store import DBController
|
7
7
|
from simplyblock_core.models.nvme_device import NVMeDevice
|
8
8
|
from simplyblock_core.rpc_client import RPCClient
|
@@ -16,6 +16,7 @@ def device_set_state(device_id, state):
|
|
16
16
|
dev = db_controller.get_storage_devices(device_id)
|
17
17
|
if not dev:
|
18
18
|
logger.error("device not found")
|
19
|
+
return False
|
19
20
|
|
20
21
|
snode = db_controller.get_storage_node_by_id(dev.node_id)
|
21
22
|
if not snode:
|
@@ -35,7 +36,7 @@ def device_set_state(device_id, state):
|
|
35
36
|
|
36
37
|
old_status = dev.status
|
37
38
|
device.status = state
|
38
|
-
distr_controller.send_dev_status_event(device
|
39
|
+
distr_controller.send_dev_status_event(device, device.status)
|
39
40
|
snode.write_to_db(db_controller.kv_store)
|
40
41
|
device_events.device_status_change(device, device.status, old_status)
|
41
42
|
return True
|
@@ -78,14 +79,20 @@ def device_set_read_only(device_id):
|
|
78
79
|
|
79
80
|
|
80
81
|
def device_set_online(device_id):
|
81
|
-
|
82
|
+
ret = device_set_state(device_id, NVMeDevice.STATUS_ONLINE)
|
83
|
+
if ret:
|
84
|
+
logger.info("Adding task to device data migration")
|
85
|
+
task_id = tasks_controller.add_device_mig_task(device_id)
|
86
|
+
if task_id:
|
87
|
+
logger.info(f"Task id: {task_id}")
|
88
|
+
return ret
|
82
89
|
|
83
90
|
|
84
91
|
def get_alceml_name(alceml_id):
|
85
92
|
return f"alceml_{alceml_id}"
|
86
93
|
|
87
94
|
|
88
|
-
def _def_create_device_stack(device_obj, snode):
|
95
|
+
def _def_create_device_stack(device_obj, snode, force=False):
|
89
96
|
|
90
97
|
rpc_client = RPCClient(
|
91
98
|
snode.mgmt_ip, snode.rpc_port,
|
@@ -98,22 +105,26 @@ def _def_create_device_stack(device_obj, snode):
|
|
98
105
|
ret = rpc_client.bdev_passtest_create(test_name, device_obj.nvme_bdev)
|
99
106
|
if not ret:
|
100
107
|
logger.error(f"Failed to create bdev: {test_name}")
|
101
|
-
|
108
|
+
if not force:
|
109
|
+
return False
|
102
110
|
|
103
111
|
alceml_id = device_obj.get_id()
|
104
112
|
alceml_name = get_alceml_name(alceml_id)
|
105
113
|
logger.info(f"adding {alceml_name}")
|
106
|
-
ret = rpc_client.bdev_alceml_create(alceml_name, test_name, alceml_id, pba_init_mode=2
|
114
|
+
ret = rpc_client.bdev_alceml_create(alceml_name, test_name, alceml_id, pba_init_mode=2,
|
115
|
+
dev_cpu_mask=snode.dev_cpu_mask)
|
107
116
|
if not ret:
|
108
117
|
logger.error(f"Failed to create alceml bdev: {alceml_name}")
|
109
|
-
|
118
|
+
if not force:
|
119
|
+
return False
|
110
120
|
|
111
121
|
# add pass through
|
112
122
|
pt_name = f"{alceml_name}_PT"
|
113
123
|
ret = rpc_client.bdev_PT_NoExcl_create(pt_name, alceml_name)
|
114
124
|
if not ret:
|
115
125
|
logger.error(f"Failed to create pt noexcl bdev: {pt_name}")
|
116
|
-
|
126
|
+
if not force:
|
127
|
+
return False
|
117
128
|
|
118
129
|
subsystem_nqn = snode.subsystem + ":dev:" + alceml_id
|
119
130
|
logger.info("Creating subsystem %s", subsystem_nqn)
|
@@ -137,11 +148,13 @@ def _def_create_device_stack(device_obj, snode):
|
|
137
148
|
logger.info(f"Adding {pt_name} to the subsystem")
|
138
149
|
ret = rpc_client.nvmf_subsystem_add_ns(subsystem_nqn, pt_name)
|
139
150
|
|
140
|
-
if device_obj.jm_bdev:
|
141
|
-
ret = rpc_client.bdev_jm_create(device_obj.jm_bdev, device_obj.alceml_bdev
|
151
|
+
if hasattr(device_obj, 'jm_bdev') and device_obj.jm_bdev:
|
152
|
+
ret = rpc_client.bdev_jm_create(device_obj.jm_bdev, device_obj.alceml_bdev,
|
153
|
+
dev_cpu_mask=snode.dev_cpu_mask)
|
142
154
|
if not ret:
|
143
|
-
logger.error(f"Failed to create bdev: {device_obj.jm_bdev}")
|
144
|
-
|
155
|
+
logger.error(f"Failed to create jm bdev: {device_obj.jm_bdev}")
|
156
|
+
if not force:
|
157
|
+
return False
|
145
158
|
|
146
159
|
device_obj.testing_bdev = test_name
|
147
160
|
device_obj.alceml_bdev = alceml_name
|
@@ -174,23 +187,15 @@ def restart_device(device_id, force=False):
|
|
174
187
|
device_obj = dev
|
175
188
|
break
|
176
189
|
|
177
|
-
device_obj.status = 'restarting'
|
178
|
-
snode.write_to_db(db_controller.kv_store)
|
179
|
-
|
180
190
|
logger.info(f"Restarting device {device_id}")
|
191
|
+
device_set_unavailable(device_id)
|
181
192
|
|
182
|
-
ret = _def_create_device_stack(device_obj, snode)
|
193
|
+
ret = _def_create_device_stack(device_obj, snode, force=force)
|
183
194
|
|
184
195
|
if not ret:
|
185
196
|
logger.error("Failed to create device stack")
|
186
|
-
|
187
|
-
|
188
|
-
return False
|
189
|
-
|
190
|
-
device_obj.io_error = False
|
191
|
-
device_obj.retries_exhausted = False
|
192
|
-
device_obj.status = NVMeDevice.STATUS_ONLINE
|
193
|
-
snode.write_to_db(db_controller.kv_store)
|
197
|
+
if not force:
|
198
|
+
return False
|
194
199
|
|
195
200
|
logger.info("Make other nodes connect to the device")
|
196
201
|
snodes = db_controller.get_storage_nodes()
|
@@ -221,10 +226,11 @@ def restart_device(device_id, force=False):
|
|
221
226
|
node.write_to_db(db_controller.kv_store)
|
222
227
|
time.sleep(3)
|
223
228
|
|
224
|
-
logger.info("
|
225
|
-
|
229
|
+
logger.info("Setting device io_error to False")
|
230
|
+
device_set_io_error(device_id, False)
|
231
|
+
logger.info("Setting device online")
|
232
|
+
device_set_online(device_id)
|
226
233
|
device_events.device_restarted(device_obj)
|
227
|
-
|
228
234
|
return "Done"
|
229
235
|
|
230
236
|
|
@@ -267,15 +273,8 @@ def device_remove(device_id, force=True):
|
|
267
273
|
device = dev
|
268
274
|
break
|
269
275
|
|
270
|
-
if device.jm_bdev:
|
271
|
-
if snode.lvols:
|
272
|
-
logger.error(f"Failed to remove device: {device.get_id()}, "
|
273
|
-
f"there are LVols that uses JM from this device, delete LVol to continue")
|
274
|
-
# if not force:
|
275
|
-
return False
|
276
|
-
|
277
276
|
logger.info("Sending device event")
|
278
|
-
distr_controller.send_dev_status_event(device
|
277
|
+
distr_controller.send_dev_status_event(device, NVMeDevice.STATUS_REMOVED)
|
279
278
|
|
280
279
|
logger.info("Disconnecting device from all nodes")
|
281
280
|
distr_controller.disconnect_device(device)
|
@@ -291,13 +290,6 @@ def device_remove(device_id, force=True):
|
|
291
290
|
if not force:
|
292
291
|
return False
|
293
292
|
|
294
|
-
if device.jm_bdev:
|
295
|
-
ret = rpc_client.bdev_jm_delete(f"jm_{snode.get_id()}")
|
296
|
-
if not ret:
|
297
|
-
logger.error(f"Failed to remove journal manager: jm_{snode.get_id()}")
|
298
|
-
if not force:
|
299
|
-
return False
|
300
|
-
|
301
293
|
logger.info("Removing device bdevs")
|
302
294
|
ret = rpc_client.bdev_PT_NoExcl_delete(f"{device.alceml_bdev}_PT")
|
303
295
|
if not ret:
|
@@ -405,19 +397,12 @@ def get_device_iostats(device_id, history, records_count=20, parse_sizes=True):
|
|
405
397
|
|
406
398
|
def reset_storage_device(dev_id):
|
407
399
|
db_controller = DBController()
|
408
|
-
device =
|
409
|
-
snode = None
|
410
|
-
for node in db_controller.get_storage_nodes():
|
411
|
-
for dev in node.nvme_devices:
|
412
|
-
if dev.get_id() == dev_id:
|
413
|
-
device = dev
|
414
|
-
snode = node
|
415
|
-
break
|
416
|
-
|
400
|
+
device = db_controller.get_storage_devices(dev_id)
|
417
401
|
if not device:
|
418
402
|
logger.error(f"Device not found: {dev_id}")
|
419
403
|
return False
|
420
404
|
|
405
|
+
snode = db_controller.get_storage_node_by_id(device.node_id)
|
421
406
|
if not snode:
|
422
407
|
logger.error(f"Node not found {device.node_id}")
|
423
408
|
return False
|
@@ -426,30 +411,38 @@ def reset_storage_device(dev_id):
|
|
426
411
|
logger.error(f"Device status: {device.status} is removed")
|
427
412
|
return False
|
428
413
|
|
429
|
-
logger.info("Setting
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
414
|
+
logger.info("Setting devices to unavailable")
|
415
|
+
device_set_unavailable(dev_id)
|
416
|
+
devs = []
|
417
|
+
for dev in snode.nvme_devices:
|
418
|
+
if dev.get_id() == device.get_id():
|
419
|
+
continue
|
420
|
+
if dev.status == NVMeDevice.STATUS_ONLINE and dev.physical_label == device.physical_label:
|
421
|
+
devs.append(dev)
|
422
|
+
device_set_unavailable(dev.get_id())
|
435
423
|
|
436
424
|
logger.info("Resetting device")
|
437
425
|
rpc_client = RPCClient(
|
438
426
|
snode.mgmt_ip, snode.rpc_port,
|
439
427
|
snode.rpc_username, snode.rpc_password)
|
440
428
|
|
441
|
-
controller_name = device.
|
429
|
+
controller_name = device.nvme_controller
|
442
430
|
response = rpc_client.reset_device(controller_name)
|
443
431
|
if not response:
|
444
432
|
logger.error(f"Failed to reset NVMe BDev {controller_name}")
|
445
433
|
return False
|
434
|
+
time.sleep(3)
|
446
435
|
|
447
|
-
|
448
|
-
|
449
|
-
|
436
|
+
logger.info("Setting devices online")
|
437
|
+
for dev in devs:
|
438
|
+
device_set_online(dev.get_id())
|
450
439
|
|
451
|
-
|
440
|
+
# set io_error flag False
|
441
|
+
device_set_io_error(dev_id, False)
|
442
|
+
device_set_retries_exhausted(dev_id, False)
|
443
|
+
# set device to online
|
452
444
|
device_set_online(dev_id)
|
445
|
+
device_events.device_reset(device)
|
453
446
|
return True
|
454
447
|
|
455
448
|
|
@@ -40,7 +40,7 @@ def log_distr_event(cluster_id, node_id, event_dict):
|
|
40
40
|
ds.uuid = str(uuid.uuid4())
|
41
41
|
ds.cluster_uuid = cluster_id
|
42
42
|
ds.node_id = node_id
|
43
|
-
ds.date =
|
43
|
+
ds.date = round(time.time()*1000)
|
44
44
|
ds.domain = DOMAIN_DISTR
|
45
45
|
ds.event_level = EventObj.LEVEL_ERROR
|
46
46
|
ds.caused_by = CAUSED_BY_MONITOR
|
@@ -66,7 +66,7 @@ def log_distr_event(cluster_id, node_id, event_dict):
|
|
66
66
|
|
67
67
|
|
68
68
|
def log_event_cluster(cluster_id, domain, event, db_object, caused_by, message,
|
69
|
-
node_id=None, event_level=EventObj.LEVEL_INFO):
|
69
|
+
node_id=None, event_level=EventObj.LEVEL_INFO, status=None):
|
70
70
|
"""
|
71
71
|
uuid:
|
72
72
|
cluster_uuid: 1234
|
@@ -83,7 +83,7 @@ def log_event_cluster(cluster_id, domain, event, db_object, caused_by, message,
|
|
83
83
|
ds = EventObj()
|
84
84
|
ds.uuid = str(uuid.uuid4())
|
85
85
|
ds.cluster_uuid = cluster_id
|
86
|
-
ds.date =
|
86
|
+
ds.date = round(time.time()*1000)
|
87
87
|
ds.node_id = node_id
|
88
88
|
ds.event_level = event_level
|
89
89
|
|
@@ -93,12 +93,14 @@ def log_event_cluster(cluster_id, domain, event, db_object, caused_by, message,
|
|
93
93
|
ds.object_dict = db_object.get_clean_dict()
|
94
94
|
ds.caused_by = caused_by
|
95
95
|
ds.message = message
|
96
|
+
ds.status = status
|
96
97
|
|
97
98
|
log_event_based_on_level(cluster_id, event, db_object.name, message, caused_by, event_level)
|
98
99
|
|
99
100
|
db_controller = DBController()
|
100
101
|
ds.write_to_db(db_controller.kv_store)
|
101
102
|
|
103
|
+
|
102
104
|
def log_event_based_on_level(cluster_id, event, db_object, message, caused_by, event_level):
|
103
105
|
json_str = json.dumps({
|
104
106
|
"cluster_id": cluster_id,
|
@@ -16,11 +16,13 @@ logger = log.getLogger()
|
|
16
16
|
|
17
17
|
def check_cluster(cluster_id):
|
18
18
|
db_controller = DBController()
|
19
|
-
st = db_controller.
|
19
|
+
st = db_controller.get_storage_nodes_by_cluster_id(cluster_id)
|
20
20
|
data = []
|
21
|
+
result = True
|
21
22
|
for node in st:
|
22
23
|
# check if node is online, unavailable, restarting
|
23
24
|
ret = check_node(node.get_id(), with_devices=False)
|
25
|
+
result &= ret
|
24
26
|
print("*"*100)
|
25
27
|
data.append({
|
26
28
|
"Kind": "Node",
|
@@ -28,30 +30,32 @@ def check_cluster(cluster_id):
|
|
28
30
|
"Status": "ok" if ret else "failed"
|
29
31
|
})
|
30
32
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
"
|
45
|
-
|
46
|
-
|
47
|
-
|
33
|
+
for device in node.nvme_devices:
|
34
|
+
ret = check_device(device.get_id())
|
35
|
+
result &= ret
|
36
|
+
print("*" * 100)
|
37
|
+
data.append({
|
38
|
+
"Kind": "Device",
|
39
|
+
"UUID": device.get_id(),
|
40
|
+
"Status": "ok" if ret else "failed"
|
41
|
+
})
|
42
|
+
|
43
|
+
for lvol in db_controller.get_lvols(cluster_id):
|
44
|
+
ret = check_lvol(lvol.get_id())
|
45
|
+
result &= ret
|
46
|
+
print("*" * 100)
|
47
|
+
data.append({
|
48
|
+
"Kind": "LVol",
|
49
|
+
"UUID": lvol.get_id(),
|
50
|
+
"Status": "ok" if ret else "failed"
|
51
|
+
})
|
48
52
|
print(utils.print_table(data))
|
49
|
-
return
|
53
|
+
return result
|
50
54
|
|
51
55
|
|
52
56
|
def _check_node_docker_api(ip):
|
53
57
|
try:
|
54
|
-
node_docker = docker.DockerClient(base_url=f"tcp://{ip}:2375", version="auto")
|
58
|
+
node_docker = docker.DockerClient(base_url=f"tcp://{ip}:2375", version="auto", timeout=3)
|
55
59
|
ret = node_docker.info()
|
56
60
|
if ret:
|
57
61
|
logger.debug(ret)
|
@@ -65,7 +69,7 @@ def _check_node_rpc(rpc_ip, rpc_port, rpc_username, rpc_password):
|
|
65
69
|
try:
|
66
70
|
rpc_client = RPCClient(
|
67
71
|
rpc_ip, rpc_port, rpc_username, rpc_password,
|
68
|
-
timeout=
|
72
|
+
timeout=10, retry=1)
|
69
73
|
ret = rpc_client.get_version()
|
70
74
|
if ret:
|
71
75
|
logger.debug(f"SPDK version: {ret['version']}")
|
@@ -167,7 +171,7 @@ def check_node(node_id, with_devices=True):
|
|
167
171
|
|
168
172
|
def check_device(device_id):
|
169
173
|
db_controller = DBController()
|
170
|
-
device = db_controller.
|
174
|
+
device = db_controller.get_storage_device_by_id(device_id)
|
171
175
|
if not device:
|
172
176
|
logger.error("device not found")
|
173
177
|
return False
|
@@ -192,8 +196,8 @@ def check_device(device_id):
|
|
192
196
|
snode.rpc_username, snode.rpc_password)
|
193
197
|
|
194
198
|
bdevs_stack = [device.nvme_bdev, device.testing_bdev, device.alceml_bdev, device.pt_bdev]
|
195
|
-
if device.jm_bdev:
|
196
|
-
|
199
|
+
# if device.jm_bdev:
|
200
|
+
# bdevs_stack.append(device.jm_bdev)
|
197
201
|
logger.info(f"Checking Device: {device_id}, status:{device.status}")
|
198
202
|
problems = 0
|
199
203
|
for bdev in bdevs_stack:
|
@@ -231,7 +235,7 @@ def check_device(device_id):
|
|
231
235
|
|
232
236
|
def check_remote_device(device_id):
|
233
237
|
db_controller = DBController()
|
234
|
-
device = db_controller.
|
238
|
+
device = db_controller.get_storage_device_by_id(device_id)
|
235
239
|
if not device:
|
236
240
|
logger.error("device not found")
|
237
241
|
return False
|
@@ -241,7 +245,7 @@ def check_remote_device(device_id):
|
|
241
245
|
return False
|
242
246
|
|
243
247
|
result = True
|
244
|
-
for node in db_controller.
|
248
|
+
for node in db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id):
|
245
249
|
if node.status == StorageNode.STATUS_ONLINE:
|
246
250
|
if node.get_id() == snode.get_id():
|
247
251
|
continue
|
@@ -306,20 +310,6 @@ def check_lvol_on_node(lvol_id, node_id):
|
|
306
310
|
logger.exception(e)
|
307
311
|
return False
|
308
312
|
|
309
|
-
# check ndcs+npcs <= online devices
|
310
|
-
# then change its status to offline if fails this check
|
311
|
-
online_devices = 0
|
312
|
-
for node in db_controller.get_storage_nodes():
|
313
|
-
for dev in node.nvme_devices:
|
314
|
-
if dev.status == dev.STATUS_ONLINE:
|
315
|
-
online_devices += 1
|
316
|
-
|
317
|
-
# if lvol.ndcs + lvol.npcs < online_devices:
|
318
|
-
# logger.info(f"Checking Distr ndcs+npcs: {lvol.ndcs}+{lvol.npcs}, online devices: {online_devices} ... ok")
|
319
|
-
# else:
|
320
|
-
# logger.info(f"Checking Distr ndcs+npcs: {lvol.ndcs}+{lvol.npcs}, online devices: {online_devices} ... failed")
|
321
|
-
# passed = False
|
322
|
-
|
323
313
|
return passed
|
324
314
|
|
325
315
|
|