sbcli-pre 1.1.3__zip → 1.1.5__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/PKG-INFO +1 -1
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/env_var +1 -1
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/PKG-INFO +1 -1
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/SOURCES.txt +1 -1
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_cli/cli.py +7 -12
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/cluster_ops.py +45 -2
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/constants.py +1 -1
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/health_controller.py +1 -1
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/rpc_client.py +5 -4
- sbcli_pre-1.1.3/simplyblock_core/scripts/alerting/alert_resources.yaml → sbcli_pre-1.1.5/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 +54 -5
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/distr_event_collector.py +7 -8
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/health_check_service.py +1 -1
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/storage_node_monitor.py +3 -3
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/tasks_runner_restart.py +1 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/storage_node_ops.py +0 -134
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/README.md +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/pyproject.toml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/dependency_links.txt +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/entry_points.txt +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/requires.txt +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/top_level.txt +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/setup.cfg +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/setup.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_cli/main.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/__init__.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/cnode_client.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/compute_node_ops.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/__init__.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/caching_node_controller.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/cluster_events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/device_controller.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/device_events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/events_controller.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/lvol_controller.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/lvol_events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/mgmt_events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/pool_controller.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/pool_events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/snapshot_controller.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/snapshot_events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/storage_events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/tasks_controller.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/tasks_events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/distr_controller.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/kv_store.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/mgmt_node_ops.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/__init__.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/base_model.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/caching_node.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/cluster.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/compute_node.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/events.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/global_settings.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/iface.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/job_schedule.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/lvol_model.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/mgmt_node.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/nvme_device.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/pool.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/port_stat.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/snapshot.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/stats.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/storage_node.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/pci_utils.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/__init__.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/config_docker.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/cluster.json +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/devices.json +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/pools.json +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/datasource.yml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/db_config_double.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/db_config_single.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/deploy_stack.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/docker-compose-swarm.yml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/haproxy.cfg +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/install_deps.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/prometheus.yml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/run_ssh.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/set_db_config.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/stack_deploy_wait.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/__init__.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/caching_node_monitor.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/cap_monitor.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/capacity_and_stats_collector.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/device_monitor.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/install_service.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/log_agg_service.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/lvol_monitor.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/lvol_stat_collector.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/port_stat_collector.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/remove_service.sh +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/service_template.service +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/tasks_runner_migration.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/shell_utils.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/snode_client.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/utils.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/__init__.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/app.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/auth_middleware.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/__init__.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/node_api_basic.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/snode_ops.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_cluster.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_device.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_lvol.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_pool.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_storage_node.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/caching_node_app.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/caching_node_app_k8s.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/node_utils.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/node_webapp.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/snode_app.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/delete.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/deploy.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/deploy_cnode.yaml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/deploy_spdk.yaml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/is_up.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/list_deps.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/rpac.yaml +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/tst.py +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
- {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/utils.py +0 -0
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/SOURCES.txt
@@ -73,7 +73,7 @@ simplyblock_core/scripts/prometheus.yml
 simplyblock_core/scripts/run_ssh.sh
 simplyblock_core/scripts/set_db_config.sh
 simplyblock_core/scripts/stack_deploy_wait.sh
-simplyblock_core/scripts/alerting/alert_resources.yaml
+simplyblock_core/scripts/alerting/alert_resources.yaml.j2
 simplyblock_core/scripts/alerting/alert_rules.yaml
 simplyblock_core/scripts/dashboards/cluster.json
 simplyblock_core/scripts/dashboards/devices.json
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_cli/cli.py
@@ -235,6 +235,10 @@ class CLIWrapper:
                                  dest='log_del_interval', default='7d')
         sub_command.add_argument("--metrics-retention-period", help='retention period for prometheus metrics, default: 7d',
                                  dest='metrics_retention_period', default='7d')
+        sub_command.add_argument("--contact-point", help='the email or slack webhook url to be used for alerting',
+                                 dest='contact_point', default='')
+        sub_command.add_argument("--grafana-endpoint", help='the endpoint url for grafana',
+                                 dest='grafana_endpoint', default='')
 
         # add cluster
         sub_command = self.add_sub_command(subparser, 'add', 'Add new cluster')
@@ -626,9 +630,6 @@ class CLIWrapper:
         elif sub_command == "deploy-cleaner":
             ret = storage_ops.deploy_cleaner()
 
-        elif sub_command == "add":
-            ret = self.storage_node_add(args)
-
         elif sub_command == "add-node":
             cluster_id = args.cluster_id
             node_ip = args.node_ip
@@ -1098,14 +1099,6 @@ class CLIWrapper:
         out = storage_ops.list_storage_nodes(self.db_store, args.json)
         return out
 
-    def storage_node_add(self, args):
-        cluster_id = args.cluster_id
-        ifname = args.ifname
-        data_nics = args.data_nics
-        # TODO: Validate the inputs
-        out = storage_ops.add_storage_node(cluster_id, ifname, data_nics)
-        return out
-
     def storage_node_list_devices(self, args):
         node_id = args.node_id
         sort = args.sort
@@ -1137,11 +1130,13 @@ class CLIWrapper:
         ifname = args.ifname
         log_del_interval = args.log_del_interval
         metrics_retention_period = args.metrics_retention_period
+        contact_point = args.contact_point
+        grafana_endpoint = args.grafana_endpoint
 
         return cluster_ops.create_cluster(
             blk_size, page_size_in_blocks,
             CLI_PASS, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit,
-            ifname, log_del_interval, metrics_retention_period)
+            ifname, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint)
 
     def query_yes_no(self, question, default="yes"):
         """Ask a yes/no question via raw_input() and return their answer.
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/cluster_ops.py
@@ -2,11 +2,16 @@
 import json
 import logging
 import os
+import re
+import tempfile
+import shutil
+import subprocess
 import time
 import uuid
 
 import docker
 import requests
+from jinja2 import Environment, FileSystemLoader
 
 from simplyblock_core import utils, scripts, constants, mgmt_node_ops, storage_node_ops
 from simplyblock_core.controllers import cluster_events, device_controller
@@ -16,7 +21,7 @@ from simplyblock_core.models.nvme_device import NVMeDevice
 from simplyblock_core.models.storage_node import StorageNode
 
 logger = logging.getLogger()
-
+TOP_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 
 def _add_grafana_dashboards(username, password, cluster_ip):
     url = f"http://{username}:{password}@{cluster_ip}/grafana/api/dashboards/import"
@@ -69,7 +74,8 @@ def _add_graylog_input(cluster_ip, password):
 
 
 def create_cluster(blk_size, page_size_in_blocks, cli_pass,
-                   cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period):
+                   cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period,
+                   contact_point, grafana_endpoint):
     logger.info("Installing dependencies...")
     ret = scripts.install_deps()
     logger.info("Installing dependencies > Done")
@@ -124,6 +130,43 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
     if prov_cap_crit and prov_cap_crit > 0:
         c.prov_cap_crit = prov_cap_crit
 
+    alerts_template_folder = os.path.join(TOP_DIR, "simplyblock_core/scripts/alerting/")
+    alert_resources_file = "alert_resources.yaml"
+
+    env = Environment(loader=FileSystemLoader(alerts_template_folder), trim_blocks=True, lstrip_blocks=True)
+    template = env.get_template(f'{alert_resources_file}.j2')
+
+    slack_pattern = re.compile(r"https://hooks\.slack\.com/services/\S+")
+    email_pattern = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
+
+    if slack_pattern.match(contact_point):
+        ALERT_TYPE = "slack"
+    elif email_pattern.match(contact_point):
+        ALERT_TYPE = "email"
+    else:
+        ALERT_TYPE = "slack"
+
+    values = {
+        'CONTACT_POINT': contact_point,
+        'GRAFANA_ENDPOINT': grafana_endpoint,
+        'ALERT_TYPE': ALERT_TYPE,
+    }
+
+    temp_dir = tempfile.mkdtemp()
+
+    temp_file_path = os.path.join(temp_dir, alert_resources_file)
+    with open(temp_file_path, 'w') as file:
+        file.write(template.render(values))
+
+    destination_file_path = os.path.join(alerts_template_folder, alert_resources_file)
+    try:
+        subprocess.run(['sudo', '-v'], check=True)  # sudo -v checks if the current user has sudo permissions
+        subprocess.run(['sudo', 'mv', temp_file_path, destination_file_path], check=True)
+        print(f"File moved to {destination_file_path} successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred: {e}")
+    shutil.rmtree(temp_dir)
+
     logger.info("Deploying swarm stack ...")
     ret = scripts.deploy_stack(cli_pass, DEV_IP, constants.SIMPLY_BLOCK_DOCKER_IMAGE, c.secret, c.uuid, log_del_interval, metrics_retention_period)
     logger.info("Deploying swarm stack > Done")
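For reference, the following is a minimal standalone sketch of the contact-point handling introduced in the hunk above. The regexes and Jinja2 environment options come straight from the diff; the helper names (classify_contact_point, render_alert_resources) and the example inputs are illustrative only, not part of the package.

# Standalone sketch of the new contact-point classification and template
# rendering in create_cluster(); helper names and inputs are illustrative.
import re
from jinja2 import Environment, FileSystemLoader

SLACK_PATTERN = re.compile(r"https://hooks\.slack\.com/services/\S+")
EMAIL_PATTERN = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")


def classify_contact_point(contact_point):
    # An email address selects the email receiver; anything else falls back to slack.
    if SLACK_PATTERN.match(contact_point):
        return "slack"
    if EMAIL_PATTERN.match(contact_point):
        return "email"
    return "slack"


def render_alert_resources(template_dir, contact_point, grafana_endpoint):
    # Render alert_resources.yaml.j2 with the same values create_cluster() passes.
    env = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True, lstrip_blocks=True)
    template = env.get_template("alert_resources.yaml.j2")
    return template.render({
        "CONTACT_POINT": contact_point,
        "GRAFANA_ENDPOINT": grafana_endpoint,
        "ALERT_TYPE": classify_contact_point(contact_point),
    })


if __name__ == "__main__":
    print(classify_contact_point("admin@example.com"))                         # email
    print(classify_contact_point("https://hooks.slack.com/services/T0/B0/x"))  # slack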
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/constants.py
@@ -20,7 +20,7 @@ DEV_MONITOR_INTERVAL_SEC = 10
 DEV_STAT_COLLECTOR_INTERVAL_SEC = 2
 PROT_STAT_COLLECTOR_INTERVAL_SEC = 2
 DISTR_EVENT_COLLECTOR_INTERVAL_SEC = 2
-DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS =
+DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS = 10
 CAP_MONITOR_INTERVAL_SEC = 30
 SSD_VENDOR_WHITE_LIST = ["1d0f:cd01", "1d0f:cd00"]
 
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/health_controller.py
@@ -69,7 +69,7 @@ def _check_node_rpc(rpc_ip, rpc_port, rpc_username, rpc_password):
     try:
         rpc_client = RPCClient(
             rpc_ip, rpc_port, rpc_username, rpc_password,
-            timeout=
+            timeout=10, retry=1)
         ret = rpc_client.get_version()
         if ret:
             logger.debug(f"SPDK version: {ret['version']}")
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/rpc_client.py
@@ -434,8 +434,9 @@ class RPCClient:
             "trsvcid": str(port),
             "subnqn": nqn,
             "fabrics_connect_timeout_us": 100000,
-            "fast_io_fail_timeout_sec":
+            "fast_io_fail_timeout_sec": 1,
             "num_io_queues": 16384,
+            "ctrlr_loss_timeout_sec": 2,
         }
         return self._request("bdev_nvme_attach_controller", params)
 
@@ -483,9 +484,9 @@ class RPCClient:
         params = {
             "bdev_retry_count": 0,
             "transport_retry_count": 0,
-            "ctrlr_loss_timeout_sec":
-            "fast_io_fail_timeout_sec":
-            "reconnect_delay_sec":
+            "ctrlr_loss_timeout_sec": 2,
+            "fast_io_fail_timeout_sec": 1,
+            "reconnect_delay_sec": 1,
             "keep_alive_timeout_ms": 200,
             "transport_ack_timeout": 7,
             "timeout_us": 100000
sbcli_pre-1.1.3/simplyblock_core/scripts/alerting/alert_resources.yaml → sbcli_pre-1.1.5/simplyblock_core/scripts/alerting/alert_resources.yaml.j2
@@ -12,15 +12,26 @@ contactPoints:
     name: grafana-alerts
     receivers:
       - uid: grafana
-        type:
+        type: {{ ALERT_TYPE }}
+        {% if ALERT_TYPE == 'slack' %}
         settings:
           username: grafana_bot
-          url: '
+          url: '{{ CONTACT_POINT }}'
           title: |
-            {{ template "slack.title" . }}
+            {{ '{{' }} template "slack.title" . {{ '}}' }}
           text: |
-            {{ template "slack.message" . }}
+            {{ '{{' }} template "slack.message" . {{ '}}' }}
+        {% else %}
+        settings:
+          addresses: '{{ CONTACT_POINT }}'
+          subject: |
+            {{ '{{' }} template "email.subject" . {{ '}}' }}
+          body: |
+            {{ '{{' }} template "email.body" . {{ '}}' }}
+        {% endif %}
 
+{% if ALERT_TYPE == 'slack' %}
+{% raw %}
 templates:
   - orgId: 1
     name: slack.title
@@ -38,7 +49,9 @@ templates:
       *Description*: {{ .Annotations.description }}
      {{ end -}}
       *Log message*: {{ index .Labels "message" }}
-
+      {% endraw %}
+      *Explore logs:* {{ GRAFANA_ENDPOINT }}
+      {% raw %}
       {{ if .DashboardURL -}}
       *Go to dashboard:* {{ .DashboardURL }}
       {{- end }}
@@ -65,3 +78,39 @@ templates:
       {{ end }}
 
       {{- end }}
+{% endraw %}
+{% else %}
+{% raw %}
+  - orgId: 1
+    name: email.subject
+    template: |-
+      {{ define "email.subject" -}}
+      [{{ .Status | toUpper }}] Grafana Alert
+      {{- end -}}
+  - orgId: 1
+    name: email.body
+    template: |-
+      {{ define "email.body" -}}
+      Alert: {{ .Labels.alertname }}
+      {{ if .Annotations -}}
+      Summary: {{ .Annotations.summary}}
+      Description: {{ .Annotations.description }}
+      {{ end -}}
+      Log message: {{ index .Labels "message" }}
+      Explore logs: {{ GRAFANA_ENDPOINT }}
+      {{ if .DashboardURL -}}
+      Go to dashboard: {{ .DashboardURL }}
+      {{- end }}
+      {{ if .PanelURL -}}
+      Go to panel: {{ .PanelURL }}
+      {{- end }}
+      Details:
+      {{ range .Labels.SortedPairs -}}
+      - {{ .Name }}: `{{ .Value }}`
+      {{ end -}}
+      {{ if .SilenceURL -}}
+      Silence this alert: {{ .SilenceURL }}
+      {{- end }}
+      {{- end }}
+{% endraw %}
+{% endif %}
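The rendered file must still contain Grafana's own Go-template placeholders, so the .j2 template escapes them instead of letting Jinja2 evaluate them: individual placeholders are wrapped as {{ '{{' }} ... {{ '}}' }}, and whole template bodies are wrapped in {% raw %} blocks. A small sketch (not part of the package) showing both escaping mechanisms:

# Demonstrates how the escaping used in alert_resources.yaml.j2 survives rendering.
from jinja2 import Environment

env = Environment(trim_blocks=True, lstrip_blocks=True)

snippet = (
    "type: {{ ALERT_TYPE }}\n"
    "title: |\n"
    "  {{ '{{' }} template \"slack.title\" . {{ '}}' }}\n"
    "{% raw %}\n"
    "text: {{ template \"slack.message\" . }}\n"
    "{% endraw %}\n"
)

print(env.from_string(snippet).render(ALERT_TYPE="slack"))
# type: slack
# title: |
#   {{ template "slack.title" . }}
# text: {{ template "slack.message" . }}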
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/distr_event_collector.py
@@ -140,14 +140,13 @@ while True:
             snode.rpc_port,
             snode.rpc_username,
             snode.rpc_password,
-            timeout=
-
-        num_of_events = constants.DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS
+            timeout=10, retry=2)
+
         try:
-
-
+            events = client.distr_status_events_discard_then_get(0, constants.DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS)
+
             if not events:
-                logger.
+                logger.debug("no events found")
                 continue
 
             logger.info(f"Found events: {len(events)}")
@@ -161,8 +160,8 @@ while True:
                 logger.info(f"Processing event: {eid}")
                 process_event(eid)
 
-
-
+            logger.info(f"Discarding events: {len(events)}")
+            client.distr_status_events_discard_then_get(len(events), 0)
 
     except Exception as e:
         logger.error("Failed to process distr events")
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/health_check_service.py
@@ -118,7 +118,7 @@ while True:
         rpc_client = RPCClient(
             snode.mgmt_ip, snode.rpc_port,
             snode.rpc_username, snode.rpc_password,
-            timeout=
+            timeout=10, retry=1)
         for remote_device in snode.remote_devices:
             ret = rpc_client.get_bdevs(remote_device.remote_bdev)
             if ret:
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/storage_node_monitor.py
@@ -66,8 +66,8 @@ def get_cluster_target_status(cluster_id):
     logger.debug(f"online_devices: {online_devices}")
     logger.debug(f"offline_devices: {offline_devices}")
 
-    # if more than two affected
-    if affected_nodes > 2:
+    # if more than two affected nodes then cluster is suspended
+    if affected_nodes > 2 or offline_nodes > 2:
         return Cluster.STATUS_SUSPENDED
 
     # if any device goes offline then cluster is degraded
@@ -105,7 +105,7 @@ def update_cluster_status(cluster_id):
 
 def set_node_online(node):
     if node.status != StorageNode.STATUS_ONLINE:
-        storage_node_ops.set_node_status(snode, StorageNode.STATUS_ONLINE)
+        storage_node_ops.set_node_status(snode.get_id(), StorageNode.STATUS_ONLINE)
 
 
 def set_node_offline(node):
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/tasks_runner_restart.py
@@ -175,6 +175,7 @@ def task_runner_node(task):
     if ret:
         logger.info(f"Node restart succeeded")
 
+    time.sleep(5)
     if _get_node_unavailable_devices_count(node.get_id()) == 0 and node.status == StorageNode.STATUS_ONLINE:
         logger.info(f"Node is online: {node.get_id()}")
         task.function_result = "done"
{sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/storage_node_ops.py
@@ -799,140 +799,6 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list,
     return "Success"
 
 
-# Deprecated
-def add_storage_node(cluster_id, iface_name, data_nics):
-    db_controller = DBController()
-    kv_store = db_controller.kv_store
-
-    cluster = db_controller.get_cluster_by_id(cluster_id)
-    if not cluster:
-        logger.error("Cluster not found: %s", cluster_id)
-        return False
-
-    logger.info("Add Storage node")
-
-    hostname = utils.get_hostname()
-    snode = db_controller.get_storage_node_by_hostname(hostname)
-    if snode:
-        logger.error("Node already exists, try remove it first.")
-        exit(1)
-    else:
-        snode = StorageNode()
-        snode.uuid = str(uuid.uuid4())
-
-    mgmt_ip = _get_if_ip_address(iface_name)
-    system_id = utils.get_system_id()
-
-    BASE_NQN = cluster.nqn.split(":")[0]
-    subsystem_nqn = f"{BASE_NQN}:{hostname}"
-
-    if data_nics:
-        data_nics = _get_data_nics(data_nics)
-    else:
-        data_nics = _get_data_nics([iface_name])
-
-    rpc_user, rpc_pass = utils.generate_rpc_user_and_pass()
-
-    # creating storage node object
-    snode.status = StorageNode.STATUS_IN_CREATION
-    snode.baseboard_sn = utils.get_baseboard_sn()
-    snode.system_uuid = system_id
-    snode.hostname = hostname
-    snode.host_nqn = subsystem_nqn
-    snode.subsystem = subsystem_nqn
-    snode.data_nics = data_nics
-    snode.mgmt_ip = mgmt_ip
-    snode.rpc_port = constants.RPC_HTTP_PROXY_PORT
-    snode.rpc_username = rpc_user
-    snode.rpc_password = rpc_pass
-    snode.cluster_id = cluster_id
-    snode.write_to_db(kv_store)
-
-    # creating RPCClient instance
-    rpc_client = RPCClient(
-        snode.mgmt_ip,
-        snode.rpc_port,
-        snode.rpc_username,
-        snode.rpc_password)
-
-    logger.info("Getting nvme devices")
-    devs = get_nvme_devices()
-    logger.debug(devs)
-    pcies = [d[0] for d in devs]
-    nvme_devs = addNvmeDevices(cluster, rpc_client, pcies, snode)
-    if not nvme_devs:
-        logger.error("No NVMe devices was found!")
-
-    logger.debug(nvme_devs)
-    snode.nvme_devices = nvme_devs
-
-    # Set device cluster order
-    dev_order = get_next_cluster_device_order(db_controller, cluster_id)
-    for index, nvme in enumerate(snode.nvme_devices):
-        nvme.cluster_device_order = dev_order
-        dev_order += 1
-    snode.write_to_db(db_controller.kv_store)
-
-    # prepare devices
-    # _prepare_cluster_devices(snode)
-
-    logger.info("Connecting to remote devices")
-    remote_devices = _connect_to_remote_devs(snode)
-    snode.remote_devices = remote_devices
-
-    logger.info("Setting node status to Active")
-    snode.status = StorageNode.STATUS_ONLINE
-    snode.write_to_db(kv_store)
-
-    # make other nodes connect to the new devices
-    logger.info("Make other nodes connect to the new devices")
-    snodes = db_controller.get_storage_nodes_by_cluster_id(cluster_id)
-    for node_index, node in enumerate(snodes):
-        if node.get_id() == snode.get_id():
-            continue
-        logger.info(f"Connecting to node: {node.get_id()}")
-        rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password)
-        count = 0
-        for dev in snode.nvme_devices:
-            name = f"remote_{dev.alceml_bdev}"
-            ret = rpc_client.bdev_nvme_attach_controller_tcp(name, dev.nvmf_nqn, dev.nvmf_ip, dev.nvmf_port)
-            if not ret:
-                logger.error(f"Failed to connect to device: {name}")
-                continue
-
-            dev.remote_bdev = f"{name}n1"
-            idx = -1
-            for i, d in enumerate(node.remote_devices):
-                if d.get_id() == dev.get_id():
-                    idx = i
-                    break
-            if idx >= 0:
-                node.remote_devices[idx] = dev
-            else:
-                node.remote_devices.append(dev)
-            count += 1
-        node.write_to_db(kv_store)
-        logger.info(f"connected to devices count: {count}")
-
-    logger.info("Sending cluster map")
-    ret = distr_controller.send_cluster_map_to_node(snode)
-    if not ret:
-        return False, "Failed to send cluster map"
-    ret = distr_controller.send_cluster_map_add_node(snode)
-    if not ret:
-        return False, "Failed to send cluster map add node"
-    time.sleep(3)
-
-    logger.info("Sending cluster event updates")
-    distr_controller.send_node_status_event(snode, StorageNode.STATUS_ONLINE)
-
-    for dev in snode.nvme_devices:
-        distr_controller.send_dev_status_event(dev, NVMeDevice.STATUS_ONLINE)
-
-    logger.info("Done")
-    return "Success"
-
-
 def delete_storage_node(node_id):
     db_controller = DBController()
     snode = db_controller.get_storage_node_by_id(node_id)