sbcli-pre 1.1.2__zip → 1.1.4__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/PKG-INFO +1 -1
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/env_var +1 -1
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/PKG-INFO +1 -1
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/SOURCES.txt +1 -1
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_cli/cli.py +7 -1
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/cluster_ops.py +45 -2
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/constants.py +1 -1
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/health_controller.py +1 -1
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/rpc_client.py +5 -4
- sbcli_pre-1.1.2/simplyblock_core/scripts/alerting/alert_resources.yaml → sbcli_pre-1.1.4/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 +54 -5
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/distr_event_collector.py +7 -8
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/health_check_service.py +1 -1
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/storage_node_monitor.py +3 -3
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/tasks_runner_restart.py +6 -7
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/README.md +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/pyproject.toml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/dependency_links.txt +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/entry_points.txt +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/requires.txt +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/top_level.txt +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/setup.cfg +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/setup.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_cli/main.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/__init__.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/cnode_client.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/compute_node_ops.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/__init__.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/caching_node_controller.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/cluster_events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/device_controller.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/device_events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/events_controller.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/lvol_controller.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/lvol_events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/mgmt_events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/pool_controller.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/pool_events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/snapshot_controller.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/snapshot_events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/storage_events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/tasks_controller.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/controllers/tasks_events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/distr_controller.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/kv_store.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/mgmt_node_ops.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/__init__.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/base_model.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/caching_node.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/cluster.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/compute_node.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/events.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/global_settings.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/iface.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/job_schedule.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/lvol_model.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/mgmt_node.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/nvme_device.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/pool.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/port_stat.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/snapshot.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/stats.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/models/storage_node.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/pci_utils.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/__init__.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/config_docker.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/cluster.json +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/devices.json +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/pools.json +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/datasource.yml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/db_config_double.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/db_config_single.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/deploy_stack.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/docker-compose-swarm.yml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/haproxy.cfg +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/install_deps.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/prometheus.yml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/run_ssh.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/set_db_config.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/stack_deploy_wait.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/__init__.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/caching_node_monitor.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/cap_monitor.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/capacity_and_stats_collector.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/device_monitor.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/install_service.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/log_agg_service.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/lvol_monitor.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/lvol_stat_collector.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/port_stat_collector.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/remove_service.sh +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/service_template.service +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/tasks_runner_migration.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/shell_utils.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/snode_client.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/storage_node_ops.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/utils.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/__init__.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/app.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/auth_middleware.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/__init__.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/node_api_basic.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/snode_ops.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_cluster.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_device.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_lvol.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_pool.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_storage_node.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/caching_node_app.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/caching_node_app_k8s.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/node_utils.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/node_webapp.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/snode_app.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/static/delete.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/static/deploy.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/static/deploy_cnode.yaml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/static/deploy_spdk.yaml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/static/is_up.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/static/list_deps.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/static/rpac.yaml +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/static/tst.py +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
- {sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_web/utils.py +0 -0
@@ -73,7 +73,7 @@ simplyblock_core/scripts/prometheus.yml
|
|
73
73
|
simplyblock_core/scripts/run_ssh.sh
|
74
74
|
simplyblock_core/scripts/set_db_config.sh
|
75
75
|
simplyblock_core/scripts/stack_deploy_wait.sh
|
76
|
-
simplyblock_core/scripts/alerting/alert_resources.yaml
|
76
|
+
simplyblock_core/scripts/alerting/alert_resources.yaml.j2
|
77
77
|
simplyblock_core/scripts/alerting/alert_rules.yaml
|
78
78
|
simplyblock_core/scripts/dashboards/cluster.json
|
79
79
|
simplyblock_core/scripts/dashboards/devices.json
|
@@ -235,6 +235,10 @@ class CLIWrapper:
|
|
235
235
|
dest='log_del_interval', default='7d')
|
236
236
|
sub_command.add_argument("--metrics-retention-period", help='retention period for prometheus metrics, default: 7d',
|
237
237
|
dest='metrics_retention_period', default='7d')
|
238
|
+
sub_command.add_argument("--contact-point", help='the email or slack webhook url to be used for alerting',
|
239
|
+
dest='contact_point', default='')
|
240
|
+
sub_command.add_argument("--grafana-endpoint", help='the endpoint url for grafana',
|
241
|
+
dest='grafana_endpoint', default='')
|
238
242
|
|
239
243
|
# add cluster
|
240
244
|
sub_command = self.add_sub_command(subparser, 'add', 'Add new cluster')
|
@@ -1137,11 +1141,13 @@ class CLIWrapper:
|
|
1137
1141
|
ifname = args.ifname
|
1138
1142
|
log_del_interval = args.log_del_interval
|
1139
1143
|
metrics_retention_period = args.metrics_retention_period
|
1144
|
+
contact_point = args.contact_point
|
1145
|
+
grafana_endpoint = args.grafana_endpoint
|
1140
1146
|
|
1141
1147
|
return cluster_ops.create_cluster(
|
1142
1148
|
blk_size, page_size_in_blocks,
|
1143
1149
|
CLI_PASS, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit,
|
1144
|
-
ifname, log_del_interval, metrics_retention_period)
|
1150
|
+
ifname, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint)
|
1145
1151
|
|
1146
1152
|
def query_yes_no(self, question, default="yes"):
|
1147
1153
|
"""Ask a yes/no question via raw_input() and return their answer.
|
@@ -2,11 +2,16 @@
|
|
2
2
|
import json
|
3
3
|
import logging
|
4
4
|
import os
|
5
|
+
import re
|
6
|
+
import tempfile
|
7
|
+
import shutil
|
8
|
+
import subprocess
|
5
9
|
import time
|
6
10
|
import uuid
|
7
11
|
|
8
12
|
import docker
|
9
13
|
import requests
|
14
|
+
from jinja2 import Environment, FileSystemLoader
|
10
15
|
|
11
16
|
from simplyblock_core import utils, scripts, constants, mgmt_node_ops, storage_node_ops
|
12
17
|
from simplyblock_core.controllers import cluster_events, device_controller
|
@@ -16,7 +21,7 @@ from simplyblock_core.models.nvme_device import NVMeDevice
|
|
16
21
|
from simplyblock_core.models.storage_node import StorageNode
|
17
22
|
|
18
23
|
logger = logging.getLogger()
|
19
|
-
|
24
|
+
TOP_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
20
25
|
|
21
26
|
def _add_grafana_dashboards(username, password, cluster_ip):
|
22
27
|
url = f"http://{username}:{password}@{cluster_ip}/grafana/api/dashboards/import"
|
@@ -69,7 +74,8 @@ def _add_graylog_input(cluster_ip, password):
|
|
69
74
|
|
70
75
|
|
71
76
|
def create_cluster(blk_size, page_size_in_blocks, cli_pass,
|
72
|
-
cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period
|
77
|
+
cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period,
|
78
|
+
contact_point, grafana_endpoint):
|
73
79
|
logger.info("Installing dependencies...")
|
74
80
|
ret = scripts.install_deps()
|
75
81
|
logger.info("Installing dependencies > Done")
|
@@ -124,6 +130,43 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
|
|
124
130
|
if prov_cap_crit and prov_cap_crit > 0:
|
125
131
|
c.prov_cap_crit = prov_cap_crit
|
126
132
|
|
133
|
+
alerts_template_folder = os.path.join(TOP_DIR, "simplyblock_core/scripts/alerting/")
|
134
|
+
alert_resources_file = "alert_resources.yaml"
|
135
|
+
|
136
|
+
env = Environment(loader=FileSystemLoader(alerts_template_folder), trim_blocks=True, lstrip_blocks=True)
|
137
|
+
template = env.get_template(f'{alert_resources_file}.j2')
|
138
|
+
|
139
|
+
slack_pattern = re.compile(r"https://hooks\.slack\.com/services/\S+")
|
140
|
+
email_pattern = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
|
141
|
+
|
142
|
+
if slack_pattern.match(contact_point):
|
143
|
+
ALERT_TYPE = "slack"
|
144
|
+
elif email_pattern.match(contact_point):
|
145
|
+
ALERT_TYPE = "email"
|
146
|
+
else:
|
147
|
+
ALERT_TYPE = "slack"
|
148
|
+
|
149
|
+
values = {
|
150
|
+
'CONTACT_POINT': contact_point,
|
151
|
+
'GRAFANA_ENDPOINT': grafana_endpoint,
|
152
|
+
'ALERT_TYPE': ALERT_TYPE,
|
153
|
+
}
|
154
|
+
|
155
|
+
temp_dir = tempfile.mkdtemp()
|
156
|
+
|
157
|
+
temp_file_path = os.path.join(temp_dir, alert_resources_file)
|
158
|
+
with open(temp_file_path, 'w') as file:
|
159
|
+
file.write(template.render(values))
|
160
|
+
|
161
|
+
destination_file_path = os.path.join(alerts_template_folder, alert_resources_file)
|
162
|
+
try:
|
163
|
+
subprocess.run(['sudo', '-v'], check=True) # sudo -v checks if the current user has sudo permissions
|
164
|
+
subprocess.run(['sudo', 'mv', temp_file_path, destination_file_path], check=True)
|
165
|
+
print(f"File moved to {destination_file_path} successfully.")
|
166
|
+
except subprocess.CalledProcessError as e:
|
167
|
+
print(f"An error occurred: {e}")
|
168
|
+
shutil.rmtree(temp_dir)
|
169
|
+
|
127
170
|
logger.info("Deploying swarm stack ...")
|
128
171
|
ret = scripts.deploy_stack(cli_pass, DEV_IP, constants.SIMPLY_BLOCK_DOCKER_IMAGE, c.secret, c.uuid, log_del_interval, metrics_retention_period)
|
129
172
|
logger.info("Deploying swarm stack > Done")
|
@@ -20,7 +20,7 @@ DEV_MONITOR_INTERVAL_SEC = 10
|
|
20
20
|
DEV_STAT_COLLECTOR_INTERVAL_SEC = 2
|
21
21
|
PROT_STAT_COLLECTOR_INTERVAL_SEC = 2
|
22
22
|
DISTR_EVENT_COLLECTOR_INTERVAL_SEC = 2
|
23
|
-
DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS =
|
23
|
+
DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS = 10
|
24
24
|
CAP_MONITOR_INTERVAL_SEC = 30
|
25
25
|
SSD_VENDOR_WHITE_LIST = ["1d0f:cd01", "1d0f:cd00"]
|
26
26
|
|
@@ -69,7 +69,7 @@ def _check_node_rpc(rpc_ip, rpc_port, rpc_username, rpc_password):
|
|
69
69
|
try:
|
70
70
|
rpc_client = RPCClient(
|
71
71
|
rpc_ip, rpc_port, rpc_username, rpc_password,
|
72
|
-
timeout=
|
72
|
+
timeout=10, retry=1)
|
73
73
|
ret = rpc_client.get_version()
|
74
74
|
if ret:
|
75
75
|
logger.debug(f"SPDK version: {ret['version']}")
|
@@ -434,8 +434,9 @@ class RPCClient:
|
|
434
434
|
"trsvcid": str(port),
|
435
435
|
"subnqn": nqn,
|
436
436
|
"fabrics_connect_timeout_us": 100000,
|
437
|
-
"fast_io_fail_timeout_sec":
|
437
|
+
"fast_io_fail_timeout_sec": 1,
|
438
438
|
"num_io_queues": 16384,
|
439
|
+
"ctrlr_loss_timeout_sec": 2,
|
439
440
|
}
|
440
441
|
return self._request("bdev_nvme_attach_controller", params)
|
441
442
|
|
@@ -483,9 +484,9 @@ class RPCClient:
|
|
483
484
|
params = {
|
484
485
|
"bdev_retry_count": 0,
|
485
486
|
"transport_retry_count": 0,
|
486
|
-
"ctrlr_loss_timeout_sec":
|
487
|
-
"fast_io_fail_timeout_sec":
|
488
|
-
"reconnect_delay_sec":
|
487
|
+
"ctrlr_loss_timeout_sec": 2,
|
488
|
+
"fast_io_fail_timeout_sec": 1,
|
489
|
+
"reconnect_delay_sec": 1,
|
489
490
|
"keep_alive_timeout_ms": 200,
|
490
491
|
"transport_ack_timeout": 7,
|
491
492
|
"timeout_us": 100000
|
@@ -12,15 +12,26 @@ contactPoints:
|
|
12
12
|
name: grafana-alerts
|
13
13
|
receivers:
|
14
14
|
- uid: grafana
|
15
|
-
type:
|
15
|
+
type: {{ ALERT_TYPE }}
|
16
|
+
{% if ALERT_TYPE == 'slack' %}
|
16
17
|
settings:
|
17
18
|
username: grafana_bot
|
18
|
-
url: '
|
19
|
+
url: '{{ CONTACT_POINT }}'
|
19
20
|
title: |
|
20
|
-
{{ template "slack.title" . }}
|
21
|
+
{{ '{{' }} template "slack.title" . {{ '}}' }}
|
21
22
|
text: |
|
22
|
-
{{ template "slack.message" . }}
|
23
|
+
{{ '{{' }} template "slack.message" . {{ '}}' }}
|
24
|
+
{% else %}
|
25
|
+
settings:
|
26
|
+
addresses: '{{ CONTACT_POINT }}'
|
27
|
+
subject: |
|
28
|
+
{{ '{{' }} template "email.subject" . {{ '}}' }}
|
29
|
+
body: |
|
30
|
+
{{ '{{' }} template "email.body" . {{ '}}' }}
|
31
|
+
{% endif %}
|
23
32
|
|
33
|
+
{% if ALERT_TYPE == 'slack' %}
|
34
|
+
{% raw %}
|
24
35
|
templates:
|
25
36
|
- orgId: 1
|
26
37
|
name: slack.title
|
@@ -38,7 +49,9 @@ templates:
|
|
38
49
|
*Description*: {{ .Annotations.description }}
|
39
50
|
{{ end -}}
|
40
51
|
*Log message*: {{ index .Labels "message" }}
|
41
|
-
|
52
|
+
{% endraw %}
|
53
|
+
*Explore logs:* {{ GRAFANA_ENDPOINT }}
|
54
|
+
{% raw %}
|
42
55
|
{{ if .DashboardURL -}}
|
43
56
|
*Go to dashboard:* {{ .DashboardURL }}
|
44
57
|
{{- end }}
|
@@ -65,3 +78,39 @@ templates:
|
|
65
78
|
{{ end }}
|
66
79
|
|
67
80
|
{{- end }}
|
81
|
+
{% endraw %}
|
82
|
+
{% else %}
|
83
|
+
{% raw %}
|
84
|
+
- orgId: 1
|
85
|
+
name: email.subject
|
86
|
+
template: |-
|
87
|
+
{{ define "email.subject" -}}
|
88
|
+
[{{ .Status | toUpper }}] Grafana Alert
|
89
|
+
{{- end -}}
|
90
|
+
- orgId: 1
|
91
|
+
name: email.body
|
92
|
+
template: |-
|
93
|
+
{{ define "email.body" -}}
|
94
|
+
Alert: {{ .Labels.alertname }}
|
95
|
+
{{ if .Annotations -}}
|
96
|
+
Summary: {{ .Annotations.summary}}
|
97
|
+
Description: {{ .Annotations.description }}
|
98
|
+
{{ end -}}
|
99
|
+
Log message: {{ index .Labels "message" }}
|
100
|
+
Explore logs: {{ GRAFANA_ENDPOINT }}
|
101
|
+
{{ if .DashboardURL -}}
|
102
|
+
Go to dashboard: {{ .DashboardURL }}
|
103
|
+
{{- end }}
|
104
|
+
{{ if .PanelURL -}}
|
105
|
+
Go to panel: {{ .PanelURL }}
|
106
|
+
{{- end }}
|
107
|
+
Details:
|
108
|
+
{{ range .Labels.SortedPairs -}}
|
109
|
+
- {{ .Name }}: `{{ .Value }}`
|
110
|
+
{{ end -}}
|
111
|
+
{{ if .SilenceURL -}}
|
112
|
+
Silence this alert: {{ .SilenceURL }}
|
113
|
+
{{- end }}
|
114
|
+
{{- end }}
|
115
|
+
{% endraw %}
|
116
|
+
{% endif %}
|
@@ -140,14 +140,13 @@ while True:
|
|
140
140
|
snode.rpc_port,
|
141
141
|
snode.rpc_username,
|
142
142
|
snode.rpc_password,
|
143
|
-
timeout=
|
144
|
-
|
145
|
-
num_of_events = constants.DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS
|
143
|
+
timeout=10, retry=2)
|
144
|
+
|
146
145
|
try:
|
147
|
-
|
148
|
-
|
146
|
+
events = client.distr_status_events_discard_then_get(0, constants.DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS)
|
147
|
+
|
149
148
|
if not events:
|
150
|
-
logger.
|
149
|
+
logger.debug("no events found")
|
151
150
|
continue
|
152
151
|
|
153
152
|
logger.info(f"Found events: {len(events)}")
|
@@ -161,8 +160,8 @@ while True:
|
|
161
160
|
logger.info(f"Processing event: {eid}")
|
162
161
|
process_event(eid)
|
163
162
|
|
164
|
-
|
165
|
-
|
163
|
+
logger.info(f"Discarding events: {len(events)}")
|
164
|
+
client.distr_status_events_discard_then_get(len(events), 0)
|
166
165
|
|
167
166
|
except Exception as e:
|
168
167
|
logger.error("Failed to process distr events")
|
@@ -118,7 +118,7 @@ while True:
|
|
118
118
|
rpc_client = RPCClient(
|
119
119
|
snode.mgmt_ip, snode.rpc_port,
|
120
120
|
snode.rpc_username, snode.rpc_password,
|
121
|
-
timeout=
|
121
|
+
timeout=10, retry=1)
|
122
122
|
for remote_device in snode.remote_devices:
|
123
123
|
ret = rpc_client.get_bdevs(remote_device.remote_bdev)
|
124
124
|
if ret:
|
@@ -66,8 +66,8 @@ def get_cluster_target_status(cluster_id):
|
|
66
66
|
logger.debug(f"online_devices: {online_devices}")
|
67
67
|
logger.debug(f"offline_devices: {offline_devices}")
|
68
68
|
|
69
|
-
# if more than two affected
|
70
|
-
if affected_nodes > 2:
|
69
|
+
# if more than two affected nodes then cluster is suspended
|
70
|
+
if affected_nodes > 2 or offline_nodes > 2:
|
71
71
|
return Cluster.STATUS_SUSPENDED
|
72
72
|
|
73
73
|
# if any device goes offline then cluster is degraded
|
@@ -105,7 +105,7 @@ def update_cluster_status(cluster_id):
|
|
105
105
|
|
106
106
|
def set_node_online(node):
|
107
107
|
if node.status != StorageNode.STATUS_ONLINE:
|
108
|
-
storage_node_ops.set_node_status(snode, StorageNode.STATUS_ONLINE)
|
108
|
+
storage_node_ops.set_node_status(snode.get_id(), StorageNode.STATUS_ONLINE)
|
109
109
|
|
110
110
|
|
111
111
|
def set_node_offline(node):
|
@@ -150,9 +150,8 @@ def task_runner_node(task):
|
|
150
150
|
task.write_to_db(db_controller.kv_store)
|
151
151
|
return True
|
152
152
|
|
153
|
-
|
154
|
-
|
155
|
-
logger.info(f"Node is online: {node.get_id()}, no restart needed")
|
153
|
+
if _get_node_unavailable_devices_count(node.get_id()) == 0 and node.status == StorageNode.STATUS_ONLINE:
|
154
|
+
logger.info(f"Node is online: {node.get_id()}")
|
156
155
|
task.function_result = "Node is online"
|
157
156
|
task.status = JobSchedule.STATUS_DONE
|
158
157
|
task.write_to_db(db_controller.kv_store)
|
@@ -168,7 +167,7 @@ def task_runner_node(task):
|
|
168
167
|
ret = storage_node_ops.shutdown_storage_node(node.get_id(), force=True)
|
169
168
|
if ret:
|
170
169
|
logger.info(f"Node shutdown succeeded")
|
171
|
-
time.sleep(
|
170
|
+
time.sleep(3)
|
172
171
|
|
173
172
|
# resetting node
|
174
173
|
logger.info(f"Restart node {node.get_id()}")
|
@@ -176,9 +175,9 @@ def task_runner_node(task):
|
|
176
175
|
if ret:
|
177
176
|
logger.info(f"Node restart succeeded")
|
178
177
|
|
179
|
-
|
180
|
-
if node.status == StorageNode.STATUS_ONLINE:
|
181
|
-
logger.info(f"Node is online: {node.get_id()}
|
178
|
+
time.sleep(5)
|
179
|
+
if _get_node_unavailable_devices_count(node.get_id()) == 0 and node.status == StorageNode.STATUS_ONLINE:
|
180
|
+
logger.info(f"Node is online: {node.get_id()}")
|
182
181
|
task.function_result = "done"
|
183
182
|
task.status = JobSchedule.STATUS_DONE
|
184
183
|
task.write_to_db(db_controller.kv_store)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sbcli_pre-1.1.2 → sbcli_pre-1.1.4}/simplyblock_core/services/capacity_and_stats_collector.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|