sbcli-pre 1.1.3__zip → 1.1.4__zip

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/PKG-INFO +1 -1
  2. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/env_var +1 -1
  3. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/PKG-INFO +1 -1
  4. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/SOURCES.txt +1 -1
  5. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_cli/cli.py +7 -1
  6. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/cluster_ops.py +45 -2
  7. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/constants.py +1 -1
  8. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/health_controller.py +1 -1
  9. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/rpc_client.py +5 -4
  10. sbcli_pre-1.1.3/simplyblock_core/scripts/alerting/alert_resources.yaml → sbcli_pre-1.1.4/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 +54 -5
  11. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/distr_event_collector.py +7 -8
  12. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/health_check_service.py +1 -1
  13. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/storage_node_monitor.py +3 -3
  14. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/tasks_runner_restart.py +1 -0
  15. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/README.md +0 -0
  16. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/pyproject.toml +0 -0
  17. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/dependency_links.txt +0 -0
  18. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/entry_points.txt +0 -0
  19. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/requires.txt +0 -0
  20. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/sbcli_pre.egg-info/top_level.txt +0 -0
  21. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/setup.cfg +0 -0
  22. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/setup.py +0 -0
  23. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_cli/main.py +0 -0
  24. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/__init__.py +0 -0
  25. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/cnode_client.py +0 -0
  26. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/compute_node_ops.py +0 -0
  27. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/__init__.py +0 -0
  28. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/caching_node_controller.py +0 -0
  29. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/cluster_events.py +0 -0
  30. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/device_controller.py +0 -0
  31. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/device_events.py +0 -0
  32. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/events_controller.py +0 -0
  33. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/lvol_controller.py +0 -0
  34. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/lvol_events.py +0 -0
  35. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/mgmt_events.py +0 -0
  36. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/pool_controller.py +0 -0
  37. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/pool_events.py +0 -0
  38. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/snapshot_controller.py +0 -0
  39. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/snapshot_events.py +0 -0
  40. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/storage_events.py +0 -0
  41. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/tasks_controller.py +0 -0
  42. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/controllers/tasks_events.py +0 -0
  43. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/distr_controller.py +0 -0
  44. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/kv_store.py +0 -0
  45. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/mgmt_node_ops.py +0 -0
  46. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/__init__.py +0 -0
  47. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/base_model.py +0 -0
  48. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/caching_node.py +0 -0
  49. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/cluster.py +0 -0
  50. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/compute_node.py +0 -0
  51. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/events.py +0 -0
  52. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/global_settings.py +0 -0
  53. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/iface.py +0 -0
  54. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/job_schedule.py +0 -0
  55. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/lvol_model.py +0 -0
  56. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/mgmt_node.py +0 -0
  57. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/nvme_device.py +0 -0
  58. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/pool.py +0 -0
  59. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/port_stat.py +0 -0
  60. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/snapshot.py +0 -0
  61. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/stats.py +0 -0
  62. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/models/storage_node.py +0 -0
  63. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/pci_utils.py +0 -0
  64. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/__init__.py +0 -0
  65. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
  66. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
  67. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/config_docker.sh +0 -0
  68. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/cluster.json +0 -0
  69. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/devices.json +0 -0
  70. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
  71. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
  72. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
  73. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/dashboards/pools.json +0 -0
  74. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/datasource.yml +0 -0
  75. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/db_config_double.sh +0 -0
  76. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/db_config_single.sh +0 -0
  77. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/deploy_stack.sh +0 -0
  78. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +0 -0
  79. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/docker-compose-swarm.yml +0 -0
  80. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/haproxy.cfg +0 -0
  81. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/install_deps.sh +0 -0
  82. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/prometheus.yml +0 -0
  83. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/run_ssh.sh +0 -0
  84. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/set_db_config.sh +0 -0
  85. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/scripts/stack_deploy_wait.sh +0 -0
  86. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/__init__.py +0 -0
  87. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/caching_node_monitor.py +0 -0
  88. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/cap_monitor.py +0 -0
  89. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/capacity_and_stats_collector.py +0 -0
  90. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/device_monitor.py +0 -0
  91. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/install_service.sh +0 -0
  92. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/log_agg_service.py +0 -0
  93. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/lvol_monitor.py +0 -0
  94. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/lvol_stat_collector.py +0 -0
  95. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
  96. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/port_stat_collector.py +0 -0
  97. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/remove_service.sh +0 -0
  98. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/service_template.service +0 -0
  99. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/services/tasks_runner_migration.py +0 -0
  100. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/shell_utils.py +0 -0
  101. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/snode_client.py +0 -0
  102. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/storage_node_ops.py +0 -0
  103. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_core/utils.py +0 -0
  104. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/__init__.py +0 -0
  105. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/app.py +0 -0
  106. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/auth_middleware.py +0 -0
  107. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/__init__.py +0 -0
  108. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
  109. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
  110. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/node_api_basic.py +0 -0
  111. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
  112. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
  113. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/snode_ops.py +0 -0
  114. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
  115. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_cluster.py +0 -0
  116. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_device.py +0 -0
  117. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_lvol.py +0 -0
  118. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
  119. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_pool.py +0 -0
  120. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
  121. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/blueprints/web_api_storage_node.py +0 -0
  122. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/caching_node_app.py +0 -0
  123. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/caching_node_app_k8s.py +0 -0
  124. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/node_utils.py +0 -0
  125. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/node_webapp.py +0 -0
  126. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/snode_app.py +0 -0
  127. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/static/delete.py +0 -0
  128. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/static/deploy.py +0 -0
  129. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/static/deploy_cnode.yaml +0 -0
  130. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/static/deploy_spdk.yaml +0 -0
  131. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/static/is_up.py +0 -0
  132. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/static/list_deps.py +0 -0
  133. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/static/rpac.yaml +0 -0
  134. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/static/tst.py +0 -0
  135. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
  136. {sbcli_pre-1.1.3 → sbcli_pre-1.1.4}/simplyblock_web/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sbcli-pre
3
- Version: 1.1.3
3
+ Version: 1.1.4
4
4
  Summary: CLI for managing SimplyBlock cluster
5
5
  Home-page: https://www.simplyblock.io/
6
6
  Author: Hamdy
@@ -1,5 +1,5 @@
1
1
  SIMPLY_BLOCK_COMMAND_NAME=sbcli-pre
2
- SIMPLY_BLOCK_VERSION=1.1.3
2
+ SIMPLY_BLOCK_VERSION=1.1.4
3
3
 
4
4
  SIMPLY_BLOCK_DOCKER_IMAGE=simplyblock/simplyblock:pre-release
5
5
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sbcli-pre
3
- Version: 1.1.3
3
+ Version: 1.1.4
4
4
  Summary: CLI for managing SimplyBlock cluster
5
5
  Home-page: https://www.simplyblock.io/
6
6
  Author: Hamdy
@@ -73,7 +73,7 @@ simplyblock_core/scripts/prometheus.yml
73
73
  simplyblock_core/scripts/run_ssh.sh
74
74
  simplyblock_core/scripts/set_db_config.sh
75
75
  simplyblock_core/scripts/stack_deploy_wait.sh
76
- simplyblock_core/scripts/alerting/alert_resources.yaml
76
+ simplyblock_core/scripts/alerting/alert_resources.yaml.j2
77
77
  simplyblock_core/scripts/alerting/alert_rules.yaml
78
78
  simplyblock_core/scripts/dashboards/cluster.json
79
79
  simplyblock_core/scripts/dashboards/devices.json
@@ -235,6 +235,10 @@ class CLIWrapper:
235
235
  dest='log_del_interval', default='7d')
236
236
  sub_command.add_argument("--metrics-retention-period", help='retention period for prometheus metrics, default: 7d',
237
237
  dest='metrics_retention_period', default='7d')
238
+ sub_command.add_argument("--contact-point", help='the email or slack webhook url to be used for alerting',
239
+ dest='contact_point', default='')
240
+ sub_command.add_argument("--grafana-endpoint", help='the endpoint url for grafana',
241
+ dest='grafana_endpoint', default='')
238
242
 
239
243
  # add cluster
240
244
  sub_command = self.add_sub_command(subparser, 'add', 'Add new cluster')
@@ -1137,11 +1141,13 @@ class CLIWrapper:
1137
1141
  ifname = args.ifname
1138
1142
  log_del_interval = args.log_del_interval
1139
1143
  metrics_retention_period = args.metrics_retention_period
1144
+ contact_point = args.contact_point
1145
+ grafana_endpoint = args.grafana_endpoint
1140
1146
 
1141
1147
  return cluster_ops.create_cluster(
1142
1148
  blk_size, page_size_in_blocks,
1143
1149
  CLI_PASS, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit,
1144
- ifname, log_del_interval, metrics_retention_period)
1150
+ ifname, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint)
1145
1151
 
1146
1152
  def query_yes_no(self, question, default="yes"):
1147
1153
  """Ask a yes/no question via raw_input() and return their answer.
@@ -2,11 +2,16 @@
2
2
  import json
3
3
  import logging
4
4
  import os
5
+ import re
6
+ import tempfile
7
+ import shutil
8
+ import subprocess
5
9
  import time
6
10
  import uuid
7
11
 
8
12
  import docker
9
13
  import requests
14
+ from jinja2 import Environment, FileSystemLoader
10
15
 
11
16
  from simplyblock_core import utils, scripts, constants, mgmt_node_ops, storage_node_ops
12
17
  from simplyblock_core.controllers import cluster_events, device_controller
@@ -16,7 +21,7 @@ from simplyblock_core.models.nvme_device import NVMeDevice
16
21
  from simplyblock_core.models.storage_node import StorageNode
17
22
 
18
23
  logger = logging.getLogger()
19
-
24
+ TOP_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
20
25
 
21
26
  def _add_grafana_dashboards(username, password, cluster_ip):
22
27
  url = f"http://{username}:{password}@{cluster_ip}/grafana/api/dashboards/import"
@@ -69,7 +74,8 @@ def _add_graylog_input(cluster_ip, password):
69
74
 
70
75
 
71
76
  def create_cluster(blk_size, page_size_in_blocks, cli_pass,
72
- cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period):
77
+ cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period,
78
+ contact_point, grafana_endpoint):
73
79
  logger.info("Installing dependencies...")
74
80
  ret = scripts.install_deps()
75
81
  logger.info("Installing dependencies > Done")
@@ -124,6 +130,43 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
124
130
  if prov_cap_crit and prov_cap_crit > 0:
125
131
  c.prov_cap_crit = prov_cap_crit
126
132
 
133
+ alerts_template_folder = os.path.join(TOP_DIR, "simplyblock_core/scripts/alerting/")
134
+ alert_resources_file = "alert_resources.yaml"
135
+
136
+ env = Environment(loader=FileSystemLoader(alerts_template_folder), trim_blocks=True, lstrip_blocks=True)
137
+ template = env.get_template(f'{alert_resources_file}.j2')
138
+
139
+ slack_pattern = re.compile(r"https://hooks\.slack\.com/services/\S+")
140
+ email_pattern = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
141
+
142
+ if slack_pattern.match(contact_point):
143
+ ALERT_TYPE = "slack"
144
+ elif email_pattern.match(contact_point):
145
+ ALERT_TYPE = "email"
146
+ else:
147
+ ALERT_TYPE = "slack"
148
+
149
+ values = {
150
+ 'CONTACT_POINT': contact_point,
151
+ 'GRAFANA_ENDPOINT': grafana_endpoint,
152
+ 'ALERT_TYPE': ALERT_TYPE,
153
+ }
154
+
155
+ temp_dir = tempfile.mkdtemp()
156
+
157
+ temp_file_path = os.path.join(temp_dir, alert_resources_file)
158
+ with open(temp_file_path, 'w') as file:
159
+ file.write(template.render(values))
160
+
161
+ destination_file_path = os.path.join(alerts_template_folder, alert_resources_file)
162
+ try:
163
+ subprocess.run(['sudo', '-v'], check=True) # sudo -v checks if the current user has sudo permissions
164
+ subprocess.run(['sudo', 'mv', temp_file_path, destination_file_path], check=True)
165
+ print(f"File moved to {destination_file_path} successfully.")
166
+ except subprocess.CalledProcessError as e:
167
+ print(f"An error occurred: {e}")
168
+ shutil.rmtree(temp_dir)
169
+
127
170
  logger.info("Deploying swarm stack ...")
128
171
  ret = scripts.deploy_stack(cli_pass, DEV_IP, constants.SIMPLY_BLOCK_DOCKER_IMAGE, c.secret, c.uuid, log_del_interval, metrics_retention_period)
129
172
  logger.info("Deploying swarm stack > Done")
@@ -20,7 +20,7 @@ DEV_MONITOR_INTERVAL_SEC = 10
20
20
  DEV_STAT_COLLECTOR_INTERVAL_SEC = 2
21
21
  PROT_STAT_COLLECTOR_INTERVAL_SEC = 2
22
22
  DISTR_EVENT_COLLECTOR_INTERVAL_SEC = 2
23
- DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS = 20
23
+ DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS = 10
24
24
  CAP_MONITOR_INTERVAL_SEC = 30
25
25
  SSD_VENDOR_WHITE_LIST = ["1d0f:cd01", "1d0f:cd00"]
26
26
 
@@ -69,7 +69,7 @@ def _check_node_rpc(rpc_ip, rpc_port, rpc_username, rpc_password):
69
69
  try:
70
70
  rpc_client = RPCClient(
71
71
  rpc_ip, rpc_port, rpc_username, rpc_password,
72
- timeout=3, retry=1)
72
+ timeout=10, retry=1)
73
73
  ret = rpc_client.get_version()
74
74
  if ret:
75
75
  logger.debug(f"SPDK version: {ret['version']}")
@@ -434,8 +434,9 @@ class RPCClient:
434
434
  "trsvcid": str(port),
435
435
  "subnqn": nqn,
436
436
  "fabrics_connect_timeout_us": 100000,
437
- "fast_io_fail_timeout_sec": 0,
437
+ "fast_io_fail_timeout_sec": 1,
438
438
  "num_io_queues": 16384,
439
+ "ctrlr_loss_timeout_sec": 2,
439
440
  }
440
441
  return self._request("bdev_nvme_attach_controller", params)
441
442
 
@@ -483,9 +484,9 @@ class RPCClient:
483
484
  params = {
484
485
  "bdev_retry_count": 0,
485
486
  "transport_retry_count": 0,
486
- "ctrlr_loss_timeout_sec": -1,
487
- "fast_io_fail_timeout_sec": 5,
488
- "reconnect_delay_sec": 5,
487
+ "ctrlr_loss_timeout_sec": 2,
488
+ "fast_io_fail_timeout_sec": 1,
489
+ "reconnect_delay_sec": 1,
489
490
  "keep_alive_timeout_ms": 200,
490
491
  "transport_ack_timeout": 7,
491
492
  "timeout_us": 100000
@@ -12,15 +12,26 @@ contactPoints:
12
12
  name: grafana-alerts
13
13
  receivers:
14
14
  - uid: grafana
15
- type: slack
15
+ type: {{ ALERT_TYPE }}
16
+ {% if ALERT_TYPE == 'slack' %}
16
17
  settings:
17
18
  username: grafana_bot
18
- url: 'https://hooks.slack.com/services/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'
19
+ url: '{{ CONTACT_POINT }}'
19
20
  title: |
20
- {{ template "slack.title" . }}
21
+ {{ '{{' }} template "slack.title" . {{ '}}' }}
21
22
  text: |
22
- {{ template "slack.message" . }}
23
+ {{ '{{' }} template "slack.message" . {{ '}}' }}
24
+ {% else %}
25
+ settings:
26
+ addresses: '{{ CONTACT_POINT }}'
27
+ subject: |
28
+ {{ '{{' }} template "email.subject" . {{ '}}' }}
29
+ body: |
30
+ {{ '{{' }} template "email.body" . {{ '}}' }}
31
+ {% endif %}
23
32
 
33
+ {% if ALERT_TYPE == 'slack' %}
34
+ {% raw %}
24
35
  templates:
25
36
  - orgId: 1
26
37
  name: slack.title
@@ -38,7 +49,9 @@ templates:
38
49
  *Description*: {{ .Annotations.description }}
39
50
  {{ end -}}
40
51
  *Log message*: {{ index .Labels "message" }}
41
- *Explore logs:* https://grafanaURL.com/explore?orgId=1
52
+ {% endraw %}
53
+ *Explore logs:* {{ GRAFANA_ENDPOINT }}
54
+ {% raw %}
42
55
  {{ if .DashboardURL -}}
43
56
  *Go to dashboard:* {{ .DashboardURL }}
44
57
  {{- end }}
@@ -65,3 +78,39 @@ templates:
65
78
  {{ end }}
66
79
 
67
80
  {{- end }}
81
+ {% endraw %}
82
+ {% else %}
83
+ {% raw %}
84
+ - orgId: 1
85
+ name: email.subject
86
+ template: |-
87
+ {{ define "email.subject" -}}
88
+ [{{ .Status | toUpper }}] Grafana Alert
89
+ {{- end -}}
90
+ - orgId: 1
91
+ name: email.body
92
+ template: |-
93
+ {{ define "email.body" -}}
94
+ Alert: {{ .Labels.alertname }}
95
+ {{ if .Annotations -}}
96
+ Summary: {{ .Annotations.summary}}
97
+ Description: {{ .Annotations.description }}
98
+ {{ end -}}
99
+ Log message: {{ index .Labels "message" }}
100
+ Explore logs: {{ GRAFANA_ENDPOINT }}
101
+ {{ if .DashboardURL -}}
102
+ Go to dashboard: {{ .DashboardURL }}
103
+ {{- end }}
104
+ {{ if .PanelURL -}}
105
+ Go to panel: {{ .PanelURL }}
106
+ {{- end }}
107
+ Details:
108
+ {{ range .Labels.SortedPairs -}}
109
+ - {{ .Name }}: `{{ .Value }}`
110
+ {{ end -}}
111
+ {{ if .SilenceURL -}}
112
+ Silence this alert: {{ .SilenceURL }}
113
+ {{- end }}
114
+ {{- end }}
115
+ {% endraw %}
116
+ {% endif %}
@@ -140,14 +140,13 @@ while True:
140
140
  snode.rpc_port,
141
141
  snode.rpc_username,
142
142
  snode.rpc_password,
143
- timeout=3, retry=2
144
- )
145
- num_of_events = constants.DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS
143
+ timeout=10, retry=2)
144
+
146
145
  try:
147
- # events = client.distr_status_events_get()
148
- events = client.distr_status_events_discard_then_get(0, num_of_events)
146
+ events = client.distr_status_events_discard_then_get(0, constants.DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS)
147
+
149
148
  if not events:
150
- logger.error("Distr events empty")
149
+ logger.debug("no events found")
151
150
  continue
152
151
 
153
152
  logger.info(f"Found events: {len(events)}")
@@ -161,8 +160,8 @@ while True:
161
160
  logger.info(f"Processing event: {eid}")
162
161
  process_event(eid)
163
162
 
164
- logger.info(f"Discarding events: {num_of_events}")
165
- events = client.distr_status_events_discard_then_get(num_of_events, 0)
163
+ logger.info(f"Discarding events: {len(events)}")
164
+ client.distr_status_events_discard_then_get(len(events), 0)
166
165
 
167
166
  except Exception as e:
168
167
  logger.error("Failed to process distr events")
@@ -118,7 +118,7 @@ while True:
118
118
  rpc_client = RPCClient(
119
119
  snode.mgmt_ip, snode.rpc_port,
120
120
  snode.rpc_username, snode.rpc_password,
121
- timeout=5, retry=3)
121
+ timeout=10, retry=1)
122
122
  for remote_device in snode.remote_devices:
123
123
  ret = rpc_client.get_bdevs(remote_device.remote_bdev)
124
124
  if ret:
@@ -66,8 +66,8 @@ def get_cluster_target_status(cluster_id):
66
66
  logger.debug(f"online_devices: {online_devices}")
67
67
  logger.debug(f"offline_devices: {offline_devices}")
68
68
 
69
- # if more than two affected modes then cluster is suspended
70
- if affected_nodes > 2:
69
+ # if more than two affected nodes then cluster is suspended
70
+ if affected_nodes > 2 or offline_nodes > 2:
71
71
  return Cluster.STATUS_SUSPENDED
72
72
 
73
73
  # if any device goes offline then cluster is degraded
@@ -105,7 +105,7 @@ def update_cluster_status(cluster_id):
105
105
 
106
106
  def set_node_online(node):
107
107
  if node.status != StorageNode.STATUS_ONLINE:
108
- storage_node_ops.set_node_status(snode, StorageNode.STATUS_ONLINE)
108
+ storage_node_ops.set_node_status(snode.get_id(), StorageNode.STATUS_ONLINE)
109
109
 
110
110
 
111
111
  def set_node_offline(node):
@@ -175,6 +175,7 @@ def task_runner_node(task):
175
175
  if ret:
176
176
  logger.info(f"Node restart succeeded")
177
177
 
178
+ time.sleep(5)
178
179
  if _get_node_unavailable_devices_count(node.get_id()) == 0 and node.status == StorageNode.STATUS_ONLINE:
179
180
  logger.info(f"Node is online: {node.get_id()}")
180
181
  task.function_result = "done"
File without changes
File without changes
File without changes
File without changes