sbcli-pre 1.1.3__zip → 1.1.5__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/PKG-INFO +1 -1
  2. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/env_var +1 -1
  3. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/PKG-INFO +1 -1
  4. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/SOURCES.txt +1 -1
  5. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_cli/cli.py +7 -12
  6. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/cluster_ops.py +45 -2
  7. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/constants.py +1 -1
  8. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/health_controller.py +1 -1
  9. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/rpc_client.py +5 -4
  10. sbcli_pre-1.1.3/simplyblock_core/scripts/alerting/alert_resources.yaml → sbcli_pre-1.1.5/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 +54 -5
  11. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/distr_event_collector.py +7 -8
  12. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/health_check_service.py +1 -1
  13. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/storage_node_monitor.py +3 -3
  14. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/tasks_runner_restart.py +1 -0
  15. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/storage_node_ops.py +0 -134
  16. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/README.md +0 -0
  17. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/pyproject.toml +0 -0
  18. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/dependency_links.txt +0 -0
  19. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/entry_points.txt +0 -0
  20. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/requires.txt +0 -0
  21. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/sbcli_pre.egg-info/top_level.txt +0 -0
  22. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/setup.cfg +0 -0
  23. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/setup.py +0 -0
  24. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_cli/main.py +0 -0
  25. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/__init__.py +0 -0
  26. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/cnode_client.py +0 -0
  27. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/compute_node_ops.py +0 -0
  28. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/__init__.py +0 -0
  29. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/caching_node_controller.py +0 -0
  30. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/cluster_events.py +0 -0
  31. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/device_controller.py +0 -0
  32. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/device_events.py +0 -0
  33. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/events_controller.py +0 -0
  34. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/lvol_controller.py +0 -0
  35. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/lvol_events.py +0 -0
  36. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/mgmt_events.py +0 -0
  37. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/pool_controller.py +0 -0
  38. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/pool_events.py +0 -0
  39. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/snapshot_controller.py +0 -0
  40. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/snapshot_events.py +0 -0
  41. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/storage_events.py +0 -0
  42. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/tasks_controller.py +0 -0
  43. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/controllers/tasks_events.py +0 -0
  44. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/distr_controller.py +0 -0
  45. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/kv_store.py +0 -0
  46. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/mgmt_node_ops.py +0 -0
  47. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/__init__.py +0 -0
  48. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/base_model.py +0 -0
  49. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/caching_node.py +0 -0
  50. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/cluster.py +0 -0
  51. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/compute_node.py +0 -0
  52. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/events.py +0 -0
  53. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/global_settings.py +0 -0
  54. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/iface.py +0 -0
  55. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/job_schedule.py +0 -0
  56. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/lvol_model.py +0 -0
  57. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/mgmt_node.py +0 -0
  58. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/nvme_device.py +0 -0
  59. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/pool.py +0 -0
  60. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/port_stat.py +0 -0
  61. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/snapshot.py +0 -0
  62. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/stats.py +0 -0
  63. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/models/storage_node.py +0 -0
  64. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/pci_utils.py +0 -0
  65. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/__init__.py +0 -0
  66. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
  67. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
  68. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/config_docker.sh +0 -0
  69. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/cluster.json +0 -0
  70. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/devices.json +0 -0
  71. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
  72. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
  73. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
  74. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/dashboards/pools.json +0 -0
  75. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/datasource.yml +0 -0
  76. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/db_config_double.sh +0 -0
  77. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/db_config_single.sh +0 -0
  78. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/deploy_stack.sh +0 -0
  79. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +0 -0
  80. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/docker-compose-swarm.yml +0 -0
  81. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/haproxy.cfg +0 -0
  82. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/install_deps.sh +0 -0
  83. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/prometheus.yml +0 -0
  84. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/run_ssh.sh +0 -0
  85. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/set_db_config.sh +0 -0
  86. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/scripts/stack_deploy_wait.sh +0 -0
  87. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/__init__.py +0 -0
  88. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/caching_node_monitor.py +0 -0
  89. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/cap_monitor.py +0 -0
  90. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/capacity_and_stats_collector.py +0 -0
  91. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/device_monitor.py +0 -0
  92. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/install_service.sh +0 -0
  93. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/log_agg_service.py +0 -0
  94. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/lvol_monitor.py +0 -0
  95. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/lvol_stat_collector.py +0 -0
  96. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
  97. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/port_stat_collector.py +0 -0
  98. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/remove_service.sh +0 -0
  99. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/service_template.service +0 -0
  100. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/services/tasks_runner_migration.py +0 -0
  101. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/shell_utils.py +0 -0
  102. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/snode_client.py +0 -0
  103. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_core/utils.py +0 -0
  104. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/__init__.py +0 -0
  105. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/app.py +0 -0
  106. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/auth_middleware.py +0 -0
  107. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/__init__.py +0 -0
  108. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
  109. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
  110. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/node_api_basic.py +0 -0
  111. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
  112. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
  113. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/snode_ops.py +0 -0
  114. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
  115. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_cluster.py +0 -0
  116. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_device.py +0 -0
  117. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_lvol.py +0 -0
  118. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
  119. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_pool.py +0 -0
  120. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
  121. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/blueprints/web_api_storage_node.py +0 -0
  122. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/caching_node_app.py +0 -0
  123. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/caching_node_app_k8s.py +0 -0
  124. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/node_utils.py +0 -0
  125. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/node_webapp.py +0 -0
  126. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/snode_app.py +0 -0
  127. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/delete.py +0 -0
  128. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/deploy.py +0 -0
  129. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/deploy_cnode.yaml +0 -0
  130. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/deploy_spdk.yaml +0 -0
  131. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/is_up.py +0 -0
  132. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/list_deps.py +0 -0
  133. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/rpac.yaml +0 -0
  134. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/static/tst.py +0 -0
  135. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
  136. {sbcli_pre-1.1.3 → sbcli_pre-1.1.5}/simplyblock_web/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sbcli-pre
3
- Version: 1.1.3
3
+ Version: 1.1.5
4
4
  Summary: CLI for managing SimplyBlock cluster
5
5
  Home-page: https://www.simplyblock.io/
6
6
  Author: Hamdy
@@ -1,5 +1,5 @@
1
1
  SIMPLY_BLOCK_COMMAND_NAME=sbcli-pre
2
- SIMPLY_BLOCK_VERSION=1.1.3
2
+ SIMPLY_BLOCK_VERSION=1.1.5
3
3
 
4
4
  SIMPLY_BLOCK_DOCKER_IMAGE=simplyblock/simplyblock:pre-release
5
5
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sbcli-pre
3
- Version: 1.1.3
3
+ Version: 1.1.5
4
4
  Summary: CLI for managing SimplyBlock cluster
5
5
  Home-page: https://www.simplyblock.io/
6
6
  Author: Hamdy
@@ -73,7 +73,7 @@ simplyblock_core/scripts/prometheus.yml
73
73
  simplyblock_core/scripts/run_ssh.sh
74
74
  simplyblock_core/scripts/set_db_config.sh
75
75
  simplyblock_core/scripts/stack_deploy_wait.sh
76
- simplyblock_core/scripts/alerting/alert_resources.yaml
76
+ simplyblock_core/scripts/alerting/alert_resources.yaml.j2
77
77
  simplyblock_core/scripts/alerting/alert_rules.yaml
78
78
  simplyblock_core/scripts/dashboards/cluster.json
79
79
  simplyblock_core/scripts/dashboards/devices.json
@@ -235,6 +235,10 @@ class CLIWrapper:
235
235
  dest='log_del_interval', default='7d')
236
236
  sub_command.add_argument("--metrics-retention-period", help='retention period for prometheus metrics, default: 7d',
237
237
  dest='metrics_retention_period', default='7d')
238
+ sub_command.add_argument("--contact-point", help='the email or slack webhook url to be used for alerting',
239
+ dest='contact_point', default='')
240
+ sub_command.add_argument("--grafana-endpoint", help='the endpoint url for grafana',
241
+ dest='grafana_endpoint', default='')
238
242
 
239
243
  # add cluster
240
244
  sub_command = self.add_sub_command(subparser, 'add', 'Add new cluster')
@@ -626,9 +630,6 @@ class CLIWrapper:
626
630
  elif sub_command == "deploy-cleaner":
627
631
  ret = storage_ops.deploy_cleaner()
628
632
 
629
- elif sub_command == "add":
630
- ret = self.storage_node_add(args)
631
-
632
633
  elif sub_command == "add-node":
633
634
  cluster_id = args.cluster_id
634
635
  node_ip = args.node_ip
@@ -1098,14 +1099,6 @@ class CLIWrapper:
1098
1099
  out = storage_ops.list_storage_nodes(self.db_store, args.json)
1099
1100
  return out
1100
1101
 
1101
- def storage_node_add(self, args):
1102
- cluster_id = args.cluster_id
1103
- ifname = args.ifname
1104
- data_nics = args.data_nics
1105
- # TODO: Validate the inputs
1106
- out = storage_ops.add_storage_node(cluster_id, ifname, data_nics)
1107
- return out
1108
-
1109
1102
  def storage_node_list_devices(self, args):
1110
1103
  node_id = args.node_id
1111
1104
  sort = args.sort
@@ -1137,11 +1130,13 @@ class CLIWrapper:
1137
1130
  ifname = args.ifname
1138
1131
  log_del_interval = args.log_del_interval
1139
1132
  metrics_retention_period = args.metrics_retention_period
1133
+ contact_point = args.contact_point
1134
+ grafana_endpoint = args.grafana_endpoint
1140
1135
 
1141
1136
  return cluster_ops.create_cluster(
1142
1137
  blk_size, page_size_in_blocks,
1143
1138
  CLI_PASS, cap_warn, cap_crit, prov_cap_warn, prov_cap_crit,
1144
- ifname, log_del_interval, metrics_retention_period)
1139
+ ifname, log_del_interval, metrics_retention_period, contact_point, grafana_endpoint)
1145
1140
 
1146
1141
  def query_yes_no(self, question, default="yes"):
1147
1142
  """Ask a yes/no question via raw_input() and return their answer.
@@ -2,11 +2,16 @@
2
2
  import json
3
3
  import logging
4
4
  import os
5
+ import re
6
+ import tempfile
7
+ import shutil
8
+ import subprocess
5
9
  import time
6
10
  import uuid
7
11
 
8
12
  import docker
9
13
  import requests
14
+ from jinja2 import Environment, FileSystemLoader
10
15
 
11
16
  from simplyblock_core import utils, scripts, constants, mgmt_node_ops, storage_node_ops
12
17
  from simplyblock_core.controllers import cluster_events, device_controller
@@ -16,7 +21,7 @@ from simplyblock_core.models.nvme_device import NVMeDevice
16
21
  from simplyblock_core.models.storage_node import StorageNode
17
22
 
18
23
  logger = logging.getLogger()
19
-
24
+ TOP_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
20
25
 
21
26
  def _add_grafana_dashboards(username, password, cluster_ip):
22
27
  url = f"http://{username}:{password}@{cluster_ip}/grafana/api/dashboards/import"
@@ -69,7 +74,8 @@ def _add_graylog_input(cluster_ip, password):
69
74
 
70
75
 
71
76
  def create_cluster(blk_size, page_size_in_blocks, cli_pass,
72
- cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period):
77
+ cap_warn, cap_crit, prov_cap_warn, prov_cap_crit, ifname, log_del_interval, metrics_retention_period,
78
+ contact_point, grafana_endpoint):
73
79
  logger.info("Installing dependencies...")
74
80
  ret = scripts.install_deps()
75
81
  logger.info("Installing dependencies > Done")
@@ -124,6 +130,43 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass,
124
130
  if prov_cap_crit and prov_cap_crit > 0:
125
131
  c.prov_cap_crit = prov_cap_crit
126
132
 
133
+ alerts_template_folder = os.path.join(TOP_DIR, "simplyblock_core/scripts/alerting/")
134
+ alert_resources_file = "alert_resources.yaml"
135
+
136
+ env = Environment(loader=FileSystemLoader(alerts_template_folder), trim_blocks=True, lstrip_blocks=True)
137
+ template = env.get_template(f'{alert_resources_file}.j2')
138
+
139
+ slack_pattern = re.compile(r"https://hooks\.slack\.com/services/\S+")
140
+ email_pattern = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
141
+
142
+ if slack_pattern.match(contact_point):
143
+ ALERT_TYPE = "slack"
144
+ elif email_pattern.match(contact_point):
145
+ ALERT_TYPE = "email"
146
+ else:
147
+ ALERT_TYPE = "slack"
148
+
149
+ values = {
150
+ 'CONTACT_POINT': contact_point,
151
+ 'GRAFANA_ENDPOINT': grafana_endpoint,
152
+ 'ALERT_TYPE': ALERT_TYPE,
153
+ }
154
+
155
+ temp_dir = tempfile.mkdtemp()
156
+
157
+ temp_file_path = os.path.join(temp_dir, alert_resources_file)
158
+ with open(temp_file_path, 'w') as file:
159
+ file.write(template.render(values))
160
+
161
+ destination_file_path = os.path.join(alerts_template_folder, alert_resources_file)
162
+ try:
163
+ subprocess.run(['sudo', '-v'], check=True) # sudo -v checks if the current user has sudo permissions
164
+ subprocess.run(['sudo', 'mv', temp_file_path, destination_file_path], check=True)
165
+ print(f"File moved to {destination_file_path} successfully.")
166
+ except subprocess.CalledProcessError as e:
167
+ print(f"An error occurred: {e}")
168
+ shutil.rmtree(temp_dir)
169
+
127
170
  logger.info("Deploying swarm stack ...")
128
171
  ret = scripts.deploy_stack(cli_pass, DEV_IP, constants.SIMPLY_BLOCK_DOCKER_IMAGE, c.secret, c.uuid, log_del_interval, metrics_retention_period)
129
172
  logger.info("Deploying swarm stack > Done")
@@ -20,7 +20,7 @@ DEV_MONITOR_INTERVAL_SEC = 10
20
20
  DEV_STAT_COLLECTOR_INTERVAL_SEC = 2
21
21
  PROT_STAT_COLLECTOR_INTERVAL_SEC = 2
22
22
  DISTR_EVENT_COLLECTOR_INTERVAL_SEC = 2
23
- DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS = 20
23
+ DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS = 10
24
24
  CAP_MONITOR_INTERVAL_SEC = 30
25
25
  SSD_VENDOR_WHITE_LIST = ["1d0f:cd01", "1d0f:cd00"]
26
26
 
@@ -69,7 +69,7 @@ def _check_node_rpc(rpc_ip, rpc_port, rpc_username, rpc_password):
69
69
  try:
70
70
  rpc_client = RPCClient(
71
71
  rpc_ip, rpc_port, rpc_username, rpc_password,
72
- timeout=3, retry=1)
72
+ timeout=10, retry=1)
73
73
  ret = rpc_client.get_version()
74
74
  if ret:
75
75
  logger.debug(f"SPDK version: {ret['version']}")
@@ -434,8 +434,9 @@ class RPCClient:
434
434
  "trsvcid": str(port),
435
435
  "subnqn": nqn,
436
436
  "fabrics_connect_timeout_us": 100000,
437
- "fast_io_fail_timeout_sec": 0,
437
+ "fast_io_fail_timeout_sec": 1,
438
438
  "num_io_queues": 16384,
439
+ "ctrlr_loss_timeout_sec": 2,
439
440
  }
440
441
  return self._request("bdev_nvme_attach_controller", params)
441
442
 
@@ -483,9 +484,9 @@ class RPCClient:
483
484
  params = {
484
485
  "bdev_retry_count": 0,
485
486
  "transport_retry_count": 0,
486
- "ctrlr_loss_timeout_sec": -1,
487
- "fast_io_fail_timeout_sec": 5,
488
- "reconnect_delay_sec": 5,
487
+ "ctrlr_loss_timeout_sec": 2,
488
+ "fast_io_fail_timeout_sec": 1,
489
+ "reconnect_delay_sec": 1,
489
490
  "keep_alive_timeout_ms": 200,
490
491
  "transport_ack_timeout": 7,
491
492
  "timeout_us": 100000
@@ -12,15 +12,26 @@ contactPoints:
12
12
  name: grafana-alerts
13
13
  receivers:
14
14
  - uid: grafana
15
- type: slack
15
+ type: {{ ALERT_TYPE }}
16
+ {% if ALERT_TYPE == 'slack' %}
16
17
  settings:
17
18
  username: grafana_bot
18
- url: 'https://hooks.slack.com/services/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'
19
+ url: '{{ CONTACT_POINT }}'
19
20
  title: |
20
- {{ template "slack.title" . }}
21
+ {{ '{{' }} template "slack.title" . {{ '}}' }}
21
22
  text: |
22
- {{ template "slack.message" . }}
23
+ {{ '{{' }} template "slack.message" . {{ '}}' }}
24
+ {% else %}
25
+ settings:
26
+ addresses: '{{ CONTACT_POINT }}'
27
+ subject: |
28
+ {{ '{{' }} template "email.subject" . {{ '}}' }}
29
+ body: |
30
+ {{ '{{' }} template "email.body" . {{ '}}' }}
31
+ {% endif %}
23
32
 
33
+ {% if ALERT_TYPE == 'slack' %}
34
+ {% raw %}
24
35
  templates:
25
36
  - orgId: 1
26
37
  name: slack.title
@@ -38,7 +49,9 @@ templates:
38
49
  *Description*: {{ .Annotations.description }}
39
50
  {{ end -}}
40
51
  *Log message*: {{ index .Labels "message" }}
41
- *Explore logs:* https://grafanaURL.com/explore?orgId=1
52
+ {% endraw %}
53
+ *Explore logs:* {{ GRAFANA_ENDPOINT }}
54
+ {% raw %}
42
55
  {{ if .DashboardURL -}}
43
56
  *Go to dashboard:* {{ .DashboardURL }}
44
57
  {{- end }}
@@ -65,3 +78,39 @@ templates:
65
78
  {{ end }}
66
79
 
67
80
  {{- end }}
81
+ {% endraw %}
82
+ {% else %}
83
+ {% raw %}
84
+ - orgId: 1
85
+ name: email.subject
86
+ template: |-
87
+ {{ define "email.subject" -}}
88
+ [{{ .Status | toUpper }}] Grafana Alert
89
+ {{- end -}}
90
+ - orgId: 1
91
+ name: email.body
92
+ template: |-
93
+ {{ define "email.body" -}}
94
+ Alert: {{ .Labels.alertname }}
95
+ {{ if .Annotations -}}
96
+ Summary: {{ .Annotations.summary}}
97
+ Description: {{ .Annotations.description }}
98
+ {{ end -}}
99
+ Log message: {{ index .Labels "message" }}
100
+ Explore logs: {{ GRAFANA_ENDPOINT }}
101
+ {{ if .DashboardURL -}}
102
+ Go to dashboard: {{ .DashboardURL }}
103
+ {{- end }}
104
+ {{ if .PanelURL -}}
105
+ Go to panel: {{ .PanelURL }}
106
+ {{- end }}
107
+ Details:
108
+ {{ range .Labels.SortedPairs -}}
109
+ - {{ .Name }}: `{{ .Value }}`
110
+ {{ end -}}
111
+ {{ if .SilenceURL -}}
112
+ Silence this alert: {{ .SilenceURL }}
113
+ {{- end }}
114
+ {{- end }}
115
+ {% endraw %}
116
+ {% endif %}
@@ -140,14 +140,13 @@ while True:
140
140
  snode.rpc_port,
141
141
  snode.rpc_username,
142
142
  snode.rpc_password,
143
- timeout=3, retry=2
144
- )
145
- num_of_events = constants.DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS
143
+ timeout=10, retry=2)
144
+
146
145
  try:
147
- # events = client.distr_status_events_get()
148
- events = client.distr_status_events_discard_then_get(0, num_of_events)
146
+ events = client.distr_status_events_discard_then_get(0, constants.DISTR_EVENT_COLLECTOR_NUM_OF_EVENTS)
147
+
149
148
  if not events:
150
- logger.error("Distr events empty")
149
+ logger.debug("no events found")
151
150
  continue
152
151
 
153
152
  logger.info(f"Found events: {len(events)}")
@@ -161,8 +160,8 @@ while True:
161
160
  logger.info(f"Processing event: {eid}")
162
161
  process_event(eid)
163
162
 
164
- logger.info(f"Discarding events: {num_of_events}")
165
- events = client.distr_status_events_discard_then_get(num_of_events, 0)
163
+ logger.info(f"Discarding events: {len(events)}")
164
+ client.distr_status_events_discard_then_get(len(events), 0)
166
165
 
167
166
  except Exception as e:
168
167
  logger.error("Failed to process distr events")
@@ -118,7 +118,7 @@ while True:
118
118
  rpc_client = RPCClient(
119
119
  snode.mgmt_ip, snode.rpc_port,
120
120
  snode.rpc_username, snode.rpc_password,
121
- timeout=5, retry=3)
121
+ timeout=10, retry=1)
122
122
  for remote_device in snode.remote_devices:
123
123
  ret = rpc_client.get_bdevs(remote_device.remote_bdev)
124
124
  if ret:
@@ -66,8 +66,8 @@ def get_cluster_target_status(cluster_id):
66
66
  logger.debug(f"online_devices: {online_devices}")
67
67
  logger.debug(f"offline_devices: {offline_devices}")
68
68
 
69
- # if more than two affected modes then cluster is suspended
70
- if affected_nodes > 2:
69
+ # if more than two affected nodes then cluster is suspended
70
+ if affected_nodes > 2 or offline_nodes > 2:
71
71
  return Cluster.STATUS_SUSPENDED
72
72
 
73
73
  # if any device goes offline then cluster is degraded
@@ -105,7 +105,7 @@ def update_cluster_status(cluster_id):
105
105
 
106
106
  def set_node_online(node):
107
107
  if node.status != StorageNode.STATUS_ONLINE:
108
- storage_node_ops.set_node_status(snode, StorageNode.STATUS_ONLINE)
108
+ storage_node_ops.set_node_status(snode.get_id(), StorageNode.STATUS_ONLINE)
109
109
 
110
110
 
111
111
  def set_node_offline(node):
@@ -175,6 +175,7 @@ def task_runner_node(task):
175
175
  if ret:
176
176
  logger.info(f"Node restart succeeded")
177
177
 
178
+ time.sleep(5)
178
179
  if _get_node_unavailable_devices_count(node.get_id()) == 0 and node.status == StorageNode.STATUS_ONLINE:
179
180
  logger.info(f"Node is online: {node.get_id()}")
180
181
  task.function_result = "done"
@@ -799,140 +799,6 @@ def add_node(cluster_id, node_ip, iface_name, data_nics_list,
799
799
  return "Success"
800
800
 
801
801
 
802
- # Deprecated
803
- def add_storage_node(cluster_id, iface_name, data_nics):
804
- db_controller = DBController()
805
- kv_store = db_controller.kv_store
806
-
807
- cluster = db_controller.get_cluster_by_id(cluster_id)
808
- if not cluster:
809
- logger.error("Cluster not found: %s", cluster_id)
810
- return False
811
-
812
- logger.info("Add Storage node")
813
-
814
- hostname = utils.get_hostname()
815
- snode = db_controller.get_storage_node_by_hostname(hostname)
816
- if snode:
817
- logger.error("Node already exists, try remove it first.")
818
- exit(1)
819
- else:
820
- snode = StorageNode()
821
- snode.uuid = str(uuid.uuid4())
822
-
823
- mgmt_ip = _get_if_ip_address(iface_name)
824
- system_id = utils.get_system_id()
825
-
826
- BASE_NQN = cluster.nqn.split(":")[0]
827
- subsystem_nqn = f"{BASE_NQN}:{hostname}"
828
-
829
- if data_nics:
830
- data_nics = _get_data_nics(data_nics)
831
- else:
832
- data_nics = _get_data_nics([iface_name])
833
-
834
- rpc_user, rpc_pass = utils.generate_rpc_user_and_pass()
835
-
836
- # creating storage node object
837
- snode.status = StorageNode.STATUS_IN_CREATION
838
- snode.baseboard_sn = utils.get_baseboard_sn()
839
- snode.system_uuid = system_id
840
- snode.hostname = hostname
841
- snode.host_nqn = subsystem_nqn
842
- snode.subsystem = subsystem_nqn
843
- snode.data_nics = data_nics
844
- snode.mgmt_ip = mgmt_ip
845
- snode.rpc_port = constants.RPC_HTTP_PROXY_PORT
846
- snode.rpc_username = rpc_user
847
- snode.rpc_password = rpc_pass
848
- snode.cluster_id = cluster_id
849
- snode.write_to_db(kv_store)
850
-
851
- # creating RPCClient instance
852
- rpc_client = RPCClient(
853
- snode.mgmt_ip,
854
- snode.rpc_port,
855
- snode.rpc_username,
856
- snode.rpc_password)
857
-
858
- logger.info("Getting nvme devices")
859
- devs = get_nvme_devices()
860
- logger.debug(devs)
861
- pcies = [d[0] for d in devs]
862
- nvme_devs = addNvmeDevices(cluster, rpc_client, pcies, snode)
863
- if not nvme_devs:
864
- logger.error("No NVMe devices was found!")
865
-
866
- logger.debug(nvme_devs)
867
- snode.nvme_devices = nvme_devs
868
-
869
- # Set device cluster order
870
- dev_order = get_next_cluster_device_order(db_controller, cluster_id)
871
- for index, nvme in enumerate(snode.nvme_devices):
872
- nvme.cluster_device_order = dev_order
873
- dev_order += 1
874
- snode.write_to_db(db_controller.kv_store)
875
-
876
- # prepare devices
877
- # _prepare_cluster_devices(snode)
878
-
879
- logger.info("Connecting to remote devices")
880
- remote_devices = _connect_to_remote_devs(snode)
881
- snode.remote_devices = remote_devices
882
-
883
- logger.info("Setting node status to Active")
884
- snode.status = StorageNode.STATUS_ONLINE
885
- snode.write_to_db(kv_store)
886
-
887
- # make other nodes connect to the new devices
888
- logger.info("Make other nodes connect to the new devices")
889
- snodes = db_controller.get_storage_nodes_by_cluster_id(cluster_id)
890
- for node_index, node in enumerate(snodes):
891
- if node.get_id() == snode.get_id():
892
- continue
893
- logger.info(f"Connecting to node: {node.get_id()}")
894
- rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password)
895
- count = 0
896
- for dev in snode.nvme_devices:
897
- name = f"remote_{dev.alceml_bdev}"
898
- ret = rpc_client.bdev_nvme_attach_controller_tcp(name, dev.nvmf_nqn, dev.nvmf_ip, dev.nvmf_port)
899
- if not ret:
900
- logger.error(f"Failed to connect to device: {name}")
901
- continue
902
-
903
- dev.remote_bdev = f"{name}n1"
904
- idx = -1
905
- for i, d in enumerate(node.remote_devices):
906
- if d.get_id() == dev.get_id():
907
- idx = i
908
- break
909
- if idx >= 0:
910
- node.remote_devices[idx] = dev
911
- else:
912
- node.remote_devices.append(dev)
913
- count += 1
914
- node.write_to_db(kv_store)
915
- logger.info(f"connected to devices count: {count}")
916
-
917
- logger.info("Sending cluster map")
918
- ret = distr_controller.send_cluster_map_to_node(snode)
919
- if not ret:
920
- return False, "Failed to send cluster map"
921
- ret = distr_controller.send_cluster_map_add_node(snode)
922
- if not ret:
923
- return False, "Failed to send cluster map add node"
924
- time.sleep(3)
925
-
926
- logger.info("Sending cluster event updates")
927
- distr_controller.send_node_status_event(snode, StorageNode.STATUS_ONLINE)
928
-
929
- for dev in snode.nvme_devices:
930
- distr_controller.send_dev_status_event(dev, NVMeDevice.STATUS_ONLINE)
931
-
932
- logger.info("Done")
933
- return "Success"
934
-
935
-
936
802
  def delete_storage_node(node_id):
937
803
  db_controller = DBController()
938
804
  snode = db_controller.get_storage_node_by_id(node_id)
File without changes
File without changes
File without changes
File without changes