sbcli-pre 1.2.5__zip → 1.2.7__zip

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (142)
  1. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/PKG-INFO +1 -1
  2. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/env_var +1 -1
  3. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/PKG-INFO +1 -1
  4. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/SOURCES.txt +5 -3
  5. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_cli/cli.py +138 -136
  6. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/cluster_ops.py +138 -235
  7. sbcli_pre-1.2.7/simplyblock_core/constants.py +91 -0
  8. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/caching_node_controller.py +8 -6
  9. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/cluster_events.py +9 -0
  10. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/device_controller.py +56 -63
  11. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/events_controller.py +5 -3
  12. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/health_controller.py +30 -40
  13. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/lvol_controller.py +75 -39
  14. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/pool_controller.py +8 -4
  15. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/snapshot_controller.py +36 -3
  16. sbcli_pre-1.2.7/simplyblock_core/controllers/tasks_controller.py +103 -0
  17. sbcli_pre-1.2.7/simplyblock_core/controllers/tasks_events.py +37 -0
  18. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/distr_controller.py +13 -9
  19. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/kv_store.py +62 -20
  20. sbcli_pre-1.2.7/simplyblock_core/mgmt_node_ops.py +205 -0
  21. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/events.py +9 -1
  22. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/job_schedule.py +6 -0
  23. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/nvme_device.py +42 -4
  24. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/storage_node.py +14 -2
  25. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/rpc_client.py +55 -10
  26. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/__init__.py +0 -4
  27. sbcli_pre-1.2.5/simplyblock_core/scripts/alerting/alert_resources.yaml → sbcli_pre-1.2.7/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 +54 -5
  28. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/cluster.json +1 -1
  29. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/deploy_stack.sh +9 -0
  30. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +32 -15
  31. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/docker-compose-swarm.yml +17 -2
  32. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/haproxy.cfg +15 -0
  33. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/install_deps.sh +3 -0
  34. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/stack_deploy_wait.sh +1 -1
  35. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/capacity_and_stats_collector.py +1 -1
  36. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/device_monitor.py +5 -46
  37. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/distr_event_collector.py +10 -11
  38. sbcli_pre-1.2.7/simplyblock_core/services/health_check_service.py +134 -0
  39. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/lvol_monitor.py +1 -1
  40. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/lvol_stat_collector.py +1 -1
  41. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/port_stat_collector.py +0 -1
  42. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/storage_node_monitor.py +49 -44
  43. sbcli_pre-1.2.7/simplyblock_core/services/tasks_runner_migration.py +61 -0
  44. sbcli_pre-1.2.5/simplyblock_core/services/job_tasks.py → sbcli_pre-1.2.7/simplyblock_core/services/tasks_runner_restart.py +95 -46
  45. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/snode_client.py +12 -0
  46. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/storage_node_ops.py +630 -358
  47. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/utils.py +126 -1
  48. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/snode_ops.py +103 -25
  49. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_cluster.py +20 -43
  50. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_device.py +10 -7
  51. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_lvol.py +9 -5
  52. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_pool.py +14 -5
  53. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_storage_node.py +15 -15
  54. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/node_utils.py +0 -2
  55. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/utils.py +8 -0
  56. sbcli_pre-1.2.5/simplyblock_core/constants.py +0 -65
  57. sbcli_pre-1.2.5/simplyblock_core/mgmt_node_ops.py +0 -80
  58. sbcli_pre-1.2.5/simplyblock_core/scripts/apply_dashboard.sh +0 -22
  59. sbcli_pre-1.2.5/simplyblock_core/services/health_check_service.py +0 -136
  60. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/README.md +0 -0
  61. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/pyproject.toml +0 -0
  62. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/dependency_links.txt +0 -0
  63. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/entry_points.txt +0 -0
  64. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/requires.txt +0 -0
  65. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/sbcli_pre.egg-info/top_level.txt +0 -0
  66. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/setup.cfg +0 -0
  67. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/setup.py +0 -0
  68. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_cli/main.py +0 -0
  69. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/__init__.py +0 -0
  70. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/cnode_client.py +0 -0
  71. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/compute_node_ops.py +0 -0
  72. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/__init__.py +0 -0
  73. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/device_events.py +0 -0
  74. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/lvol_events.py +0 -0
  75. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/mgmt_events.py +0 -0
  76. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/pool_events.py +0 -0
  77. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/snapshot_events.py +0 -0
  78. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/controllers/storage_events.py +0 -0
  79. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/__init__.py +0 -0
  80. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/base_model.py +0 -0
  81. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/caching_node.py +0 -0
  82. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/cluster.py +0 -0
  83. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/compute_node.py +0 -0
  84. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/deployer.py +0 -0
  85. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/global_settings.py +0 -0
  86. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/iface.py +0 -0
  87. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/lvol_model.py +0 -0
  88. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/mgmt_node.py +0 -0
  89. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/pool.py +0 -0
  90. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/port_stat.py +0 -0
  91. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/snapshot.py +0 -0
  92. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/models/stats.py +0 -0
  93. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/pci_utils.py +0 -0
  94. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
  95. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
  96. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/config_docker.sh +0 -0
  97. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/devices.json +0 -0
  98. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
  99. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
  100. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
  101. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/dashboards/pools.json +0 -0
  102. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/datasource.yml +0 -0
  103. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/db_config_double.sh +0 -0
  104. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/db_config_single.sh +0 -0
  105. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/prometheus.yml +0 -0
  106. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/run_ssh.sh +0 -0
  107. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/scripts/set_db_config.sh +0 -0
  108. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/__init__.py +0 -0
  109. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/caching_node_monitor.py +0 -0
  110. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/cap_monitor.py +0 -0
  111. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/install_service.sh +0 -0
  112. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/log_agg_service.py +0 -0
  113. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
  114. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/remove_service.sh +0 -0
  115. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/services/service_template.service +0 -0
  116. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_core/shell_utils.py +0 -0
  117. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/__init__.py +0 -0
  118. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/app.py +0 -0
  119. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/auth_middleware.py +0 -0
  120. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/__init__.py +0 -0
  121. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
  122. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
  123. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_basic.py +0 -0
  124. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
  125. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
  126. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
  127. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_deployer.py +0 -0
  128. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
  129. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
  130. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/caching_node_app.py +0 -0
  131. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/caching_node_app_k8s.py +0 -0
  132. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/node_webapp.py +0 -0
  133. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/snode_app.py +0 -0
  134. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/delete.py +0 -0
  135. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy.py +0 -0
  136. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy_cnode.yaml +0 -0
  137. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/deploy_spdk.yaml +0 -0
  138. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/is_up.py +0 -0
  139. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/list_deps.py +0 -0
  140. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/rpac.yaml +0 -0
  141. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/static/tst.py +0 -0
  142. {sbcli_pre-1.2.5 → sbcli_pre-1.2.7}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
@@ -1,80 +0,0 @@
1
- # coding=utf-8
2
- import json
3
- import logging
4
- import uuid
5
-
6
- import docker
7
-
8
- from simplyblock_core import utils
9
- from simplyblock_core.controllers import mgmt_events
10
- from simplyblock_core.kv_store import DBController
11
- from simplyblock_core.models.mgmt_node import MgmtNode
12
-
13
- logger = logging.getLogger()
14
-
15
-
16
- def add_mgmt_node(mgmt_ip, cluster_id=None):
17
- db_controller = DBController()
18
- hostname = utils.get_hostname()
19
- node = db_controller.get_mgmt_node_by_hostname(hostname)
20
- if node:
21
- logger.error("Node already exists in the cluster")
22
- return False
23
-
24
- node = MgmtNode()
25
- node.uuid = str(uuid.uuid4())
26
- node.hostname = hostname
27
- node.docker_ip_port = f"{mgmt_ip}:2375"
28
- node.cluster_id = cluster_id
29
- node.mgmt_ip = mgmt_ip
30
- node.status = MgmtNode.STATUS_ONLINE
31
- node.write_to_db(db_controller.kv_store)
32
-
33
- mgmt_events.mgmt_add(node)
34
- logger.info("Done")
35
- return True
36
-
37
-
38
- def list_mgmt_nodes(is_json):
39
- db_controller = DBController()
40
- nodes = db_controller.get_mgmt_nodes()
41
- data = []
42
- output = ""
43
-
44
- for node in nodes:
45
- logging.debug(node)
46
- logging.debug("*" * 20)
47
- data.append({
48
- "UUID": node.get_id(),
49
- "Hostname": node.hostname,
50
- "IP": node.mgmt_ip,
51
- "Status": node.status,
52
- })
53
-
54
- if not data:
55
- return output
56
-
57
- if is_json:
58
- output = json.dumps(data, indent=2)
59
- else:
60
- output = utils.print_table(data)
61
- return output
62
-
63
-
64
- def remove_mgmt_node(uuid):
65
- db_controller = DBController()
66
- snode = db_controller.get_mgmt_node_by_id(uuid)
67
- if not snode:
68
- logger.error("can not find node")
69
- return False
70
-
71
- logging.info("Removing mgmt node")
72
- snode.remove(db_controller.kv_store)
73
-
74
- logger.info("Leaving swarm...")
75
- node_docker = docker.DockerClient(base_url=f"tcp://{snode.docker_ip_port}", version="auto")
76
- node_docker.swarm.leave()
77
-
78
- mgmt_events.mgmt_remove(snode)
79
- logging.info("done")
80
-
@@ -1,22 +0,0 @@
1
- #!/bin/bash
2
-
3
- TD=$(dirname -- "$(readlink -f -- "$0")")
4
-
5
- # Grafana Password
6
- export grafanaPassword=$1
7
-
8
- # Grafana username
9
- GF_ADMIN_USER=admin
10
-
11
- HOST=0.0.0.0:3000
12
-
13
- DASHBOARDS="${TD}/dashboards"
14
- for dashboard in "${DASHBOARDS}/cluster.json" "${DASHBOARDS}/devices.json" "${DASHBOARDS}/nodes.json" "${DASHBOARDS}/lvols.json" "${DASHBOARDS}/pools.json" "${DASHBOARDS}/node-exporter.json"; do
15
- echo -e "\nUploading dashboard: ${dashboard}"
16
- curl -X POST -H "Content-Type: application/json" \
17
- -d "@${dashboard}" \
18
- "http://${GF_ADMIN_USER}:${grafanaPassword}@${HOST}/api/dashboards/import"
19
- echo ""
20
- done
21
-
22
- echo "Cluster deployment complete."
@@ -1,136 +0,0 @@
1
- # coding=utf-8
2
- import logging
3
-
4
- import time
5
- import sys
6
- from datetime import datetime
7
-
8
-
9
- from simplyblock_core.controllers import health_controller, storage_events, device_events
10
- from simplyblock_core.models.storage_node import StorageNode
11
- from simplyblock_core.rpc_client import RPCClient
12
- from simplyblock_core import constants, kv_store
13
-
14
- # Import the GELF logger
15
- from graypy import GELFUDPHandler
16
-
17
- def set_node_health_check(snode, health_check_status):
18
- snode = db_controller.get_storage_node_by_id(snode.get_id())
19
- if snode.health_check == health_check_status:
20
- return
21
- old_status = snode.health_check
22
- snode.health_check = health_check_status
23
- snode.updated_at = str(datetime.now())
24
- snode.write_to_db(db_store)
25
- storage_events.snode_health_check_change(snode, snode.health_check, old_status, caused_by="monitor")
26
-
27
-
28
- def set_device_health_check(cluster_id, device, health_check_status):
29
- if device.health_check == health_check_status:
30
- return
31
- nodes = db_controller.get_storage_nodes()
32
- for node in nodes:
33
- if node.nvme_devices:
34
- for dev in node.nvme_devices:
35
- if dev.get_id() == device.get_id():
36
- old_status = dev.health_check
37
- dev.health_check = health_check_status
38
- node.write_to_db(db_store)
39
- device_events.device_health_check_change(
40
- dev, dev.health_check, old_status, caused_by="monitor")
41
-
42
-
43
- # configure logging
44
- logger_handler = logging.StreamHandler(stream=sys.stdout)
45
- logger_handler.setFormatter(logging.Formatter('%(asctime)s: %(levelname)s: %(message)s'))
46
- gelf_handler = GELFUDPHandler('0.0.0.0', constants.GELF_PORT)
47
- logger = logging.getLogger()
48
- logger.addHandler(gelf_handler)
49
- logger.addHandler(logger_handler)
50
- logger.setLevel(logging.DEBUG)
51
-
52
- # get DB controller
53
- db_store = kv_store.KVStore()
54
- db_controller = kv_store.DBController()
55
-
56
- logger.info("Starting health check service")
57
- while True:
58
- cluster_id = ""
59
- cl = db_controller.get_clusters()
60
- if cl:
61
- cluster_id = cl[0].get_id()
62
-
63
- snodes = db_controller.get_storage_nodes()
64
- if not snodes:
65
- logger.error("storage nodes list is empty")
66
-
67
- for snode in snodes:
68
- logger.info("Node: %s, status %s", snode.get_id(), snode.status)
69
-
70
- if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE]:
71
- logger.info(f"Node status is: {snode.status}, skipping")
72
- continue
73
-
74
- # 1- check node ping
75
- ping_check = health_controller._check_node_ping(snode.mgmt_ip)
76
- logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}")
77
-
78
- # 2- check node API
79
- node_api_check = health_controller._check_node_api(snode.mgmt_ip)
80
- logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}")
81
-
82
- if snode.status == StorageNode.STATUS_OFFLINE:
83
- set_node_health_check(snode, ping_check & node_api_check)
84
- continue
85
-
86
- # 3- check node RPC
87
- node_rpc_check = health_controller._check_node_rpc(
88
- snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password)
89
- logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}")
90
-
91
- # 4- docker API
92
- node_docker_check = health_controller._check_node_docker_api(snode.mgmt_ip)
93
- logger.info(f"Check: node docker API {snode.mgmt_ip}:2375 ... {node_docker_check}")
94
-
95
- is_node_online = ping_check and node_api_check and node_rpc_check and node_docker_check
96
-
97
- health_check_status = is_node_online
98
- if not node_rpc_check:
99
- logger.info("Putting all devices to unavailable state because RPC check failed")
100
- for dev in snode.nvme_devices:
101
- if dev.io_error:
102
- logger.debug(f"Skipping Device action because of io_error {dev.get_id()}")
103
- continue
104
- set_device_health_check(cluster_id, dev, False)
105
- else:
106
- logger.info(f"Node device count: {len(snode.nvme_devices)}")
107
- node_devices_check = True
108
- node_remote_devices_check = True
109
-
110
- for dev in snode.nvme_devices:
111
- if dev.io_error:
112
- logger.debug(f"Skipping Device check because of io_error {dev.get_id()}")
113
- continue
114
- ret = health_controller.check_device(dev.get_id())
115
- set_device_health_check(cluster_id, dev, ret)
116
- if dev.status == dev.STATUS_ONLINE:
117
- node_devices_check &= ret
118
-
119
- logger.info(f"Node remote device: {len(snode.remote_devices)}")
120
- rpc_client = RPCClient(
121
- snode.mgmt_ip, snode.rpc_port,
122
- snode.rpc_username, snode.rpc_password,
123
- timeout=5, retry=3)
124
- for remote_device in snode.remote_devices:
125
- ret = rpc_client.get_bdevs(remote_device.remote_bdev)
126
- if ret:
127
- logger.info(f"Checking bdev: {remote_device.remote_bdev} ... ok")
128
- else:
129
- logger.info(f"Checking bdev: {remote_device.remote_bdev} ... not found")
130
- node_remote_devices_check &= bool(ret)
131
-
132
- health_check_status = is_node_online and node_devices_check and node_remote_devices_check
133
- set_node_health_check(snode, health_check_status)
134
-
135
- time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC)
136
-
File without changes
File without changes
File without changes
File without changes