sbcli-pre 1.2.4__zip → 1.2.5__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/PKG-INFO +20 -5
  2. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/README.md +19 -4
  3. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/env_var +1 -1
  4. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/PKG-INFO +20 -5
  5. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/SOURCES.txt +5 -5
  6. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_cli/cli.py +115 -113
  7. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/cluster_ops.py +238 -141
  8. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/constants.py +7 -5
  9. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/caching_node_controller.py +6 -8
  10. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/cluster_events.py +0 -9
  11. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/device_controller.py +63 -56
  12. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/events_controller.py +3 -5
  13. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/health_controller.py +40 -30
  14. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/lvol_controller.py +38 -51
  15. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/pool_controller.py +4 -8
  16. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/snapshot_controller.py +3 -9
  17. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/distr_controller.py +9 -13
  18. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/kv_store.py +29 -47
  19. sbcli_pre-1.2.5/simplyblock_core/mgmt_node_ops.py +80 -0
  20. sbcli_pre-1.2.5/simplyblock_core/models/deployer.py +62 -0
  21. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/events.py +1 -9
  22. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/job_schedule.py +0 -6
  23. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/nvme_device.py +4 -42
  24. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/storage_node.py +1 -9
  25. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/rpc_client.py +10 -55
  26. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/__init__.py +4 -0
  27. sbcli_pre-1.2.4/simplyblock_core/scripts/alerting/alert_resources.yaml.j2 → sbcli_pre-1.2.5/simplyblock_core/scripts/alerting/alert_resources.yaml +5 -54
  28. sbcli_pre-1.2.5/simplyblock_core/scripts/apply_dashboard.sh +22 -0
  29. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/cluster.json +1 -1
  30. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/deploy_stack.sh +0 -2
  31. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/docker-compose-swarm-monitoring.yml +13 -22
  32. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/docker-compose-swarm.yml +2 -17
  33. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/haproxy.cfg +0 -15
  34. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/install_deps.sh +0 -1
  35. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/capacity_and_stats_collector.py +1 -1
  36. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/device_monitor.py +46 -5
  37. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/distr_event_collector.py +11 -10
  38. sbcli_pre-1.2.5/simplyblock_core/services/health_check_service.py +136 -0
  39. sbcli_pre-1.2.4/simplyblock_core/services/tasks_runner_restart.py → sbcli_pre-1.2.5/simplyblock_core/services/job_tasks.py +46 -95
  40. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/lvol_monitor.py +1 -1
  41. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/lvol_stat_collector.py +1 -1
  42. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/port_stat_collector.py +1 -0
  43. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/storage_node_monitor.py +44 -49
  44. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/snode_client.py +0 -12
  45. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/storage_node_ops.py +336 -525
  46. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/utils.py +1 -46
  47. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/app.py +2 -1
  48. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/snode_ops.py +25 -103
  49. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_cluster.py +43 -20
  50. sbcli_pre-1.2.5/simplyblock_web/blueprints/web_api_deployer.py +394 -0
  51. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_device.py +7 -10
  52. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_lvol.py +5 -9
  53. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_pool.py +5 -14
  54. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_storage_node.py +10 -3
  55. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/node_utils.py +2 -0
  56. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/utils.py +0 -8
  57. sbcli_pre-1.2.4/simplyblock_core/controllers/tasks_controller.py +0 -103
  58. sbcli_pre-1.2.4/simplyblock_core/controllers/tasks_events.py +0 -37
  59. sbcli_pre-1.2.4/simplyblock_core/mgmt_node_ops.py +0 -205
  60. sbcli_pre-1.2.4/simplyblock_core/services/health_check_service.py +0 -134
  61. sbcli_pre-1.2.4/simplyblock_core/services/tasks_runner_migration.py +0 -61
  62. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/pyproject.toml +0 -0
  63. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/dependency_links.txt +0 -0
  64. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/entry_points.txt +0 -0
  65. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/requires.txt +0 -0
  66. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/sbcli_pre.egg-info/top_level.txt +0 -0
  67. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/setup.cfg +0 -0
  68. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/setup.py +0 -0
  69. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_cli/main.py +0 -0
  70. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/__init__.py +0 -0
  71. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/cnode_client.py +0 -0
  72. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/compute_node_ops.py +0 -0
  73. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/__init__.py +0 -0
  74. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/device_events.py +0 -0
  75. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/lvol_events.py +0 -0
  76. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/mgmt_events.py +0 -0
  77. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/pool_events.py +0 -0
  78. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/snapshot_events.py +0 -0
  79. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/controllers/storage_events.py +0 -0
  80. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/__init__.py +0 -0
  81. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/base_model.py +0 -0
  82. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/caching_node.py +0 -0
  83. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/cluster.py +0 -0
  84. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/compute_node.py +0 -0
  85. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/global_settings.py +0 -0
  86. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/iface.py +0 -0
  87. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/lvol_model.py +0 -0
  88. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/mgmt_node.py +0 -0
  89. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/pool.py +0 -0
  90. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/port_stat.py +0 -0
  91. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/snapshot.py +0 -0
  92. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/models/stats.py +0 -0
  93. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/pci_utils.py +0 -0
  94. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/alerting/alert_rules.yaml +0 -0
  95. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/clean_local_storage_deploy.sh +0 -0
  96. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/config_docker.sh +0 -0
  97. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/devices.json +0 -0
  98. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/lvols.json +0 -0
  99. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/node-exporter.json +0 -0
  100. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/nodes.json +0 -0
  101. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/dashboards/pools.json +0 -0
  102. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/datasource.yml +0 -0
  103. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/db_config_double.sh +0 -0
  104. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/db_config_single.sh +0 -0
  105. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/prometheus.yml +0 -0
  106. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/run_ssh.sh +0 -0
  107. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/set_db_config.sh +0 -0
  108. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/scripts/stack_deploy_wait.sh +0 -0
  109. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/__init__.py +0 -0
  110. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/caching_node_monitor.py +0 -0
  111. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/cap_monitor.py +0 -0
  112. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/install_service.sh +0 -0
  113. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/log_agg_service.py +0 -0
  114. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/mgmt_node_monitor.py +0 -0
  115. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/remove_service.sh +0 -0
  116. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/services/service_template.service +0 -0
  117. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_core/shell_utils.py +0 -0
  118. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/__init__.py +0 -0
  119. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/auth_middleware.py +0 -0
  120. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/__init__.py +0 -0
  121. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/caching_node_ops.py +0 -0
  122. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/caching_node_ops_k8s.py +0 -0
  123. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/node_api_basic.py +0 -0
  124. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/node_api_caching_docker.py +0 -0
  125. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/node_api_caching_ks.py +0 -0
  126. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_caching_node.py +0 -0
  127. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_mgmt_node.py +0 -0
  128. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/blueprints/web_api_snapshot.py +0 -0
  129. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/caching_node_app.py +0 -0
  130. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/caching_node_app_k8s.py +0 -0
  131. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/node_webapp.py +0 -0
  132. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/snode_app.py +0 -0
  133. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/delete.py +0 -0
  134. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/deploy.py +0 -0
  135. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/deploy_cnode.yaml +0 -0
  136. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/deploy_spdk.yaml +0 -0
  137. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/is_up.py +0 -0
  138. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/list_deps.py +0 -0
  139. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/rpac.yaml +0 -0
  140. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/static/tst.py +0 -0
  141. {sbcli_pre-1.2.4 → sbcli_pre-1.2.5}/simplyblock_web/templates/deploy_spdk.yaml.j2 +0 -0
@@ -1,103 +0,0 @@
1
- # coding=utf-8
2
- import logging
3
- import time
4
- import uuid
5
-
6
- from simplyblock_core import kv_store, constants, utils
7
- from simplyblock_core.controllers import tasks_events
8
- from simplyblock_core.models.job_schedule import JobSchedule
9
-
10
- logger = logging.getLogger()
11
- db_controller = kv_store.DBController()
12
-
13
-
14
- def _validate_new_task_node_restart(cluster_id, node_id):
15
- tasks = db_controller.get_job_tasks(cluster_id)
16
- for task in tasks:
17
- if task.function_name == JobSchedule.FN_NODE_RESTART and task.node_id == node_id:
18
- if task.status != JobSchedule.STATUS_DONE:
19
- logger.info(f"Task found, skip adding new task: {task.get_id()}")
20
- return False
21
- return True
22
-
23
-
24
- def _validate_new_task_dev_restart(cluster_id, node_id, device_id):
25
- tasks = db_controller.get_job_tasks(cluster_id)
26
- for task in tasks:
27
- if task.function_name == JobSchedule.FN_DEV_RESTART and task.device_id == device_id:
28
- if task.status != JobSchedule.STATUS_DONE:
29
- logger.info(f"Task found, skip adding new task: {task.get_id()}")
30
- return False
31
- elif task.function_name == JobSchedule.FN_NODE_RESTART and task.node_id == node_id:
32
- if task.status != JobSchedule.STATUS_DONE:
33
- logger.info(f"Task found, skip adding new task: {task.get_id()}")
34
- return False
35
- return True
36
-
37
-
38
- def _add_task(function_name, cluster_id, node_id, device_id):
39
-
40
- if function_name in [JobSchedule.FN_DEV_RESTART, JobSchedule.FN_DEV_MIG]:
41
- if not _validate_new_task_dev_restart(cluster_id, node_id, device_id):
42
- return False
43
- elif function_name == JobSchedule.FN_NODE_RESTART:
44
- if not _validate_new_task_node_restart(cluster_id, node_id):
45
- return False
46
-
47
- task_obj = JobSchedule()
48
- task_obj.uuid = str(uuid.uuid4())
49
- task_obj.cluster_id = cluster_id
50
- task_obj.node_id = node_id
51
- task_obj.device_id = device_id
52
- task_obj.date = int(time.time())
53
- task_obj.function_name = function_name
54
- task_obj.status = JobSchedule.STATUS_NEW
55
- task_obj.write_to_db(db_controller.kv_store)
56
- tasks_events.task_create(task_obj)
57
- return task_obj.uuid
58
-
59
-
60
- def add_device_mig_task(device_id):
61
- device = db_controller.get_storage_devices(device_id)
62
- return _add_task(JobSchedule.FN_DEV_MIG, device.cluster_id, device.node_id, device.get_id())
63
-
64
-
65
- def add_device_to_auto_restart(device):
66
- return _add_task(JobSchedule.FN_DEV_RESTART, device.cluster_id, device.node_id, device.get_id())
67
-
68
-
69
- def add_node_to_auto_restart(node):
70
- return _add_task(JobSchedule.FN_NODE_RESTART, node.cluster_id, node.get_id(), "")
71
-
72
-
73
- def list_tasks(cluster_id):
74
- cluster = db_controller.get_cluster_by_id(cluster_id)
75
- if not cluster:
76
- logger.error("Cluster not found: %s", cluster_id)
77
- return False
78
-
79
- data = []
80
- tasks = db_controller.get_job_tasks(cluster_id)
81
- for task in tasks:
82
- data.append({
83
- "Task ID": task.uuid,
84
- "Target ID": task.device_id or task.node_id,
85
- "Function": task.function_name,
86
- "Retry": f"{task.retry}/{constants.TASK_EXEC_RETRY_COUNT}",
87
- "Status": task.status,
88
- "Result": task.function_result,
89
- "Date": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(task.date)),
90
- })
91
- return utils.print_table(data)
92
-
93
-
94
- def cancel_task(task_id):
95
- task = db_controller.get_task_by_id(task_id)
96
- if not task:
97
- logger.error("Task not found: %s", task_id)
98
- return False
99
-
100
- task.canceled = True
101
- task.write_to_db(db_controller.kv_store)
102
- tasks_events.task_canceled(task)
103
- return True
@@ -1,37 +0,0 @@
1
- # coding=utf-8
2
- import logging
3
-
4
- from simplyblock_core.controllers import events_controller as ec
5
- from simplyblock_core.kv_store import DBController
6
-
7
- logger = logging.getLogger()
8
- db_controller = DBController()
9
-
10
-
11
- def _task_event(task, message, caused_by, event):
12
- ec.log_event_cluster(
13
- cluster_id=task.cluster_id,
14
- domain=ec.DOMAIN_CLUSTER,
15
- event=event,
16
- db_object=task,
17
- caused_by=caused_by,
18
- message=message,
19
- node_id=task.node_id,
20
- status=task.status)
21
-
22
-
23
- def task_create(task, caused_by=ec.CAUSED_BY_CLI):
24
- _task_event(task, f"task created: {task.uuid}", caused_by, ec.EVENT_OBJ_CREATED)
25
-
26
-
27
- def task_updated(task, caused_by=ec.CAUSED_BY_CLI):
28
- _task_event(task, f"Task updated: {task.uuid}", caused_by, ec.EVENT_STATUS_CHANGE)
29
-
30
-
31
- def task_status_change(task, new_state, old_status, caused_by=ec.CAUSED_BY_CLI):
32
- _task_event(task, f"task status changed from: {old_status} to: {new_state}", caused_by, ec.EVENT_STATUS_CHANGE)
33
-
34
-
35
- def task_canceled(task, caused_by=ec.CAUSED_BY_CLI):
36
- _task_event(task, f"Task canceled: {task.uuid}", caused_by, ec.EVENT_STATUS_CHANGE)
37
-
@@ -1,205 +0,0 @@
1
- # coding=utf-8
2
- import json
3
- import logging
4
- import uuid
5
- import time
6
- import requests
7
-
8
- import docker
9
-
10
- from simplyblock_core import utils, scripts
11
- from simplyblock_core.controllers import mgmt_events
12
- from simplyblock_core.kv_store import DBController
13
- from simplyblock_core.models.mgmt_node import MgmtNode
14
-
15
-
16
- logger = logging.getLogger()
17
-
18
-
19
- def deploy_mgmt_node(cluster_ip, cluster_id, ifname):
20
-
21
- try:
22
- resp = requests.get(f"http://{cluster_ip}/cluster/{cluster_id}")
23
- resp_json = resp.json()
24
- cluster_data = resp_json['results'][0]
25
- logger.info(f"Cluster found! NQN:{cluster_data['nqn']}")
26
- logger.debug(cluster_data)
27
- except Exception as e:
28
- logger.error("Error getting cluster data!")
29
- logger.error(e)
30
- return ""
31
-
32
- logger.info("Installing dependencies...")
33
- scripts.install_deps()
34
- logger.info("Installing dependencies > Done")
35
-
36
- if not ifname:
37
- ifname = "eth0"
38
-
39
- DEV_IP = utils.get_iface_ip(ifname)
40
- if not DEV_IP:
41
- logger.error(f"Error getting interface ip: {ifname}")
42
- return False
43
-
44
- logger.info(f"Node IP: {DEV_IP}")
45
- ret = scripts.configure_docker(DEV_IP)
46
-
47
- db_connection = cluster_data['db_connection']
48
- scripts.set_db_config(db_connection)
49
- time.sleep(1)
50
- hostname = utils.get_hostname()
51
- db_controller = DBController()
52
- nodes = db_controller.get_mgmt_nodes(cluster_id=cluster_id)
53
- if not nodes:
54
- logger.error("No mgmt nodes was found in the cluster!")
55
- return False
56
- for node in nodes:
57
- if node.hostname == hostname:
58
- logger.error("Node already exists in the cluster")
59
- return False
60
-
61
- logger.info("Joining docker swarm...")
62
- try:
63
- cluster_docker = utils.get_docker_client(cluster_id)
64
- docker_ip = cluster_docker.info()["Swarm"]["NodeAddr"]
65
- join_token = cluster_docker.swarm.attrs['JoinTokens']['Manager']
66
- node_docker = docker.DockerClient(base_url=f"tcp://{DEV_IP}:2375", version="auto")
67
- if node_docker.info()["Swarm"]["LocalNodeState"] == "active":
68
- logger.info("Node is part of another swarm, leaving swarm")
69
- try:
70
- cluster_docker.nodes.get(node_docker.info()["Swarm"]["NodeID"]).remove(force=True)
71
- except:
72
- pass
73
- node_docker.swarm.leave(force=True)
74
- time.sleep(5)
75
-
76
- node_docker.swarm.join([f"{docker_ip}:2377"], join_token)
77
-
78
- retries = 10
79
- while retries > 0:
80
- if node_docker.info()["Swarm"]["LocalNodeState"] == "active":
81
- break
82
- logger.info("Waiting for node to be active...")
83
- retries -= 1
84
- time.sleep(2)
85
- logger.info("Joining docker swarm > Done")
86
- time.sleep(5)
87
-
88
- except Exception as e:
89
- raise e
90
-
91
- logger.info("Adding management node object")
92
- node_id = add_mgmt_node(DEV_IP, cluster_id)
93
-
94
- # check if ha setting is required
95
- nodes = db_controller.get_mgmt_nodes(cluster_id=cluster_id)
96
- if len(nodes) >= 3:
97
- logger.info("Waiting for FDB container to be active...")
98
- fdb_cont = None
99
- retries = 30
100
- while retries > 0 and fdb_cont is None:
101
- logger.info("Looking for FDB container...")
102
- for cont in node_docker.containers.list(all=True):
103
- logger.debug(cont.attrs['Name'])
104
- if cont.attrs['Name'].startswith("/app_fdb"):
105
- fdb_cont = cont
106
- break
107
- if fdb_cont:
108
- logger.info("FDB container found")
109
- break
110
- else:
111
- retries -= 1
112
- time.sleep(5)
113
-
114
- if not fdb_cont:
115
- logger.warning("FDB container was not found")
116
- else:
117
- retries = 10
118
- while retries > 0:
119
- info = node_docker.containers.get(fdb_cont.attrs['Id'])
120
- status = info.attrs['State']["Status"]
121
- is_running = info.attrs['State']["Running"]
122
- if not is_running:
123
- logger.info("Container is not running, waiting...")
124
- time.sleep(3)
125
- retries -= 1
126
- else:
127
- logger.info(f"Container status: {status}, Is Running: {is_running}")
128
- break
129
-
130
- logger.info("Configuring Double DB...")
131
- time.sleep(3)
132
- scripts.set_db_config_double()
133
- for cl in db_controller.get_clusters():
134
- cl.ha_type = "ha"
135
- cl.write_to_db(db_controller.kv_store)
136
-
137
- logger.info("Node joined the cluster")
138
- return node_id
139
-
140
-
141
- def add_mgmt_node(mgmt_ip, cluster_id=None):
142
- db_controller = DBController()
143
- hostname = utils.get_hostname()
144
- node = db_controller.get_mgmt_node_by_hostname(hostname)
145
- if node:
146
- logger.error("Node already exists in the cluster")
147
- return False
148
-
149
- node = MgmtNode()
150
- node.uuid = str(uuid.uuid4())
151
- node.hostname = hostname
152
- node.docker_ip_port = f"{mgmt_ip}:2375"
153
- node.cluster_id = cluster_id
154
- node.mgmt_ip = mgmt_ip
155
- node.status = MgmtNode.STATUS_ONLINE
156
- node.write_to_db(db_controller.kv_store)
157
-
158
- mgmt_events.mgmt_add(node)
159
- logger.info("Done")
160
- return node.uuid
161
-
162
-
163
- def list_mgmt_nodes(is_json):
164
- db_controller = DBController()
165
- nodes = db_controller.get_mgmt_nodes()
166
- data = []
167
- output = ""
168
-
169
- for node in nodes:
170
- logging.debug(node)
171
- logging.debug("*" * 20)
172
- data.append({
173
- "UUID": node.get_id(),
174
- "Hostname": node.hostname,
175
- "IP": node.mgmt_ip,
176
- "Status": node.status,
177
- })
178
-
179
- if not data:
180
- return output
181
-
182
- if is_json:
183
- output = json.dumps(data, indent=2)
184
- else:
185
- output = utils.print_table(data)
186
- return output
187
-
188
-
189
- def remove_mgmt_node(uuid):
190
- db_controller = DBController()
191
- snode = db_controller.get_mgmt_node_by_id(uuid)
192
- if not snode:
193
- logger.error("can not find node")
194
- return False
195
-
196
- logging.info("Removing mgmt node")
197
- snode.remove(db_controller.kv_store)
198
-
199
- logger.info("Leaving swarm...")
200
- node_docker = docker.DockerClient(base_url=f"tcp://{snode.docker_ip_port}", version="auto")
201
- node_docker.swarm.leave()
202
-
203
- mgmt_events.mgmt_remove(snode)
204
- logging.info("done")
205
-
@@ -1,134 +0,0 @@
1
- # coding=utf-8
2
- import logging
3
-
4
- import time
5
- import sys
6
- from datetime import datetime
7
-
8
-
9
- from simplyblock_core.controllers import health_controller, storage_events, device_events
10
- from simplyblock_core.models.storage_node import StorageNode
11
- from simplyblock_core.rpc_client import RPCClient
12
- from simplyblock_core import constants, kv_store
13
-
14
- # Import the GELF logger
15
- from graypy import GELFUDPHandler
16
-
17
- def set_node_health_check(snode, health_check_status):
18
- snode = db_controller.get_storage_node_by_id(snode.get_id())
19
- if snode.health_check == health_check_status:
20
- return
21
- old_status = snode.health_check
22
- snode.health_check = health_check_status
23
- snode.updated_at = str(datetime.now())
24
- snode.write_to_db(db_store)
25
- storage_events.snode_health_check_change(snode, snode.health_check, old_status, caused_by="monitor")
26
-
27
-
28
- def set_device_health_check(cluster_id, device, health_check_status):
29
- if device.health_check == health_check_status:
30
- return
31
- nodes = db_controller.get_storage_nodes_by_cluster_id(cluster_id)
32
- for node in nodes:
33
- if node.nvme_devices:
34
- for dev in node.nvme_devices:
35
- if dev.get_id() == device.get_id():
36
- old_status = dev.health_check
37
- dev.health_check = health_check_status
38
- node.write_to_db(db_store)
39
- device_events.device_health_check_change(
40
- dev, dev.health_check, old_status, caused_by="monitor")
41
-
42
-
43
- # configure logging
44
- logger_handler = logging.StreamHandler(stream=sys.stdout)
45
- logger_handler.setFormatter(logging.Formatter('%(asctime)s: %(levelname)s: %(message)s'))
46
- gelf_handler = GELFUDPHandler('0.0.0.0', constants.GELF_PORT)
47
- logger = logging.getLogger()
48
- logger.addHandler(gelf_handler)
49
- logger.addHandler(logger_handler)
50
- logger.setLevel(logging.DEBUG)
51
-
52
- # get DB controller
53
- db_store = kv_store.KVStore()
54
- db_controller = kv_store.DBController()
55
-
56
- logger.info("Starting health check service")
57
- while True:
58
- clusters = db_controller.get_clusters()
59
- for cluster in clusters:
60
- cluster_id = cluster.get_id()
61
- snodes = db_controller.get_storage_nodes_by_cluster_id(cluster_id)
62
- if not snodes:
63
- logger.error("storage nodes list is empty")
64
-
65
- for snode in snodes:
66
- logger.info("Node: %s, status %s", snode.get_id(), snode.status)
67
-
68
- if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE]:
69
- logger.info(f"Node status is: {snode.status}, skipping")
70
- continue
71
-
72
- # 1- check node ping
73
- ping_check = health_controller._check_node_ping(snode.mgmt_ip)
74
- logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}")
75
-
76
- # 2- check node API
77
- node_api_check = health_controller._check_node_api(snode.mgmt_ip)
78
- logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}")
79
-
80
- if snode.status == StorageNode.STATUS_OFFLINE:
81
- set_node_health_check(snode, ping_check & node_api_check)
82
- continue
83
-
84
- # 3- check node RPC
85
- node_rpc_check = health_controller._check_node_rpc(
86
- snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password)
87
- logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}")
88
-
89
- # 4- docker API
90
- node_docker_check = health_controller._check_node_docker_api(snode.mgmt_ip)
91
- logger.info(f"Check: node docker API {snode.mgmt_ip}:2375 ... {node_docker_check}")
92
-
93
- is_node_online = ping_check and node_api_check and node_rpc_check and node_docker_check
94
-
95
- health_check_status = is_node_online
96
- if not node_rpc_check:
97
- logger.info("Putting all devices to unavailable state because RPC check failed")
98
- for dev in snode.nvme_devices:
99
- if dev.io_error:
100
- logger.debug(f"Skipping Device action because of io_error {dev.get_id()}")
101
- continue
102
- set_device_health_check(cluster_id, dev, False)
103
- else:
104
- logger.info(f"Node device count: {len(snode.nvme_devices)}")
105
- node_devices_check = True
106
- node_remote_devices_check = True
107
-
108
- for dev in snode.nvme_devices:
109
- if dev.io_error:
110
- logger.debug(f"Skipping Device check because of io_error {dev.get_id()}")
111
- continue
112
- ret = health_controller.check_device(dev.get_id())
113
- set_device_health_check(cluster_id, dev, ret)
114
- if dev.status == dev.STATUS_ONLINE:
115
- node_devices_check &= ret
116
-
117
- logger.info(f"Node remote device: {len(snode.remote_devices)}")
118
- rpc_client = RPCClient(
119
- snode.mgmt_ip, snode.rpc_port,
120
- snode.rpc_username, snode.rpc_password,
121
- timeout=10, retry=1)
122
- for remote_device in snode.remote_devices:
123
- ret = rpc_client.get_bdevs(remote_device.remote_bdev)
124
- if ret:
125
- logger.info(f"Checking bdev: {remote_device.remote_bdev} ... ok")
126
- else:
127
- logger.info(f"Checking bdev: {remote_device.remote_bdev} ... not found")
128
- node_remote_devices_check &= bool(ret)
129
-
130
- health_check_status = is_node_online and node_devices_check and node_remote_devices_check
131
- set_node_health_check(snode, health_check_status)
132
-
133
- time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC)
134
-
@@ -1,61 +0,0 @@
1
- # coding=utf-8
2
- import logging
3
- import time
4
- import sys
5
-
6
-
7
- from simplyblock_core import constants, kv_store
8
- from simplyblock_core.controllers import tasks_events
9
- from simplyblock_core.models.job_schedule import JobSchedule
10
-
11
-
12
- # Import the GELF logger
13
- from graypy import GELFUDPHandler
14
-
15
-
16
- def task_runner(task):
17
- task.status = JobSchedule.STATUS_RUNNING
18
- task.write_to_db(db_controller.kv_store)
19
- tasks_events.task_updated(task)
20
-
21
- time.sleep(30)
22
-
23
- task.function_result = "sleep 30"
24
- task.status = JobSchedule.STATUS_DONE
25
- task.write_to_db(db_controller.kv_store)
26
- tasks_events.task_updated(task)
27
-
28
- return True
29
-
30
-
31
- # configure logging
32
- logger_handler = logging.StreamHandler(stream=sys.stdout)
33
- logger_handler.setFormatter(logging.Formatter('%(asctime)s: %(levelname)s: %(message)s'))
34
- gelf_handler = GELFUDPHandler('0.0.0.0', constants.GELF_PORT)
35
- logger = logging.getLogger()
36
- logger.addHandler(gelf_handler)
37
- logger.addHandler(logger_handler)
38
- logger.setLevel(logging.DEBUG)
39
-
40
- # get DB controller
41
- db_controller = kv_store.DBController()
42
-
43
- logger.info("Starting Tasks runner...")
44
- while True:
45
- time.sleep(3)
46
- clusters = db_controller.get_clusters()
47
- if not clusters:
48
- logger.error("No clusters found!")
49
- else:
50
- for cl in clusters:
51
- tasks = db_controller.get_job_tasks(cl.get_id(), reverse=False)
52
- for task in tasks:
53
- delay_seconds = constants.TASK_EXEC_INTERVAL_SEC
54
- if task.function_name == JobSchedule.FN_DEV_MIG:
55
- while task.status != JobSchedule.STATUS_DONE:
56
- res = task_runner(task)
57
- if res:
58
- tasks_events.task_updated(task)
59
- else:
60
- time.sleep(delay_seconds)
61
- delay_seconds *= 2
File without changes
File without changes
File without changes