hybrid_platforms_conductor 32.3.6

Files changed (244)
  1. checksums.yaml +7 -0
  2. data/bin/check-node +24 -0
  3. data/bin/deploy +12 -0
  4. data/bin/dump_nodes_json +12 -0
  5. data/bin/free_ips +23 -0
  6. data/bin/free_veids +17 -0
  7. data/bin/get_impacted_nodes +43 -0
  8. data/bin/last_deploys +56 -0
  9. data/bin/nodes_to_deploy +104 -0
  10. data/bin/report +10 -0
  11. data/bin/run +39 -0
  12. data/bin/setup +11 -0
  13. data/bin/ssh_config +14 -0
  14. data/bin/test +13 -0
  15. data/bin/topograph +54 -0
  16. data/lib/hybrid_platforms_conductor/action.rb +82 -0
  17. data/lib/hybrid_platforms_conductor/actions_executor.rb +307 -0
  18. data/lib/hybrid_platforms_conductor/bitbucket.rb +123 -0
  19. data/lib/hybrid_platforms_conductor/cmd_runner.rb +188 -0
  20. data/lib/hybrid_platforms_conductor/cmdb.rb +34 -0
  21. data/lib/hybrid_platforms_conductor/common_config_dsl/bitbucket.rb +78 -0
  22. data/lib/hybrid_platforms_conductor/common_config_dsl/confluence.rb +43 -0
  23. data/lib/hybrid_platforms_conductor/common_config_dsl/file_system_tests.rb +110 -0
  24. data/lib/hybrid_platforms_conductor/common_config_dsl/idempotence_tests.rb +38 -0
  25. data/lib/hybrid_platforms_conductor/config.rb +263 -0
  26. data/lib/hybrid_platforms_conductor/confluence.rb +119 -0
  27. data/lib/hybrid_platforms_conductor/connector.rb +84 -0
  28. data/lib/hybrid_platforms_conductor/credentials.rb +127 -0
  29. data/lib/hybrid_platforms_conductor/current_dir_monitor.rb +42 -0
  30. data/lib/hybrid_platforms_conductor/deployer.rb +598 -0
  31. data/lib/hybrid_platforms_conductor/executable.rb +145 -0
  32. data/lib/hybrid_platforms_conductor/hpc_plugins/action/bash.rb +44 -0
  33. data/lib/hybrid_platforms_conductor/hpc_plugins/action/interactive.rb +44 -0
  34. data/lib/hybrid_platforms_conductor/hpc_plugins/action/my_action.rb.sample +79 -0
  35. data/lib/hybrid_platforms_conductor/hpc_plugins/action/remote_bash.rb +63 -0
  36. data/lib/hybrid_platforms_conductor/hpc_plugins/action/ruby.rb +69 -0
  37. data/lib/hybrid_platforms_conductor/hpc_plugins/action/scp.rb +61 -0
  38. data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/config.rb +78 -0
  39. data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/host_ip.rb +104 -0
  40. data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/host_keys.rb +114 -0
  41. data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/my_cmdb.rb.sample +129 -0
  42. data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/platform_handlers.rb +66 -0
  43. data/lib/hybrid_platforms_conductor/hpc_plugins/connector/my_connector.rb.sample +156 -0
  44. data/lib/hybrid_platforms_conductor/hpc_plugins/connector/ssh.rb +702 -0
  45. data/lib/hybrid_platforms_conductor/hpc_plugins/platform_handler/platform_handler_plugin.rb.sample +292 -0
  46. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/docker.rb +148 -0
  47. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/my_provisioner.rb.sample +103 -0
  48. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/podman.rb +125 -0
  49. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox.rb +522 -0
  50. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/proxmox_waiter.rb +707 -0
  51. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/reserve_proxmox_container +122 -0
  52. data/lib/hybrid_platforms_conductor/hpc_plugins/report/confluence.rb +69 -0
  53. data/lib/hybrid_platforms_conductor/hpc_plugins/report/mediawiki.rb +164 -0
  54. data/lib/hybrid_platforms_conductor/hpc_plugins/report/my_report_plugin.rb.sample +88 -0
  55. data/lib/hybrid_platforms_conductor/hpc_plugins/report/stdout.rb +61 -0
  56. data/lib/hybrid_platforms_conductor/hpc_plugins/report/templates/confluence_inventory.html.erb +33 -0
  57. data/lib/hybrid_platforms_conductor/hpc_plugins/test/bitbucket_conf.rb +137 -0
  58. data/lib/hybrid_platforms_conductor/hpc_plugins/test/can_be_checked.rb +21 -0
  59. data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_deploy_and_idempotence.rb +112 -0
  60. data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_from_scratch.rb +35 -0
  61. data/lib/hybrid_platforms_conductor/hpc_plugins/test/connection.rb +28 -0
  62. data/lib/hybrid_platforms_conductor/hpc_plugins/test/deploy_freshness.rb +44 -0
  63. data/lib/hybrid_platforms_conductor/hpc_plugins/test/deploy_from_scratch.rb +36 -0
  64. data/lib/hybrid_platforms_conductor/hpc_plugins/test/deploy_removes_root_access.rb +49 -0
  65. data/lib/hybrid_platforms_conductor/hpc_plugins/test/divergence.rb +25 -0
  66. data/lib/hybrid_platforms_conductor/hpc_plugins/test/executables.rb +46 -0
  67. data/lib/hybrid_platforms_conductor/hpc_plugins/test/file_system.rb +45 -0
  68. data/lib/hybrid_platforms_conductor/hpc_plugins/test/file_system_hdfs.rb +45 -0
  69. data/lib/hybrid_platforms_conductor/hpc_plugins/test/hostname.rb +25 -0
  70. data/lib/hybrid_platforms_conductor/hpc_plugins/test/idempotence.rb +77 -0
  71. data/lib/hybrid_platforms_conductor/hpc_plugins/test/ip.rb +38 -0
  72. data/lib/hybrid_platforms_conductor/hpc_plugins/test/jenkins_ci_conf.rb +56 -0
  73. data/lib/hybrid_platforms_conductor/hpc_plugins/test/jenkins_ci_masters_ok.rb +54 -0
  74. data/lib/hybrid_platforms_conductor/hpc_plugins/test/linear_strategy.rb +47 -0
  75. data/lib/hybrid_platforms_conductor/hpc_plugins/test/local_users.rb +82 -0
  76. data/lib/hybrid_platforms_conductor/hpc_plugins/test/mounts.rb +120 -0
  77. data/lib/hybrid_platforms_conductor/hpc_plugins/test/my_test_plugin.rb.sample +143 -0
  78. data/lib/hybrid_platforms_conductor/hpc_plugins/test/orphan_files.rb +74 -0
  79. data/lib/hybrid_platforms_conductor/hpc_plugins/test/ports.rb +85 -0
  80. data/lib/hybrid_platforms_conductor/hpc_plugins/test/private_ips.rb +38 -0
  81. data/lib/hybrid_platforms_conductor/hpc_plugins/test/public_ips.rb +38 -0
  82. data/lib/hybrid_platforms_conductor/hpc_plugins/test/spectre-meltdown-checker.sh +1930 -0
  83. data/lib/hybrid_platforms_conductor/hpc_plugins/test/spectre.rb +56 -0
  84. data/lib/hybrid_platforms_conductor/hpc_plugins/test/veids.rb +31 -0
  85. data/lib/hybrid_platforms_conductor/hpc_plugins/test/vulnerabilities.rb +159 -0
  86. data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/confluence.rb +122 -0
  87. data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/my_test_report.rb.sample +48 -0
  88. data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/stdout.rb +120 -0
  89. data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/templates/_confluence_errors_status.html.erb +46 -0
  90. data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/templates/_confluence_gauge.html.erb +49 -0
  91. data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/templates/confluence.html.erb +242 -0
  92. data/lib/hybrid_platforms_conductor/io_router.rb +70 -0
  93. data/lib/hybrid_platforms_conductor/json_dumper.rb +88 -0
  94. data/lib/hybrid_platforms_conductor/logger_helpers.rb +319 -0
  95. data/lib/hybrid_platforms_conductor/mutex_dir +76 -0
  96. data/lib/hybrid_platforms_conductor/nodes_handler.rb +597 -0
  97. data/lib/hybrid_platforms_conductor/parallel_threads.rb +97 -0
  98. data/lib/hybrid_platforms_conductor/platform_handler.rb +188 -0
  99. data/lib/hybrid_platforms_conductor/platforms_handler.rb +118 -0
  100. data/lib/hybrid_platforms_conductor/plugin.rb +53 -0
  101. data/lib/hybrid_platforms_conductor/plugins.rb +101 -0
  102. data/lib/hybrid_platforms_conductor/provisioner.rb +181 -0
  103. data/lib/hybrid_platforms_conductor/report.rb +31 -0
  104. data/lib/hybrid_platforms_conductor/reports_handler.rb +84 -0
  105. data/lib/hybrid_platforms_conductor/services_handler.rb +274 -0
  106. data/lib/hybrid_platforms_conductor/test.rb +141 -0
  107. data/lib/hybrid_platforms_conductor/test_by_service.rb +22 -0
  108. data/lib/hybrid_platforms_conductor/test_report.rb +282 -0
  109. data/lib/hybrid_platforms_conductor/tests_runner.rb +590 -0
  110. data/lib/hybrid_platforms_conductor/thycotic.rb +92 -0
  111. data/lib/hybrid_platforms_conductor/topographer.rb +859 -0
  112. data/lib/hybrid_platforms_conductor/topographer/plugin.rb +20 -0
  113. data/lib/hybrid_platforms_conductor/topographer/plugins/graphviz.rb +127 -0
  114. data/lib/hybrid_platforms_conductor/topographer/plugins/json.rb +72 -0
  115. data/lib/hybrid_platforms_conductor/topographer/plugins/my_topographer_output_plugin.rb.sample +37 -0
  116. data/lib/hybrid_platforms_conductor/topographer/plugins/svg.rb +30 -0
  117. data/lib/hybrid_platforms_conductor/version.rb +5 -0
  118. data/spec/hybrid_platforms_conductor_test.rb +159 -0
  119. data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/bash_spec.rb +43 -0
  120. data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/interactive_spec.rb +18 -0
  121. data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/remote_bash_spec.rb +102 -0
  122. data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/ruby_spec.rb +108 -0
  123. data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/scp_spec.rb +79 -0
  124. data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions_spec.rb +199 -0
  125. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connection_spec.rb +212 -0
  126. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/cli_options_spec.rb +125 -0
  127. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/config_dsl_spec.rb +50 -0
  128. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/connectable_nodes_spec.rb +28 -0
  129. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/connections_spec.rb +448 -0
  130. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/global_helpers_spec.rb +313 -0
  131. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/node_helpers_spec.rb +32 -0
  132. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/remote_actions_spec.rb +134 -0
  133. data/spec/hybrid_platforms_conductor_test/api/actions_executor/logging_spec.rb +256 -0
  134. data/spec/hybrid_platforms_conductor_test/api/actions_executor/parallel_spec.rb +338 -0
  135. data/spec/hybrid_platforms_conductor_test/api/actions_executor/timeout_spec.rb +101 -0
  136. data/spec/hybrid_platforms_conductor_test/api/cmd_runner_spec.rb +165 -0
  137. data/spec/hybrid_platforms_conductor_test/api/config_spec.rb +238 -0
  138. data/spec/hybrid_platforms_conductor_test/api/deployer/check_spec.rb +9 -0
  139. data/spec/hybrid_platforms_conductor_test/api/deployer/deploy_spec.rb +243 -0
  140. data/spec/hybrid_platforms_conductor_test/api/deployer/parse_deploy_output_spec.rb +104 -0
  141. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioner_spec.rb +131 -0
  142. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/docker/Dockerfile +10 -0
  143. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/docker_spec.rb +123 -0
  144. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/podman_spec.rb +211 -0
  145. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/config_dsl_spec.rb +126 -0
  146. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/create_spec.rb +290 -0
  147. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/destroy_spec.rb +43 -0
  148. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/ip_spec.rb +60 -0
  149. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/proxmox.json +3 -0
  150. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/destroy_vm_spec.rb +82 -0
  151. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/expired_containers_spec.rb +786 -0
  152. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/ips_assignment_spec.rb +112 -0
  153. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/other_lxc_containers_resources_spec.rb +190 -0
  154. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/pve_node_resources_spec.rb +200 -0
  155. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/retries_spec.rb +35 -0
  156. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/vm_ids_assignment_spec.rb +67 -0
  157. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/start_spec.rb +79 -0
  158. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/state_spec.rb +28 -0
  159. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/stop_spec.rb +41 -0
  160. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs/config_spec.rb +33 -0
  161. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs/host_ip_spec.rb +64 -0
  162. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs/host_keys_spec.rb +133 -0
  163. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs/platform_handlers_spec.rb +19 -0
  164. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs_plugins_api_spec.rb +446 -0
  165. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/common_spec.rb +127 -0
  166. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/git_diff_impacts_spec.rb +318 -0
  167. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/nodes_selectors_spec.rb +132 -0
  168. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/platform_handlers_plugins_api_spec.rb +60 -0
  169. data/spec/hybrid_platforms_conductor_test/api/nodes_handler/several_platforms_spec.rb +58 -0
  170. data/spec/hybrid_platforms_conductor_test/api/platform_handler_spec.rb +97 -0
  171. data/spec/hybrid_platforms_conductor_test/api/platforms_handler_spec.rb +104 -0
  172. data/spec/hybrid_platforms_conductor_test/api/plugins_spec.rb +243 -0
  173. data/spec/hybrid_platforms_conductor_test/api/reports_handler_spec.rb +44 -0
  174. data/spec/hybrid_platforms_conductor_test/api/services_handler/actions_to_deploy_spec.rb +121 -0
  175. data/spec/hybrid_platforms_conductor_test/api/services_handler/deploy_allowed_spec.rb +142 -0
  176. data/spec/hybrid_platforms_conductor_test/api/services_handler/log_info_spec.rb +101 -0
  177. data/spec/hybrid_platforms_conductor_test/api/services_handler/package_spec.rb +388 -0
  178. data/spec/hybrid_platforms_conductor_test/api/services_handler/parse_deploy_output_spec.rb +274 -0
  179. data/spec/hybrid_platforms_conductor_test/api/services_handler/prepare_for_deploy_spec.rb +264 -0
  180. data/spec/hybrid_platforms_conductor_test/api/tests_runner/common_spec.rb +194 -0
  181. data/spec/hybrid_platforms_conductor_test/api/tests_runner/global_spec.rb +37 -0
  182. data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_check_spec.rb +194 -0
  183. data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_spec.rb +137 -0
  184. data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_ssh_spec.rb +257 -0
  185. data/spec/hybrid_platforms_conductor_test/api/tests_runner/platform_spec.rb +110 -0
  186. data/spec/hybrid_platforms_conductor_test/api/tests_runner/reports_spec.rb +367 -0
  187. data/spec/hybrid_platforms_conductor_test/api/tests_runner/test_plugins/bitbucket_conf_spec.rb +111 -0
  188. data/spec/hybrid_platforms_conductor_test/api/tests_runner/test_reports_plugins/confluence_spec.rb +29 -0
  189. data/spec/hybrid_platforms_conductor_test/cmdb_plugins/test_cmdb.rb +166 -0
  190. data/spec/hybrid_platforms_conductor_test/cmdb_plugins/test_cmdb2.rb +93 -0
  191. data/spec/hybrid_platforms_conductor_test/cmdb_plugins/test_cmdb_others.rb +60 -0
  192. data/spec/hybrid_platforms_conductor_test/cmdb_plugins/test_cmdb_others2.rb +58 -0
  193. data/spec/hybrid_platforms_conductor_test/executables/check-node_spec.rb +35 -0
  194. data/spec/hybrid_platforms_conductor_test/executables/deploy_spec.rb +35 -0
  195. data/spec/hybrid_platforms_conductor_test/executables/get_impacted_nodes_spec.rb +158 -0
  196. data/spec/hybrid_platforms_conductor_test/executables/last_deploys_spec.rb +173 -0
  197. data/spec/hybrid_platforms_conductor_test/executables/nodes_to_deploy_spec.rb +283 -0
  198. data/spec/hybrid_platforms_conductor_test/executables/options/actions_executor_spec.rb +28 -0
  199. data/spec/hybrid_platforms_conductor_test/executables/options/cmd_runner_spec.rb +28 -0
  200. data/spec/hybrid_platforms_conductor_test/executables/options/common_spec.rb +67 -0
  201. data/spec/hybrid_platforms_conductor_test/executables/options/deployer_spec.rb +251 -0
  202. data/spec/hybrid_platforms_conductor_test/executables/options/nodes_handler_spec.rb +111 -0
  203. data/spec/hybrid_platforms_conductor_test/executables/options/nodes_selectors_spec.rb +71 -0
  204. data/spec/hybrid_platforms_conductor_test/executables/options/reports_handler_spec.rb +54 -0
  205. data/spec/hybrid_platforms_conductor_test/executables/options/tests_runner_spec.rb +139 -0
  206. data/spec/hybrid_platforms_conductor_test/executables/report_spec.rb +60 -0
  207. data/spec/hybrid_platforms_conductor_test/executables/run_spec.rb +173 -0
  208. data/spec/hybrid_platforms_conductor_test/executables/ssh_config_spec.rb +35 -0
  209. data/spec/hybrid_platforms_conductor_test/executables/test_spec.rb +41 -0
  210. data/spec/hybrid_platforms_conductor_test/helpers/actions_executor_helpers.rb +98 -0
  211. data/spec/hybrid_platforms_conductor_test/helpers/cmd_runner_helpers.rb +92 -0
  212. data/spec/hybrid_platforms_conductor_test/helpers/cmdb_helpers.rb +37 -0
  213. data/spec/hybrid_platforms_conductor_test/helpers/config_helpers.rb +20 -0
  214. data/spec/hybrid_platforms_conductor_test/helpers/connector_ssh_helpers.rb +130 -0
  215. data/spec/hybrid_platforms_conductor_test/helpers/deployer_helpers.rb +149 -0
  216. data/spec/hybrid_platforms_conductor_test/helpers/deployer_test_helpers.rb +812 -0
  217. data/spec/hybrid_platforms_conductor_test/helpers/executables_helpers.rb +96 -0
  218. data/spec/hybrid_platforms_conductor_test/helpers/nodes_handler_helpers.rb +20 -0
  219. data/spec/hybrid_platforms_conductor_test/helpers/platform_handler_helpers.rb +35 -0
  220. data/spec/hybrid_platforms_conductor_test/helpers/platforms_handler_helpers.rb +127 -0
  221. data/spec/hybrid_platforms_conductor_test/helpers/plugins_helpers.rb +48 -0
  222. data/spec/hybrid_platforms_conductor_test/helpers/provisioner_proxmox_helpers.rb +789 -0
  223. data/spec/hybrid_platforms_conductor_test/helpers/reports_handler_helpers.rb +29 -0
  224. data/spec/hybrid_platforms_conductor_test/helpers/services_handler_helpers.rb +20 -0
  225. data/spec/hybrid_platforms_conductor_test/helpers/tests_runner_helpers.rb +38 -0
  226. data/spec/hybrid_platforms_conductor_test/mocked_lib/my_test_gem/hpc_plugins/test_plugin_type/test_plugin_id1.rb +22 -0
  227. data/spec/hybrid_platforms_conductor_test/mocked_lib/my_test_gem/hpc_plugins/test_plugin_type/test_plugin_id2.rb +22 -0
  228. data/spec/hybrid_platforms_conductor_test/mocked_lib/my_test_gem2/sub_dir/hpc_plugins/test_plugin_type/test_plugin_id3.rb +26 -0
  229. data/spec/hybrid_platforms_conductor_test/mocked_lib/my_test_gem2/sub_dir/hpc_plugins/test_plugin_type2/test_plugin_id4.rb +26 -0
  230. data/spec/hybrid_platforms_conductor_test/platform_handler_plugins/test.rb +225 -0
  231. data/spec/hybrid_platforms_conductor_test/platform_handler_plugins/test2.rb +11 -0
  232. data/spec/hybrid_platforms_conductor_test/report_plugin.rb +35 -0
  233. data/spec/hybrid_platforms_conductor_test/test_action.rb +66 -0
  234. data/spec/hybrid_platforms_conductor_test/test_connector.rb +151 -0
  235. data/spec/hybrid_platforms_conductor_test/test_plugins/global.rb +30 -0
  236. data/spec/hybrid_platforms_conductor_test/test_plugins/node.rb +53 -0
  237. data/spec/hybrid_platforms_conductor_test/test_plugins/node_check.rb +47 -0
  238. data/spec/hybrid_platforms_conductor_test/test_plugins/node_ssh.rb +42 -0
  239. data/spec/hybrid_platforms_conductor_test/test_plugins/platform.rb +50 -0
  240. data/spec/hybrid_platforms_conductor_test/test_plugins/several_checks.rb +50 -0
  241. data/spec/hybrid_platforms_conductor_test/test_provisioner.rb +95 -0
  242. data/spec/hybrid_platforms_conductor_test/tests_report_plugin.rb +49 -0
  243. data/spec/spec_helper.rb +111 -0
  244. metadata +566 -0
data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/proxmox_waiter.rb
@@ -0,0 +1,707 @@
+ # Require tmpdir before futex, as this Rubygem has a bug missing its require.
+ require 'tmpdir'
+ require 'futex'
+ require 'json'
+ require 'proxmox'
+ require 'time'
+
+ # Serve Proxmox reservation requests, like a waiter in a restaurant ;-)
+ # Multi-process safe.
+ class ProxmoxWaiter
+
+   # Integer: Timeout in seconds to get the futex
+   # Take into account that some processes can be lengthy while the futex is taken:
+   # * POST/DELETE operations in the Proxmox API require tasks to be performed, which can take a few seconds depending on the load.
+   # * The Proxmox API sometimes fails to respond when containers are being locked temporarily (we have a 30 secs timeout for each one).
+   FUTEX_TIMEOUT = 600
+
+   # Integer: Maximum timeout in seconds before retrying to get the futex when we are not first in the queue (a rand will be applied to it)
+   RETRY_QUEUE_WAIT = 30
+
+   # Constructor
+   #
+   # Parameters::
+   # * *config_file* (String): Path to a JSON file containing a configuration for ProxmoxWaiter.
+   #   Here is the file structure:
+   #   * *proxmox_api_url* (String): Proxmox API URL.
+   #   * *futex_file* (String): Path to the file serving as a futex.
+   #   * *logs_dir* (String): Path to the directory containing logs [default: '.']
+   #   * *pve_nodes* (Array<String>): List of PVE nodes allowed to spawn new containers [default: all]
+   #   * *vm_ips_list* (Array<String>): The list of IPs that are available for the Proxmox containers.
+   #   * *vm_ids_range* ([Integer, Integer]): Minimum and maximum reservable VM ID
+   #   * *coeff_ram_consumption* (Integer): Importance coefficient to assign to the RAM consumption when selecting available PVE nodes
+   #   * *coeff_disk_consumption* (Integer): Importance coefficient to assign to the disk consumption when selecting available PVE nodes
+   #   * *expiration_period_secs* (Integer): Number of seconds defining the expiration period
+   #   * *expire_stopped_vm_timeout_secs* (Integer): Number of seconds before considering stopped VMs as expired
+   #   * *limits* (Hash): Limits to be taken into account while reserving resources. Each property is optional and no property means no limit.
+   #     * *nbr_vms_max* (Integer): Max number of VMs we can reserve.
+   #     * *cpu_loads_thresholds* ([Float, Float, Float]): CPU load thresholds from which a PVE node should not be used (as soon as 1 of the values is greater than 1 of those thresholds, discard the node).
+   #     * *ram_percent_used_max* (Float): Max percentage (between 0 and 1) of RAM that can be reserved on a PVE node.
+   #     * *disk_percent_used_max* (Float): Max percentage (between 0 and 1) of disk that can be reserved on a PVE node.
+   # * *proxmox_user* (String): Proxmox user to be used to connect to the API.
+   # * *proxmox_password* (String): Proxmox password to be used to connect to the API.
+   # * *proxmox_realm* (String): Proxmox realm to be used to connect to the API.
+   def initialize(config_file, proxmox_user, proxmox_password, proxmox_realm)
+     @config = JSON.parse(File.read(config_file))
+     @proxmox_user = proxmox_user
+     @proxmox_password = proxmox_password
+     @proxmox_realm = proxmox_realm
+     # Keep a memory of non-debug stopped containers, so that we can guess if they are expired or not after some time.
+     # Time when we noticed a given container is stopped, per creation date, per VM ID, per PVE node.
+     # We add the creation date as a VM ID can be reused (with a different creation date) and we want to make sure we don't think a newly created VM has been here for longer than it should.
+     # Hash< String, Hash< Integer, Hash< String, Time > > >
+     # Hash< pve_node, Hash< vm_id, Hash< creation_date, time_seen_as_stopped > > >
+     @non_debug_stopped_containers = {}
+     @log_file = "#{@config['logs_dir'] || '.'}/proxmox_waiter_#{Time.now.utc.strftime('%Y%m%d%H%M%S')}_pid_#{Process.pid}_#{File.basename(config_file, '.json')}.log"
+     FileUtils.mkdir_p File.dirname(@log_file)
+   end
+
+   # Reserve resources for a new container.
+   # Check resources availability.
+   #
+   # Parameters::
+   # * *vm_info* (Hash<String,Object>): The VM info to be created, using the same properties as LXC container creation through the Proxmox API.
+   # Result::
+   # * Hash<Symbol, Object> or Symbol: Reserved resource info, or Symbol in case of error.
+   #   The following properties are set as resource info:
+   #   * *pve_node* (String): Node on which the container has been created.
+   #   * *vm_id* (Integer): The VM ID
+   #   * *vm_ip* (String): The VM IP
+   #   Possible error codes returned are:
+   #   * *not_enough_resources*: There are no available free resources to be reserved
+   #   * *no_available_ip*: There is no available IP to be reserved
+   #   * *no_available_vm_id*: There is no available VM ID to be reserved
+   #   * *exceeded_number_of_vms*: There are already too many VMs running
+   def create(vm_info)
+     log "Ask to create #{vm_info}"
+     # Extract the required resources from the desired VM info
+     nbr_cpus = vm_info['cpulimit']
+     ram_mb = vm_info['memory']
+     disk_gb = Integer(vm_info['rootfs'].split(':').last)
+     reserved_resource = nil
+     start do
+       pve_node_scores = pve_scores_for(nbr_cpus, ram_mb, disk_gb)
+       # Check if we are not exceeding hard-limits:
+       # * the number of VMs to be created
+       # * the free IPs
+       # * the free VM IDs
+       # In such a case, even when free resources on PVE nodes are enough to host the new container, we still need to clean up before.
+       nbr_vms = nbr_vms_handled_by_us
+       if nbr_vms >= @config['limits']['nbr_vms_max'] || free_ips.empty? || free_vm_ids.empty?
+         log 'Hitting at least 1 hard-limit. Check if we can destroy expired containers.'
+         log "[ Hard limit reached ] - Already #{nbr_vms} are created (max is #{@config['limits']['nbr_vms_max']})." if nbr_vms >= @config['limits']['nbr_vms_max']
+         log '[ Hard limit reached ] - No more available IPs.' if free_ips.empty?
+         log '[ Hard limit reached ] - No more available VM IDs.' if free_vm_ids.empty?
+         clean_up_done = false
+         # Check if we can remove some expired ones
+         @config['pve_nodes'].each do |pve_node|
+           if api_get("nodes/#{pve_node}/lxc").any? { |lxc_info| is_vm_expired?(pve_node, Integer(lxc_info['vmid'])) }
+             destroy_expired_vms_on(pve_node)
+             clean_up_done = true
+           end
+         end
+         if clean_up_done
+           nbr_vms = nbr_vms_handled_by_us
+           if nbr_vms >= @config['limits']['nbr_vms_max']
+             log "[ Hard limit reached ] - Still too many running VMs after clean-up: #{nbr_vms}."
+             reserved_resource = :exceeded_number_of_vms
+           elsif free_ips.empty?
+             log '[ Hard limit reached ] - Still no available IP'
+             reserved_resource = :no_available_ip
+           elsif free_vm_ids.empty?
+             log '[ Hard limit reached ] - Still no available VM ID'
+             reserved_resource = :no_available_vm_id
+           end
+         else
+           log 'Could not find any expired VM to destroy.'
+           # There was nothing to clean. So wait for other processes to destroy their containers.
+           reserved_resource =
+             if nbr_vms >= @config['limits']['nbr_vms_max']
+               :exceeded_number_of_vms
+             elsif free_ips.empty?
+               :no_available_ip
+             else
+               :no_available_vm_id
+             end
+         end
+       end
+       if reserved_resource.nil?
+         # Select the best node, first keeping expired VMs if possible.
+         # This is the index of the scores to be checked: if we can choose without recycling VMs, do it by considering score index 0.
+         score_idx =
+           if pve_node_scores.all? { |_pve_node, pve_node_scores| pve_node_scores[0].nil? }
+             # No node was available without removing expired VMs.
+             # Therefore we consider only scores without expired VMs.
+             log 'No PVE node has enough free resources without removing eventual expired VMs'
+             1
+           else
+             0
+           end
+         selected_pve_node, selected_pve_node_score = pve_node_scores.inject([nil, nil]) do |(best_pve_node, best_score), (pve_node, pve_node_scores)|
+           if pve_node_scores[score_idx].nil? ||
+               (!best_score.nil? && pve_node_scores[score_idx] >= best_score)
+             [best_pve_node, best_score]
+           else
+             [pve_node, pve_node_scores[score_idx]]
+           end
+         end
+         if selected_pve_node.nil?
+           # No PVE node can host our request.
+           log 'Could not find any PVE node with enough free resources'
+           reserved_resource = :not_enough_resources
+         else
+           log "[ #{selected_pve_node} ] - PVE node selected with score #{selected_pve_node_score}"
+           # We know on which PVE node we can instantiate our new container.
+           # We have to purge expired VMs on this PVE node before reserving a new creation.
+           destroy_expired_vms_on(selected_pve_node) if score_idx == 1
+           # Now select the correct VM ID and VM IP.
+           vm_id_or_error, ip = reserve_on(selected_pve_node, nbr_cpus, ram_mb, disk_gb)
+           if ip.nil?
+             # We have an error
+             reserved_resource = vm_id_or_error
+           else
+             # Create the container for real
+             completed_vm_info = vm_info.dup
+             completed_vm_info['vmid'] = vm_id_or_error
+             completed_vm_info['net0'] = "#{completed_vm_info['net0']},ip=#{ip}/32"
+             completed_vm_info['description'] = "#{completed_vm_info['description']}creation_date: #{Time.now.utc.strftime('%FT%T')}\n"
+             log "[ #{selected_pve_node}/#{vm_id_or_error} ] - Create LXC container"
+             wait_for_proxmox_task(selected_pve_node, @proxmox.post("nodes/#{selected_pve_node}/lxc", completed_vm_info))
+             reserved_resource = {
+               pve_node: selected_pve_node,
+               vm_id: vm_id_or_error,
+               vm_ip: ip
+             }
+           end
+         end
+       end
+     end
+     reserved_resource
+   end
+
+   # Destroy a VM.
+   #
+   # Parameters::
+   # * *vm_info* (Hash<String,Object>): The VM info to be destroyed:
+   #   * *vm_id* (Integer): The VM ID
+   #   * *node* (String): The node for which this VM has been created
+   #   * *environment* (String): The environment for which this VM has been created
+   # Result::
+   # * Hash<Symbol, Object> or Symbol: Released resource info, or Symbol in case of error.
+   #   The following properties are set as resource info:
+   #   * *pve_node* (String): Node on which the container has been released (if found).
+   #   Possible error codes returned are:
+   #   None
+   def destroy(vm_info)
+     log "Ask to destroy #{vm_info}"
+     found_pve_node = nil
+     start do
+       vm_id_str = vm_info['vm_id'].to_s
+       # Destroy the VM ID
+       # Find which PVE node hosts this VM
+       unless @config['pve_nodes'].any? do |pve_node|
+           api_get("nodes/#{pve_node}/lxc").any? do |lxc_info|
+             if lxc_info['vmid'] == vm_id_str
+               # Make sure this VM is still used for the node and environment we want.
+               # It could have been deleted manually and re-affected to another node/environment automatically, and in this case we should not remove it.
+               metadata = vm_metadata(pve_node, vm_info['vm_id'])
+               if metadata[:node] == vm_info['node'] && metadata[:environment] == vm_info['environment']
+                 destroy_vm_on(pve_node, vm_info['vm_id'])
+                 found_pve_node = pve_node
+                 true
+               else
+                 log "[ #{pve_node}/#{vm_info['vm_id']} ] - This container is not hosting the node/environment to be destroyed: #{metadata[:node]}/#{metadata[:environment]} != #{vm_info['node']}/#{vm_info['environment']}"
+                 false
+               end
+             else
+               false
+             end
+           end
+         end
+         log "Could not find any PVE node hosting VM #{vm_info['vm_id']}"
+       end
+     end
+     reserved_resource = {}
+     reserved_resource[:pve_node] = found_pve_node unless found_pve_node.nil?
+     reserved_resource
+   end
+
+   private
+
+   # Log a message to stdout and in the log file
+   #
+   # Parameters::
+   # * *msg* (String): Message to log
+   def log(msg)
+     puts msg
+     File.open(@log_file, 'a') { |f| f.puts "[ #{Time.now.utc.strftime('%F %T.%L')} ] - [ PID #{Process.pid} ] - #{msg}" }
+   end
+
+   # Get the access queue from a file.
+   # Handle the case of a missing file.
+   #
+   # Parameters::
+   # * *queue_file* (String): The file holding the queue
+   # Result::
+   # * Array<Integer>: PIDs queue
+   def read_access_queue(queue_file)
+     (File.exist?(queue_file) ? File.read(queue_file).split("\n").map { |line| Integer(line) } : [])
+   end
+
+   # Write the access queue to a file.
+   #
+   # Parameters::
+   # * *queue_file* (String): The file holding the queue
+   # * *access_queue* (Array<Integer>): PIDs queue
+   def write_access_queue(queue_file, access_queue)
+     File.write(queue_file, access_queue.join("\n"))
+   end
+
+   # Get an exclusive (PID-based) access using a futex-protected queue
+   #
+   # Parameters::
+   # * *futex_file* (String): Name of the file to be used as a futex
+   # * Proc: Code called with access authorized
+   def with_futex_queue_access_on(futex_file)
+     pid = Process.pid
+     queue_futex_file = "#{futex_file}.queue"
+     # Register ourselves in the queue (at the end)
+     Futex.new(queue_futex_file, timeout: FUTEX_TIMEOUT).open do
+       access_queue = read_access_queue(queue_futex_file)
+       log "[ Futex queue ] - Register our PID in the queue: #{access_queue.join(', ')}"
+       write_access_queue(queue_futex_file, access_queue + [pid])
+     end
+     # Loop until we are the first one in the queue
+     retry_futex_queue = true
+     while retry_futex_queue
+       Futex.new(futex_file, timeout: FUTEX_TIMEOUT).open do
+         # Check if we are the first one in the queue
+         Futex.new(queue_futex_file, timeout: FUTEX_TIMEOUT).open do
+           access_queue = read_access_queue(queue_futex_file)
+           idx = access_queue.index(pid)
+           log "[ Futex queue ] - We are ##{idx} in the queue: #{access_queue.join(', ')}"
+           if idx.nil?
+             # We disappeared from the queue!
+             log '[ Futex queue ] - !!! Somebody removed us from the queue. Add our PID back.'
+             write_access_queue(queue_futex_file, access_queue + [pid])
+           elsif idx == 0
+             # Access granted
+             log '[ Futex queue ] - Exclusive access granted'
+             write_access_queue(queue_futex_file, access_queue[1..-1])
+             retry_futex_queue = false
+           else
+             # Just check that the first PID still exists, otherwise remove it from the queue.
+             # This way we avoid starvation in case of killed processes.
+             first_pid = access_queue.first
+             first_pid_exist =
+               begin
+                 Process.getpgid(first_pid)
+                 true
+               rescue Errno::ESRCH
+                 false
+               end
+             unless first_pid_exist
+               log "[ Futex queue ] - !!! First PID #{first_pid} does not exist - remove it from the queue"
+               write_access_queue(queue_futex_file, access_queue[1..-1])
+             end
+           end
+         end
+         yield unless retry_futex_queue
+       end
+       sleep(rand(RETRY_QUEUE_WAIT) + 1) if retry_futex_queue
+     end
+   end
+
+   # Grab the lock to start a new atomic session.
+   # Make sure the lock is released at the end of the session.
+   #
+   # Parameters::
+   # * Proc: Client code with the session started.
+   #   The following instance variables are set:
+   #   * *@expiration_date* (Time): The expiration date to be considered when selecting expired VMs
+   #   * *@proxmox* (Proxmox): The Proxmox instance
+   def start
+     with_futex_queue_access_on(@config['futex_file']) do
+       # Connect to Proxmox's API
+       @proxmox = Proxmox::Proxmox.new(
+         "#{@config['proxmox_api_url']}/api2/json/",
+         # Proxmox uses the hostname as the node name, so derive the default API node from the URL.
+         # cf https://pve.proxmox.com/wiki/Renaming_a_PVE_node
+         URI.parse(@config['proxmox_api_url']).host.downcase.split('.').first,
+         @proxmox_user,
+         @proxmox_password,
+         @proxmox_realm,
+         { verify_ssl: false }
+       )
+       # Cache of GET queries to the API
+       @gets_cache = {}
+       # Check connectivity before going further
+       begin
+         nodes_info = api_get('nodes')
+         # Get the list of PVE nodes by default
+         @config['pve_nodes'] = nodes_info.map { |node_info| node_info['node'] } unless @config['pve_nodes']
+       rescue
+         raise "Unable to connect to Proxmox API #{@config['proxmox_api_url']} with user #{@proxmox_user}: #{$!}"
+       end
+       @expiration_date = Time.now.utc - @config['expiration_period_secs']
+       log "Consider expiration date #{@expiration_date.strftime('%F %T')}"
+       begin
+         yield
+       ensure
+         @expiration_date = nil
+         @proxmox = nil
+       end
+     end
+   end
+
+   # Compute scores as if we were to allocate resources on each possible PVE node.
+   # Those scores can help in choosing the best PVE node to host those resources.
+   # The best score is the smallest one.
+   # The score is computed by simulating the resources' consumption on the node if our container were to be installed on this node.
+   # The score uses coefficients to weigh some criteria more than others (all configured in the config file).
+   # 2 scores are gathered: 1 with the current PVE node's VMs, and 1 with the node having expired VMs removed.
+   # If a score is nil, it means the node can't be used (for example when a hard limit has been hit).
+   # Prerequisites:
+   # * This method should be called in a #start block
+   #
+   # Parameters::
+   # * *nbr_cpus* (Integer): Wanted CPUs
+   # * *ram_mb* (Integer): Wanted MB of RAM
+   # * *disk_gb* (Integer): Wanted GB of disk
+   # Result::
+   # * Hash<String, [Float or nil, Float or nil]>: The set of 2 scores, per PVE node name
+   def pve_scores_for(nbr_cpus, ram_mb, disk_gb)
+     Hash[@config['pve_nodes'].map do |pve_node|
+       # Get some resource usage stats from the node directly
+       status_info = api_get("nodes/#{pve_node}/status")
+       load_average = status_info['loadavg'].map { |load_str| Float(load_str) }
+       log "[ #{pve_node} ] - Load average: #{load_average.join(', ')}"
+       [
+         pve_node,
+         # If CPU load is too high, don't select the node anyway.
+         if load_average.zip(@config['limits']['cpu_loads_thresholds']).all? { |load_current, load_limit| load_current <= load_limit }
+           storage_info = api_get("nodes/#{pve_node}/storage").find { |search_storage_info| search_storage_info['storage'] == 'local-lvm' }
+           disk_gb_total = storage_info['total'] / (1024 * 1024 * 1024)
+           ram_mb_total = status_info['memory']['total'] / (1024 * 1024)
+           # Used resources are the sum of the allocated resources for each VM in this PVE node.
+           # They are not necessarily the resources currently in use.
+           # This way we are sure to keep the allocated resources intact for containers not handled by this script.
+           disk_gb_used = 0
+           ram_mb_used = 0
+           # Store the resources used by containers we can recycle in separate variables.
+           expired_disk_gb_used = 0
+           expired_ram_mb_used = 0
+           found_vm_ids = api_get("nodes/#{pve_node}/lxc").map do |lxc_info|
+             vm_id = Integer(lxc_info['vmid'])
+             # Sometimes the Proxmox API returns maxdisk as a String (but not always), even if it is documented as Integer here: https://pve.proxmox.com/pve-docs/api-viewer/#/nodes/{node}/lxc.
+             # TODO: Remove the Integer conversion when the Proxmox API is fixed.
+             lxc_disk_gb_used = Integer(lxc_info['maxdisk']) / (1024 * 1024 * 1024)
+             lxc_ram_mb_used = lxc_info['maxmem'] / (1024 * 1024)
+             if is_vm_expired?(pve_node, vm_id)
+               expired_disk_gb_used += lxc_disk_gb_used
+               expired_ram_mb_used += lxc_ram_mb_used
+             else
+               disk_gb_used += lxc_disk_gb_used
+               ram_mb_used += lxc_ram_mb_used
+             end
+             vm_id.to_s
+           end
+           log "[ #{pve_node} ] - RAM MB usage: #{ram_mb_used + expired_ram_mb_used} / #{ram_mb_total} (#{expired_ram_mb_used} MB from expired containers)"
+           log "[ #{pve_node} ] - Disk GB usage: #{disk_gb_used + expired_disk_gb_used} / #{disk_gb_total} (#{expired_disk_gb_used} GB from expired containers)"
+           # Evaluate the expected percentages of resources' usage if we were to add our new container to this PVE node.
+           expected_ram_percent_used = (ram_mb_used + expired_ram_mb_used + ram_mb).to_f / ram_mb_total
+           expected_disk_percent_used = (disk_gb_used + expired_disk_gb_used + disk_gb).to_f / disk_gb_total
+           expected_ram_percent_used_without_expired = (ram_mb_used + ram_mb).to_f / ram_mb_total
+           expected_disk_percent_used_without_expired = (disk_gb_used + disk_gb).to_f / disk_gb_total
+           # If we break the limits, don't select this node.
+           # Otherwise, store the scores, taking into account coefficients to then choose among possible PVE nodes.
+           [
+             if expected_ram_percent_used <= @config['limits']['ram_percent_used_max'] &&
+                 expected_disk_percent_used <= @config['limits']['disk_percent_used_max']
+               expected_ram_percent_used * @config['coeff_ram_consumption'] + expected_disk_percent_used * @config['coeff_disk_consumption']
+             else
+               nil
+             end,
+             if expected_ram_percent_used_without_expired <= @config['limits']['ram_percent_used_max'] &&
+                 expected_disk_percent_used_without_expired <= @config['limits']['disk_percent_used_max']
+               expected_ram_percent_used_without_expired * @config['coeff_ram_consumption'] + expected_disk_percent_used_without_expired * @config['coeff_disk_consumption']
+             else
+               nil
+             end
+           ]
+         else
+           # CPU load is too high. Don't select this node.
+           log "[ #{pve_node} ] - Load average is too high for this PVE node to be selected (thresholds: #{@config['limits']['cpu_loads_thresholds'].join(', ')})"
+           [nil, nil]
+         end
+       ]
+     end]
+   end
+
+   # Is a given VM expired?
+   #
+   # Parameters::
+   # * *pve_node* (String): The PVE node hosting this VM
+   # * *vm_id* (Integer): The VM ID
+   # Result::
+   # * Boolean: Is the given VM expired?
+   def is_vm_expired?(pve_node, vm_id)
+     if vm_id.between?(*@config['vm_ids_range'])
+       # Get its reservation date from the notes
+       metadata = vm_metadata(pve_node, vm_id)
+       if metadata[:creation_date].nil? || Time.parse("#{metadata[:creation_date]} UTC") < @expiration_date
+         log "[ #{pve_node}/#{vm_id} ] - [ Expired ] - Creation date is #{metadata[:creation_date]}"
+         true
+       else
+         state = vm_state(pve_node, vm_id)
+         if state == 'running' || metadata[:debug] == 'true'
+           # Just in case it was previously remembered as a non-debug stopped container, clear it.
+           @non_debug_stopped_containers[pve_node].delete(vm_id) if @non_debug_stopped_containers.key?(pve_node)
+           log "[ #{pve_node}/#{vm_id} ] - State is #{state} and debug is #{metadata[:debug]}"
+           false
+         else
+           # Check that it is not a left-over from a crash.
+           # If it stays stopped for long and is not meant for debug purposes, then it is also considered expired.
+           # For this, remember previously seen containers that were stopped.
+           first_time_seen_as_stopped = @non_debug_stopped_containers.dig pve_node, vm_id, metadata[:creation_date]
+           if first_time_seen_as_stopped.nil?
+             # It is the first time we see it stopped.
+             # Remember it and consider it as non-expired.
+             @non_debug_stopped_containers[pve_node] = {} unless @non_debug_stopped_containers.key?(pve_node)
+             @non_debug_stopped_containers[pve_node][vm_id] = {} unless @non_debug_stopped_containers[pve_node].key?(vm_id)
+             @non_debug_stopped_containers[pve_node][vm_id][metadata[:creation_date]] = Time.now
+             log "[ #{pve_node}/#{vm_id} ] - Discovered non-debug container (created on #{metadata[:creation_date]}) as stopped"
+             false
+           elsif Time.now - first_time_seen_as_stopped >= @config['expire_stopped_vm_timeout_secs']
+             # If it has been stopped for more than the timeout, then consider it expired
+             log "[ #{pve_node}/#{vm_id} ] - [ Expired ] - Non-debug container (created on #{metadata[:creation_date]}) has been stopped since #{first_time_seen_as_stopped.strftime('%F %T')} (more than #{@config['expire_stopped_vm_timeout_secs']} seconds ago)"
+             true
+           else
+             log "[ #{pve_node}/#{vm_id} ] - Non-debug container (created on #{metadata[:creation_date]}) has been stopped since #{first_time_seen_as_stopped.strftime('%F %T')} (less than #{@config['expire_stopped_vm_timeout_secs']} seconds ago)"
+             false
+           end
+         end
+       end
+     else
+       log "[ #{pve_node}/#{vm_id} ] - Container is not part of our VM ID range."
+       false
+     end
+   end
+
+   # Get the metadata we associate to VMs.
+   # It can be empty if no metadata is found.
+   #
+   # Parameters::
+   # * *pve_node* (String): The PVE node hosting this VM
+   # * *vm_id* (Integer): The VM ID
+   # Result::
+   # * Hash<Symbol, String>: The metadata
+   def vm_metadata(pve_node, vm_id)
+     lxc_config = api_get("nodes/#{pve_node}/lxc/#{vm_id}/config")
+     vm_description_lines = (lxc_config['description'] || '').split("\n")
+     hpc_marker_idx = vm_description_lines.index('===== HPC info =====')
+     if hpc_marker_idx.nil?
+       {}
+     else
+       Hash[vm_description_lines[hpc_marker_idx + 1..-1].map do |line|
+         property, value = line.split(': ')
+         [property.to_sym, value]
+       end]
+     end
+   end
+
+   # Count the number of currently existing VMs handled by us.
+   #
+   # Result::
+   # * Integer: Number of VMs handled by us
+   def nbr_vms_handled_by_us
+     @config['pve_nodes'].map do |pve_node|
+       api_get("nodes/#{pve_node}/lxc").select { |lxc_info| Integer(lxc_info['vmid']).between?(*@config['vm_ids_range']) }.size
+     end.sum
+   end
+
+   # Reserve resources for a new container on a PVE node, and assign a new VM ID and IP to it.
+   # Prerequisites:
+   # * This method should be called in a #start block
+   #
+   # Parameters::
+   # * *pve_node* (String): Node on which we reserve the resources.
+   # * *nbr_cpus* (Integer): Wanted CPUs
+   # * *ram_mb* (Integer): Wanted MB of RAM
+   # * *disk_gb* (Integer): Wanted GB of disk
+   # Result::
+   # * [Integer, String] or Symbol: Reserved resource info ([vm_id, ip]), or Symbol in case of error.
+   #   Possible error codes returned are:
+   #   * *no_available_ip*: There is no available IP to be reserved
+   #   * *no_available_vm_id*: There is no available VM ID to be reserved
+   def reserve_on(pve_node, nbr_cpus, ram_mb, disk_gb)
+     # We select a new VM ID and VM IP.
+     selected_vm_ip = free_ips.first
+     if selected_vm_ip.nil?
+       # No available IP for now.
+       :no_available_ip
+     else
+       selected_vm_id = free_vm_ids.first
+       if selected_vm_id.nil?
+         # No available ID for now.
+         :no_available_vm_id
+       else
+         # Success
+         log "[ #{pve_node}/#{selected_vm_id} ] - New LXC container reserved with IP #{selected_vm_ip}"
+         [selected_vm_id, selected_vm_ip]
+       end
+     end
+   end
+
+   # Destroy expired VMs on a PVE node.
+   # Only consider VMs that fall in the config VM ID range and are expired.
+   #
+   # Parameters::
+   # * *pve_node* (String): PVE node to delete expired VMs from.
+   def destroy_expired_vms_on(pve_node)
+     api_get("nodes/#{pve_node}/lxc").each do |lxc_info|
+       vm_id = Integer(lxc_info['vmid'])
+       destroy_vm_on(pve_node, vm_id) if is_vm_expired?(pve_node, vm_id)
+     end
+     # Invalidate the API cache for anything related to this PVE node
+     pve_node_paths_regexp = /^nodes\/#{Regexp.escape(pve_node)}\/.+$/
+     @gets_cache.delete_if { |path, _result| path =~ pve_node_paths_regexp }
+   end
+
+   # Destroy a VM on a PVE node.
+   # Stop it if needed before destroying it.
+   #
+   # Parameters::
+   # * *pve_node* (String): PVE node hosting the VM
+   # * *vm_id* (Integer): The VM ID to destroy
+   def destroy_vm_on(pve_node, vm_id)
+     if vm_state(pve_node, vm_id) == 'running'
+       log "[ #{pve_node}/#{vm_id} ] - Stop LXC container"
+       wait_for_proxmox_task(pve_node, @proxmox.post("nodes/#{pve_node}/lxc/#{vm_id}/status/stop"))
+     end
+     log "[ #{pve_node}/#{vm_id} ] - Destroy LXC container"
+     wait_for_proxmox_task(pve_node, @proxmox.delete("nodes/#{pve_node}/lxc/#{vm_id}"))
+   end
+
+   # Return the list of available IPs
+   #
+   # Result::
+   # * Array<String>: List of available IPs
+   def free_ips
+     # Consider all nodes and all IPs to ensure we won't create any conflict, even outside our allowed range
+     @config['vm_ips_list'] -
+       api_get('nodes').map do |pve_node_info|
+         pve_node = pve_node_info['node']
+         api_get("nodes/#{pve_node}/lxc").map do |lxc_info|
+           ip_of(pve_node, Integer(lxc_info['vmid']))
+         end.compact
+       end.flatten
+   end
+
+   # Return the list of available VM IDs
+   #
+   # Result::
+   # * Array<Integer>: List of available VM IDs
+   def free_vm_ids
+     Range.new(*@config['vm_ids_range']).to_a -
+       api_get('nodes').map do |pve_node_info|
+         api_get("nodes/#{pve_node_info['node']}/lxc").map { |lxc_info| Integer(lxc_info['vmid']) }
+       end.flatten
+   end
+
+   # Wait for a given Proxmox task completion
+   #
+   # Parameters::
+   # * *pve_node* (String): The PVE node on which the task is run
+   # * *task* (String): The task ID
+   def wait_for_proxmox_task(pve_node, task)
+     raise "Invalid task: #{task}" if task[0..3] == 'NOK:'
+     while task_status(pve_node, task) == 'running'
+       log "[ #{pve_node} ] - Wait for Proxmox task #{task} to complete..."
+       sleep 1
+     end
+     log "[ #{pve_node} ] - Proxmox task #{task} completed."
+   end
+
+   # Get a task status
+   #
+   # Parameters::
+   # * *pve_node* (String): Node on which the task status is to be queried
+   # * *task* (String): Task ID to query
+   # Result::
+   # * String: The task status
+   def task_status(pve_node, task)
+     status_info = @proxmox.get("nodes/#{pve_node}/tasks/#{task}/status")
+     "#{status_info['status']}#{status_info['exitstatus'] ? ":#{status_info['exitstatus']}" : ''}"
+   end
+
+   # Query a path from the API and return its JSON result.
+   # Keep a cache of it, whose lifespan is this ProxmoxWaiter instance.
+   #
+   # Parameters::
+   # * *path* (String): API path to query
+   def api_get(path)
+     @gets_cache[path] = @proxmox.get(path) unless @gets_cache.key?(path)
+     @gets_cache[path]
+   end
+
+   # Get the state of a VM
+   #
+   # Parameters::
+   # * *pve_node* (String): The PVE node having the container
+   # * *vm_id* (Integer): The VM ID
+   # Result::
+   # * String: The state
+   def vm_state(pve_node, vm_id)
+     api_get("nodes/#{pve_node}/lxc/#{vm_id}/status/current")['status']
+   end
+
+   # Timeout in seconds before giving up on a lock
+   LOCK_TIMEOUT = 30
+
+   # Get the IP address of a given LXC container
+   #
+   # Parameters::
+   # * *pve_node* (String): The PVE node having the container
+   # * *vm_id* (Integer): The VM ID
+   # Result::
+   # * String or nil: The corresponding IP address, or nil if not found (could be that the container has disappeared, as this method is also used for containers not part of our synced nodes)
+   def ip_of(pve_node, vm_id)
+     ip_found = nil
+     config_path = "nodes/#{pve_node}/lxc/#{vm_id}/config"
+     lxc_config = nil
+     begin_time = Time.now
+     loop do
+       lxc_config = api_get(config_path)
+       if lxc_config.is_a?(String)
+         log "[ #{pve_node}/#{vm_id} ] - Error while checking its config: #{lxc_config}. Might be that the VM has disappeared."
+         lxc_config = { 'lock' => "Error: #{lxc_config}" }
+       elsif lxc_config.key?('lock')
+         # The node is currently doing some task. Wait for the lock to be released.
+         log "[ #{pve_node}/#{vm_id} ] - Node is being locked (reason: #{lxc_config['lock']}). Wait for the lock to be released..."
+         sleep 1
+       else
+         break
+       end
+       # Make sure we don't cache the error or the lock
+       @gets_cache.delete(config_path)
+       if Time.now - begin_time > LOCK_TIMEOUT
+         log "[ #{pve_node}/#{vm_id} ] - !!! Timeout while waiting for it to be unlocked (reason: #{lxc_config['lock']})."
+         break
+       end
+     end
+     if lxc_config['net0'].nil?
+       log "[ #{pve_node}/#{vm_id} ] - !!! Config does not contain net0 information: #{lxc_config}"
+     else
+       lxc_config['net0'].split(',').each do |net_info|
+         property, value = net_info.split('=')
+         if property == 'ip'
+           ip_found = value.split('/').first
+           break
+         end
+       end
+     end
+     ip_found
+   end
+
+ end
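
Usage sketch (not part of the gem): within the gem, this class appears to be driven by the provisioner/proxmox plugin and its reserve_proxmox_container script listed above, but it can also be exercised directly. The following Ruby sketch is illustrative only, assuming a config file that follows the structure documented in the constructor; the API URL, credentials, realm, IPs, VM ID range, template and network settings are all placeholder values, not taken from the gem.

  require 'json'
  require_relative 'proxmox_waiter'

  # Illustrative configuration, following the constructor's documented structure (placeholder values).
  config = {
    'proxmox_api_url' => 'https://my-proxmox.my-domain.com:8006',
    'futex_file' => '/tmp/proxmox_waiter.futex',
    'logs_dir' => '/tmp/proxmox_waiter_logs',
    'vm_ips_list' => ['192.168.0.100', '192.168.0.101'],
    'vm_ids_range' => [1000, 1100],
    'coeff_ram_consumption' => 10,
    'coeff_disk_consumption' => 1,
    'expiration_period_secs' => 24 * 60 * 60,
    'expire_stopped_vm_timeout_secs' => 3600,
    'limits' => {
      'nbr_vms_max' => 5,
      'cpu_loads_thresholds' => [10.0, 10.0, 10.0],
      'ram_percent_used_max' => 0.75,
      'disk_percent_used_max' => 0.75
    }
  }
  File.write('proxmox_waiter_config.json', config.to_json)

  waiter = ProxmoxWaiter.new('proxmox_waiter_config.json', 'reservation_user', 'secret_password', 'pam')

  # Reserve and create a new LXC container.
  # vm_info uses Proxmox LXC creation properties: 'rootfs' is '<storage>:<size_gb>' (parsed by #create),
  # and 'description' carries the HPC metadata marker that #vm_metadata parses back.
  result = waiter.create(
    'ostemplate' => 'local:vztmpl/debian-10-standard_10.7-1_amd64.tar.gz',
    'hostname' => 'test-node.my-domain.com',
    'cpulimit' => 2,
    'memory' => 1024,
    'rootfs' => 'local-lvm:10',
    'net0' => 'name=eth0,bridge=vmbr0,gw=192.168.0.1',
    'description' => "===== HPC info =====\nnode: test-node\nenvironment: test\ndebug: false\n"
  )
  if result.is_a?(Hash)
    puts "Created container #{result[:vm_id]} on #{result[:pve_node]} with IP #{result[:vm_ip]}"
    # Release it later; node and environment must match the metadata stored in the description.
    waiter.destroy('vm_id' => result[:vm_id], 'node' => 'test-node', 'environment' => 'test')
  else
    puts "Could not reserve a container: #{result}"
  end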