hybrid_platforms_conductor 32.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/check-node +24 -0
- data/bin/deploy +12 -0
- data/bin/dump_nodes_json +12 -0
- data/bin/free_ips +23 -0
- data/bin/free_veids +17 -0
- data/bin/get_impacted_nodes +43 -0
- data/bin/last_deploys +56 -0
- data/bin/nodes_to_deploy +104 -0
- data/bin/report +10 -0
- data/bin/run +39 -0
- data/bin/setup +11 -0
- data/bin/ssh_config +14 -0
- data/bin/test +13 -0
- data/bin/topograph +54 -0
- data/lib/hybrid_platforms_conductor/action.rb +82 -0
- data/lib/hybrid_platforms_conductor/actions_executor.rb +307 -0
- data/lib/hybrid_platforms_conductor/bitbucket.rb +123 -0
- data/lib/hybrid_platforms_conductor/cmd_runner.rb +188 -0
- data/lib/hybrid_platforms_conductor/cmdb.rb +34 -0
- data/lib/hybrid_platforms_conductor/common_config_dsl/bitbucket.rb +78 -0
- data/lib/hybrid_platforms_conductor/common_config_dsl/confluence.rb +43 -0
- data/lib/hybrid_platforms_conductor/common_config_dsl/file_system_tests.rb +110 -0
- data/lib/hybrid_platforms_conductor/common_config_dsl/idempotence_tests.rb +38 -0
- data/lib/hybrid_platforms_conductor/config.rb +263 -0
- data/lib/hybrid_platforms_conductor/confluence.rb +119 -0
- data/lib/hybrid_platforms_conductor/connector.rb +84 -0
- data/lib/hybrid_platforms_conductor/credentials.rb +127 -0
- data/lib/hybrid_platforms_conductor/current_dir_monitor.rb +42 -0
- data/lib/hybrid_platforms_conductor/deployer.rb +598 -0
- data/lib/hybrid_platforms_conductor/executable.rb +145 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/action/bash.rb +44 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/action/interactive.rb +44 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/action/my_action.rb.sample +79 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/action/remote_bash.rb +63 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/action/ruby.rb +69 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/action/scp.rb +61 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/config.rb +78 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/host_ip.rb +104 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/host_keys.rb +114 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/my_cmdb.rb.sample +129 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/cmdb/platform_handlers.rb +66 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/connector/my_connector.rb.sample +156 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/connector/ssh.rb +702 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/platform_handler/platform_handler_plugin.rb.sample +292 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/docker.rb +148 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/my_provisioner.rb.sample +103 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/podman.rb +125 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox.rb +522 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/proxmox_waiter.rb +707 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/reserve_proxmox_container +122 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/report/confluence.rb +69 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/report/mediawiki.rb +164 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/report/my_report_plugin.rb.sample +88 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/report/stdout.rb +61 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/report/templates/confluence_inventory.html.erb +33 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/bitbucket_conf.rb +137 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/can_be_checked.rb +21 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_deploy_and_idempotence.rb +112 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_from_scratch.rb +35 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/connection.rb +28 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/deploy_freshness.rb +44 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/deploy_from_scratch.rb +36 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/deploy_removes_root_access.rb +49 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/divergence.rb +25 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/executables.rb +46 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/file_system.rb +45 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/file_system_hdfs.rb +45 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/hostname.rb +25 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/idempotence.rb +77 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/ip.rb +38 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/jenkins_ci_conf.rb +56 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/jenkins_ci_masters_ok.rb +54 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/linear_strategy.rb +47 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/local_users.rb +82 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/mounts.rb +120 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/my_test_plugin.rb.sample +143 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/orphan_files.rb +74 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/ports.rb +85 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/private_ips.rb +38 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/public_ips.rb +38 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/spectre-meltdown-checker.sh +1930 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/spectre.rb +56 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/veids.rb +31 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/vulnerabilities.rb +159 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/confluence.rb +122 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/my_test_report.rb.sample +48 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/stdout.rb +120 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/templates/_confluence_errors_status.html.erb +46 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/templates/_confluence_gauge.html.erb +49 -0
- data/lib/hybrid_platforms_conductor/hpc_plugins/test_report/templates/confluence.html.erb +242 -0
- data/lib/hybrid_platforms_conductor/io_router.rb +70 -0
- data/lib/hybrid_platforms_conductor/json_dumper.rb +88 -0
- data/lib/hybrid_platforms_conductor/logger_helpers.rb +319 -0
- data/lib/hybrid_platforms_conductor/mutex_dir +76 -0
- data/lib/hybrid_platforms_conductor/nodes_handler.rb +597 -0
- data/lib/hybrid_platforms_conductor/parallel_threads.rb +97 -0
- data/lib/hybrid_platforms_conductor/platform_handler.rb +188 -0
- data/lib/hybrid_platforms_conductor/platforms_handler.rb +118 -0
- data/lib/hybrid_platforms_conductor/plugin.rb +53 -0
- data/lib/hybrid_platforms_conductor/plugins.rb +101 -0
- data/lib/hybrid_platforms_conductor/provisioner.rb +181 -0
- data/lib/hybrid_platforms_conductor/report.rb +31 -0
- data/lib/hybrid_platforms_conductor/reports_handler.rb +84 -0
- data/lib/hybrid_platforms_conductor/services_handler.rb +274 -0
- data/lib/hybrid_platforms_conductor/test.rb +141 -0
- data/lib/hybrid_platforms_conductor/test_by_service.rb +22 -0
- data/lib/hybrid_platforms_conductor/test_report.rb +282 -0
- data/lib/hybrid_platforms_conductor/tests_runner.rb +590 -0
- data/lib/hybrid_platforms_conductor/thycotic.rb +92 -0
- data/lib/hybrid_platforms_conductor/topographer.rb +859 -0
- data/lib/hybrid_platforms_conductor/topographer/plugin.rb +20 -0
- data/lib/hybrid_platforms_conductor/topographer/plugins/graphviz.rb +127 -0
- data/lib/hybrid_platforms_conductor/topographer/plugins/json.rb +72 -0
- data/lib/hybrid_platforms_conductor/topographer/plugins/my_topographer_output_plugin.rb.sample +37 -0
- data/lib/hybrid_platforms_conductor/topographer/plugins/svg.rb +30 -0
- data/lib/hybrid_platforms_conductor/version.rb +5 -0
- data/spec/hybrid_platforms_conductor_test.rb +159 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/bash_spec.rb +43 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/interactive_spec.rb +18 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/remote_bash_spec.rb +102 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/ruby_spec.rb +108 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions/scp_spec.rb +79 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/actions_spec.rb +199 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/connection_spec.rb +212 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/cli_options_spec.rb +125 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/config_dsl_spec.rb +50 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/connectable_nodes_spec.rb +28 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/connections_spec.rb +448 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/global_helpers_spec.rb +313 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/node_helpers_spec.rb +32 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/remote_actions_spec.rb +134 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/logging_spec.rb +256 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/parallel_spec.rb +338 -0
- data/spec/hybrid_platforms_conductor_test/api/actions_executor/timeout_spec.rb +101 -0
- data/spec/hybrid_platforms_conductor_test/api/cmd_runner_spec.rb +165 -0
- data/spec/hybrid_platforms_conductor_test/api/config_spec.rb +238 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/check_spec.rb +9 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/deploy_spec.rb +243 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/parse_deploy_output_spec.rb +104 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioner_spec.rb +131 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/docker/Dockerfile +10 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/docker_spec.rb +123 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/podman_spec.rb +211 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/config_dsl_spec.rb +126 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/create_spec.rb +290 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/destroy_spec.rb +43 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/ip_spec.rb +60 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/proxmox.json +3 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/destroy_vm_spec.rb +82 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/expired_containers_spec.rb +786 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/ips_assignment_spec.rb +112 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/other_lxc_containers_resources_spec.rb +190 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/pve_node_resources_spec.rb +200 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/retries_spec.rb +35 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/vm_ids_assignment_spec.rb +67 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/start_spec.rb +79 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/state_spec.rb +28 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/stop_spec.rb +41 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs/config_spec.rb +33 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs/host_ip_spec.rb +64 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs/host_keys_spec.rb +133 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs/platform_handlers_spec.rb +19 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/cmdbs_plugins_api_spec.rb +446 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/common_spec.rb +127 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/git_diff_impacts_spec.rb +318 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/nodes_selectors_spec.rb +132 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/platform_handlers_plugins_api_spec.rb +60 -0
- data/spec/hybrid_platforms_conductor_test/api/nodes_handler/several_platforms_spec.rb +58 -0
- data/spec/hybrid_platforms_conductor_test/api/platform_handler_spec.rb +97 -0
- data/spec/hybrid_platforms_conductor_test/api/platforms_handler_spec.rb +104 -0
- data/spec/hybrid_platforms_conductor_test/api/plugins_spec.rb +243 -0
- data/spec/hybrid_platforms_conductor_test/api/reports_handler_spec.rb +44 -0
- data/spec/hybrid_platforms_conductor_test/api/services_handler/actions_to_deploy_spec.rb +121 -0
- data/spec/hybrid_platforms_conductor_test/api/services_handler/deploy_allowed_spec.rb +142 -0
- data/spec/hybrid_platforms_conductor_test/api/services_handler/log_info_spec.rb +101 -0
- data/spec/hybrid_platforms_conductor_test/api/services_handler/package_spec.rb +388 -0
- data/spec/hybrid_platforms_conductor_test/api/services_handler/parse_deploy_output_spec.rb +274 -0
- data/spec/hybrid_platforms_conductor_test/api/services_handler/prepare_for_deploy_spec.rb +264 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/common_spec.rb +194 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/global_spec.rb +37 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_check_spec.rb +194 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_spec.rb +137 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_ssh_spec.rb +257 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/platform_spec.rb +110 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/reports_spec.rb +367 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/test_plugins/bitbucket_conf_spec.rb +111 -0
- data/spec/hybrid_platforms_conductor_test/api/tests_runner/test_reports_plugins/confluence_spec.rb +29 -0
- data/spec/hybrid_platforms_conductor_test/cmdb_plugins/test_cmdb.rb +166 -0
- data/spec/hybrid_platforms_conductor_test/cmdb_plugins/test_cmdb2.rb +93 -0
- data/spec/hybrid_platforms_conductor_test/cmdb_plugins/test_cmdb_others.rb +60 -0
- data/spec/hybrid_platforms_conductor_test/cmdb_plugins/test_cmdb_others2.rb +58 -0
- data/spec/hybrid_platforms_conductor_test/executables/check-node_spec.rb +35 -0
- data/spec/hybrid_platforms_conductor_test/executables/deploy_spec.rb +35 -0
- data/spec/hybrid_platforms_conductor_test/executables/get_impacted_nodes_spec.rb +158 -0
- data/spec/hybrid_platforms_conductor_test/executables/last_deploys_spec.rb +173 -0
- data/spec/hybrid_platforms_conductor_test/executables/nodes_to_deploy_spec.rb +283 -0
- data/spec/hybrid_platforms_conductor_test/executables/options/actions_executor_spec.rb +28 -0
- data/spec/hybrid_platforms_conductor_test/executables/options/cmd_runner_spec.rb +28 -0
- data/spec/hybrid_platforms_conductor_test/executables/options/common_spec.rb +67 -0
- data/spec/hybrid_platforms_conductor_test/executables/options/deployer_spec.rb +251 -0
- data/spec/hybrid_platforms_conductor_test/executables/options/nodes_handler_spec.rb +111 -0
- data/spec/hybrid_platforms_conductor_test/executables/options/nodes_selectors_spec.rb +71 -0
- data/spec/hybrid_platforms_conductor_test/executables/options/reports_handler_spec.rb +54 -0
- data/spec/hybrid_platforms_conductor_test/executables/options/tests_runner_spec.rb +139 -0
- data/spec/hybrid_platforms_conductor_test/executables/report_spec.rb +60 -0
- data/spec/hybrid_platforms_conductor_test/executables/run_spec.rb +173 -0
- data/spec/hybrid_platforms_conductor_test/executables/ssh_config_spec.rb +35 -0
- data/spec/hybrid_platforms_conductor_test/executables/test_spec.rb +41 -0
- data/spec/hybrid_platforms_conductor_test/helpers/actions_executor_helpers.rb +98 -0
- data/spec/hybrid_platforms_conductor_test/helpers/cmd_runner_helpers.rb +92 -0
- data/spec/hybrid_platforms_conductor_test/helpers/cmdb_helpers.rb +37 -0
- data/spec/hybrid_platforms_conductor_test/helpers/config_helpers.rb +20 -0
- data/spec/hybrid_platforms_conductor_test/helpers/connector_ssh_helpers.rb +130 -0
- data/spec/hybrid_platforms_conductor_test/helpers/deployer_helpers.rb +149 -0
- data/spec/hybrid_platforms_conductor_test/helpers/deployer_test_helpers.rb +812 -0
- data/spec/hybrid_platforms_conductor_test/helpers/executables_helpers.rb +96 -0
- data/spec/hybrid_platforms_conductor_test/helpers/nodes_handler_helpers.rb +20 -0
- data/spec/hybrid_platforms_conductor_test/helpers/platform_handler_helpers.rb +35 -0
- data/spec/hybrid_platforms_conductor_test/helpers/platforms_handler_helpers.rb +127 -0
- data/spec/hybrid_platforms_conductor_test/helpers/plugins_helpers.rb +48 -0
- data/spec/hybrid_platforms_conductor_test/helpers/provisioner_proxmox_helpers.rb +789 -0
- data/spec/hybrid_platforms_conductor_test/helpers/reports_handler_helpers.rb +29 -0
- data/spec/hybrid_platforms_conductor_test/helpers/services_handler_helpers.rb +20 -0
- data/spec/hybrid_platforms_conductor_test/helpers/tests_runner_helpers.rb +38 -0
- data/spec/hybrid_platforms_conductor_test/mocked_lib/my_test_gem/hpc_plugins/test_plugin_type/test_plugin_id1.rb +22 -0
- data/spec/hybrid_platforms_conductor_test/mocked_lib/my_test_gem/hpc_plugins/test_plugin_type/test_plugin_id2.rb +22 -0
- data/spec/hybrid_platforms_conductor_test/mocked_lib/my_test_gem2/sub_dir/hpc_plugins/test_plugin_type/test_plugin_id3.rb +26 -0
- data/spec/hybrid_platforms_conductor_test/mocked_lib/my_test_gem2/sub_dir/hpc_plugins/test_plugin_type2/test_plugin_id4.rb +26 -0
- data/spec/hybrid_platforms_conductor_test/platform_handler_plugins/test.rb +225 -0
- data/spec/hybrid_platforms_conductor_test/platform_handler_plugins/test2.rb +11 -0
- data/spec/hybrid_platforms_conductor_test/report_plugin.rb +35 -0
- data/spec/hybrid_platforms_conductor_test/test_action.rb +66 -0
- data/spec/hybrid_platforms_conductor_test/test_connector.rb +151 -0
- data/spec/hybrid_platforms_conductor_test/test_plugins/global.rb +30 -0
- data/spec/hybrid_platforms_conductor_test/test_plugins/node.rb +53 -0
- data/spec/hybrid_platforms_conductor_test/test_plugins/node_check.rb +47 -0
- data/spec/hybrid_platforms_conductor_test/test_plugins/node_ssh.rb +42 -0
- data/spec/hybrid_platforms_conductor_test/test_plugins/platform.rb +50 -0
- data/spec/hybrid_platforms_conductor_test/test_plugins/several_checks.rb +50 -0
- data/spec/hybrid_platforms_conductor_test/test_provisioner.rb +95 -0
- data/spec/hybrid_platforms_conductor_test/tests_report_plugin.rb +49 -0
- data/spec/spec_helper.rb +111 -0
- metadata +566 -0
|
@@ -0,0 +1,707 @@
|
|
|
1
|
+
# Require tmpdir before futex, as this Rubygem has a bug missing its require.
|
|
2
|
+
require 'tmpdir'
|
|
3
|
+
require 'futex'
|
|
4
|
+
require 'json'
|
|
5
|
+
require 'proxmox'
|
|
6
|
+
require 'time'
|
|
7
|
+
|
|
8
|
+
# Serve Proxmox reservation requests, like a waiter in a restaurant ;-)
|
|
9
|
+
# Multi-process safe.
|
|
10
|
+
class ProxmoxWaiter
|
|
11
|
+
|
|
12
|
+
# Integer: Timeout in seconds to get the futex
|
|
13
|
+
# Take into account that some processes can be lengthy while the futex is taken:
|
|
14
|
+
# * POST/DELETE operations in the Proxmox API requires tasks to be performed which can take a few seconds, depending on the load.
|
|
15
|
+
# * Proxmox API sometimes fails to respond when containers are being locked temporarily (we have a 30 secs timeout for each one).
|
|
16
|
+
FUTEX_TIMEOUT = 600
|
|
17
|
+
|
|
18
|
+
# Integer: Maximum timeout in seconds before retrying getting the Futex when we are not first in the queue (a rand will be applied to it)
|
|
19
|
+
RETRY_QUEUE_WAIT = 30
|
|
20
|
+
|
|
21
|
+
# Constructor
|
|
22
|
+
#
|
|
23
|
+
# Parameters::
|
|
24
|
+
# * *config_file* (String): Path to a JSON file containing a configuration for ProxmoxWaiter.
|
|
25
|
+
# Here is the file structure:
|
|
26
|
+
# * *proxmox_api_url* (String): Proxmox API URL.
|
|
27
|
+
# * *futex_file* (String): Path to the file serving as a futex.
|
|
28
|
+
# * *logs_dir* (String): Path to the directory containing logs [default: '.']
|
|
29
|
+
# * *pve_nodes* (Array<String>): List of PVE nodes allowed to spawn new containers [default: all]
|
|
30
|
+
# * *vm_ips_list* (Array<String>): The list of IPs that are available for the Proxomx containers.
|
|
31
|
+
# * *vm_ids_range* ([Integer, Integer]): Minimum and maximum reservable VM ID
|
|
32
|
+
# * *coeff_ram_consumption* (Integer): Importance coefficient to assign to the RAM consumption when selecting available PVE nodes
|
|
33
|
+
# * *coeff_disk_consumption* (Integer): Importance coefficient to assign to the disk consumption when selecting available PVE nodes
|
|
34
|
+
# * *expiration_period_secs* (Integer): Number of seconds defining the expiration period
|
|
35
|
+
# * *expire_stopped_vm_timeout_secs* (Integer): Number of seconds before defining stopped VMs as expired
|
|
36
|
+
# * *limits* (Hash): Limits to be taken into account while reserving resources. Each property is optional and no property means no limit.
|
|
37
|
+
# * *nbr_vms_max* (Integer): Max number of VMs we can reserve.
|
|
38
|
+
# * *cpu_loads_thresholds* ([Float, Float, Float]): CPU load thresholds from which a PVE node should not be used (as soon as 1 of the value is greater than 1 of those thresholds, discard the node).
|
|
39
|
+
# * *ram_percent_used_max* (Float): Max percentage (between 0 and 1) of RAM that can be reserved on a PVE node.
|
|
40
|
+
# * *disk_percent_used_max* (Float): Max percentage (between 0 and 1) of disk that can be reserved on a PVE node.
|
|
41
|
+
# * *proxmox_user* (String): Proxmox user to be used to connect to the API.
|
|
42
|
+
# * *proxmox_password* (String): Proxmox password to be used to connect to the API.
|
|
43
|
+
# * *proxmox_realm* (String): Proxmox realm to be used to connect to the API.
|
|
44
|
+
def initialize(config_file, proxmox_user, proxmox_password, proxmox_realm)
|
|
45
|
+
@config = JSON.parse(File.read(config_file))
|
|
46
|
+
@proxmox_user = proxmox_user
|
|
47
|
+
@proxmox_password = proxmox_password
|
|
48
|
+
@proxmox_realm = proxmox_realm
|
|
49
|
+
# Keep a memory of non-debug stopped containers, so that we can guess if they are expired or not after some time.
|
|
50
|
+
# Time when we noticed a given container is stopped, per creation date, per VM ID, per PVE node
|
|
51
|
+
# We add the creation date as a VM ID can be reused (with a different creation date) and we want to make sure we don't think a newly created VM is here for longer that it should.
|
|
52
|
+
# Hash< String, Hash< Integer, Hash< String, Time > > >
|
|
53
|
+
# Hash< pve_node, Hash< vm_id, Hash< creation_date, time_seen_as_stopped > > >
|
|
54
|
+
@non_debug_stopped_containers = {}
|
|
55
|
+
@log_file = "#{@config['logs_dir'] || '.'}/proxmox_waiter_#{Time.now.utc.strftime('%Y%m%d%H%M%S')}_pid_#{Process.pid}_#{File.basename(config_file, '.json')}.log"
|
|
56
|
+
FileUtils.mkdir_p File.dirname(@log_file)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Reserve resources for a new container.
|
|
60
|
+
# Check resources availability.
|
|
61
|
+
#
|
|
62
|
+
# Parameters::
|
|
63
|
+
# * *vm_info* (Hash<String,Object>): The VM info to be created, using the same properties as LXC container creation through Proxmox API.
|
|
64
|
+
# Result::
|
|
65
|
+
# * Hash<Symbol, Object> or Symbol: Reserved resource info, or Symbol in case of error.
|
|
66
|
+
# The following properties are set as resource info:
|
|
67
|
+
# * *pve_node* (String): Node on which the container has been created.
|
|
68
|
+
# * *vm_id* (Integer): The VM ID
|
|
69
|
+
# * *vm_ip* (String): The VM IP
|
|
70
|
+
# Possible error codes returned are:
|
|
71
|
+
# * *not_enough_resources*: There is no available free resources to be reserved
|
|
72
|
+
# * *no_available_ip*: There is no available IP to be reserved
|
|
73
|
+
# * *no_available_vm_id*: There is no available VM ID to be reserved
|
|
74
|
+
# * *exceeded_number_of_vms*: There is already too many VMs running
|
|
75
|
+
def create(vm_info)
|
|
76
|
+
log "Ask to create #{vm_info}"
|
|
77
|
+
# Extract the required resources from the desired VM info
|
|
78
|
+
nbr_cpus = vm_info['cpulimit']
|
|
79
|
+
ram_mb = vm_info['memory']
|
|
80
|
+
disk_gb = Integer(vm_info['rootfs'].split(':').last)
|
|
81
|
+
reserved_resource = nil
|
|
82
|
+
start do
|
|
83
|
+
pve_node_scores = pve_scores_for(nbr_cpus, ram_mb, disk_gb)
|
|
84
|
+
# Check if we are not exceeding hard-limits:
|
|
85
|
+
# * the number of vms to be created
|
|
86
|
+
# * the free IPs
|
|
87
|
+
# * the free VM IDs
|
|
88
|
+
# In such case, even when free resources on PVE nodes are enough to host the new container, we still need to clean-up before.
|
|
89
|
+
nbr_vms = nbr_vms_handled_by_us
|
|
90
|
+
if nbr_vms >= @config['limits']['nbr_vms_max'] || free_ips.empty? || free_vm_ids.empty?
|
|
91
|
+
log 'Hitting at least 1 hard-limit. Check if we can destroy expired containers.'
|
|
92
|
+
log "[ Hard limit reached ] - Already #{nbr_vms} are created (max is #{@config['limits']['nbr_vms_max']})." if nbr_vms >= @config['limits']['nbr_vms_max']
|
|
93
|
+
log '[ Hard limit reached ] - No more available IPs.' if free_ips.empty?
|
|
94
|
+
log '[ Hard limit reached ] - No more available VM IDs.' if free_vm_ids.empty?
|
|
95
|
+
clean_up_done = false
|
|
96
|
+
# Check if we can remove some expired ones
|
|
97
|
+
@config['pve_nodes'].each do |pve_node|
|
|
98
|
+
if api_get("nodes/#{pve_node}/lxc").any? { |lxc_info| is_vm_expired?(pve_node, Integer(lxc_info['vmid'])) }
|
|
99
|
+
destroy_expired_vms_on(pve_node)
|
|
100
|
+
clean_up_done = true
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
if clean_up_done
|
|
104
|
+
nbr_vms = nbr_vms_handled_by_us
|
|
105
|
+
if nbr_vms >= @config['limits']['nbr_vms_max']
|
|
106
|
+
log "[ Hard limit reached ] - Still too many running VMs after clean-up: #{nbr_vms}."
|
|
107
|
+
reserved_resource = :exceeded_number_of_vms
|
|
108
|
+
elsif free_ips.empty?
|
|
109
|
+
log '[ Hard limit reached ] - Still no available IP'
|
|
110
|
+
reserved_resource = :no_available_ip
|
|
111
|
+
elsif free_vm_ids.empty?
|
|
112
|
+
log '[ Hard limit reached ] - Still no available VM ID'
|
|
113
|
+
reserved_resource = :no_available_vm_id
|
|
114
|
+
end
|
|
115
|
+
else
|
|
116
|
+
log 'Could not find any expired VM to destroy.'
|
|
117
|
+
# There was nothing to clean. So wait for other processes to destroy their containers.
|
|
118
|
+
reserved_resource =
|
|
119
|
+
if nbr_vms >= @config['limits']['nbr_vms_max']
|
|
120
|
+
:exceeded_number_of_vms
|
|
121
|
+
elsif free_ips.empty?
|
|
122
|
+
:no_available_ip
|
|
123
|
+
else
|
|
124
|
+
:no_available_vm_id
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
if reserved_resource.nil?
|
|
129
|
+
# Select the best node, first keeping expired VMs if possible.
|
|
130
|
+
# This is the index of the scores to be checked: if we can choose without recycling VMs, do it by considering score index 0.
|
|
131
|
+
score_idx =
|
|
132
|
+
if pve_node_scores.all? { |_pve_node, pve_node_scores| pve_node_scores[0].nil? }
|
|
133
|
+
# No node was available without removing expired VMs.
|
|
134
|
+
# Therefore we consider only scores without expired VMs.
|
|
135
|
+
log 'No PVE node has enough free resources without removing eventual expired VMs'
|
|
136
|
+
1
|
|
137
|
+
else
|
|
138
|
+
0
|
|
139
|
+
end
|
|
140
|
+
selected_pve_node, selected_pve_node_score = pve_node_scores.inject([nil, nil]) do |(best_pve_node, best_score), (pve_node, pve_node_scores)|
|
|
141
|
+
if pve_node_scores[score_idx].nil? ||
|
|
142
|
+
(!best_score.nil? && pve_node_scores[score_idx] >= best_score)
|
|
143
|
+
[best_pve_node, best_score]
|
|
144
|
+
else
|
|
145
|
+
[pve_node, pve_node_scores[score_idx]]
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
if selected_pve_node.nil?
|
|
149
|
+
# No PVE node can host our request.
|
|
150
|
+
log 'Could not find any PVE node with enough free resources'
|
|
151
|
+
reserved_resource = :not_enough_resources
|
|
152
|
+
else
|
|
153
|
+
log "[ #{selected_pve_node} ] - PVE node selected with score #{selected_pve_node_score}"
|
|
154
|
+
# We know on which PVE node we can instantiate our new container.
|
|
155
|
+
# We have to purge expired VMs on this PVE node before reserving a new creation.
|
|
156
|
+
destroy_expired_vms_on(selected_pve_node) if score_idx == 1
|
|
157
|
+
# Now select the correct VM ID and VM IP.
|
|
158
|
+
vm_id_or_error, ip = reserve_on(selected_pve_node, nbr_cpus, ram_mb, disk_gb)
|
|
159
|
+
if ip.nil?
|
|
160
|
+
# We have an error
|
|
161
|
+
reserved_resource = vm_id_or_error
|
|
162
|
+
else
|
|
163
|
+
# Create the container for real
|
|
164
|
+
completed_vm_info = vm_info.dup
|
|
165
|
+
completed_vm_info['vmid'] = vm_id_or_error
|
|
166
|
+
completed_vm_info['net0'] = "#{completed_vm_info['net0']},ip=#{ip}/32"
|
|
167
|
+
completed_vm_info['description'] = "#{completed_vm_info['description']}creation_date: #{Time.now.utc.strftime('%FT%T')}\n"
|
|
168
|
+
log "[ #{selected_pve_node}/#{vm_id_or_error} ] - Create LXC container"
|
|
169
|
+
wait_for_proxmox_task(selected_pve_node, @proxmox.post("nodes/#{selected_pve_node}/lxc", completed_vm_info))
|
|
170
|
+
reserved_resource = {
|
|
171
|
+
pve_node: selected_pve_node,
|
|
172
|
+
vm_id: vm_id_or_error,
|
|
173
|
+
vm_ip: ip
|
|
174
|
+
}
|
|
175
|
+
end
|
|
176
|
+
end
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
reserved_resource
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Destroy a VM.
#
# Parameters::
# * *vm_info* (Hash<String,Object>): The VM info to be destroyed:
#   * *vm_id* (Integer): The VM ID
#   * *node* (String): The node for which this VM has been created
#   * *environment* (String): The environment for which this VM has been created
# Result::
# * Hash<Symbol, Object> or Symbol: Released resource info, or Symbol in case of error.
#   The following properties are set as resource info:
#   * *pve_node* (String): Node on which the container has been released (if found).
#   Possible error codes returned are:
#   None
def destroy(vm_info)
  log "Ask to destroy #{vm_info}"
  found_pve_node = nil
  start do
    vm_id_str = vm_info['vm_id'].to_s
    # Destroy the VM ID
    # Find which PVE node hosts this VM
    found = @config['pve_nodes'].any? do |pve_node|
      api_get("nodes/#{pve_node}/lxc").any? do |lxc_info|
        # The Proxmox API is inconsistent about types (see the maxdisk workaround in pve_scores_for),
        # so normalize vmid to String on both sides: an Integer vmid would never == a String in Ruby.
        if lxc_info['vmid'].to_s == vm_id_str
          # Make sure this VM is still used for the node and environment we want.
          # It could have been deleted manually and re-affected to another node/environment automatically, and in this case we should not remove it.
          metadata = vm_metadata(pve_node, vm_info['vm_id'])
          if metadata[:node] == vm_info['node'] && metadata[:environment] == vm_info['environment']
            destroy_vm_on(pve_node, vm_info['vm_id'])
            found_pve_node = pve_node
            true
          else
            log "[ #{pve_node}/#{vm_info['vm_id']} ] - This container is not hosting the node/environment to be destroyed: #{metadata[:node]}/#{metadata[:environment]} != #{vm_info['node']}/#{vm_info['environment']}"
            false
          end
        else
          false
        end
      end
    end
    log "Could not find any PVE node hosting VM #{vm_info['vm_id']}" unless found
  end
  # Only report the PVE node when we actually destroyed something.
  reserved_resource = {}
  reserved_resource[:pve_node] = found_pve_node unless found_pve_node.nil?
  reserved_resource
end
|
|
228
|
+
|
|
229
|
+
private
|
|
230
|
+
|
|
231
|
+
# Log a message to stdout and in the log file
#
# Parameters::
# * *msg* (String): Message to log
def log(msg)
  puts msg
  # Append a timestamped, PID-tagged copy to the log file so concurrent processes can be told apart.
  timestamp = Time.now.utc.strftime('%F %T.%L')
  File.open(@log_file, 'a') do |log_io|
    log_io.puts "[ #{timestamp} ] - [ PID #{Process.pid} ] - #{msg}"
  end
end
|
|
239
|
+
|
|
240
|
+
# Get the access queue from a file.
# Handle the case of missing file.
#
# Parameters::
# * *queue_file* (String): The file holding the queue
# Result::
# * Array<Integer>: PIDs queue
def read_access_queue(queue_file)
  # A missing file simply means an empty queue.
  return [] unless File.exist?(queue_file)

  File.read(queue_file).split("\n").map { |pid_line| Integer(pid_line) }
end
|
|
250
|
+
|
|
251
|
+
# Write the access queue to a file.
#
# Parameters::
# * *queue_file* (String): The file holding the queue
# * *access_queue* (Array<Integer>): PIDs queue
def write_access_queue(queue_file, access_queue)
  # One PID per line, same format read_access_queue expects.
  File.open(queue_file, 'w') { |queue_io| queue_io.write(access_queue.join("\n")) }
end
|
|
259
|
+
|
|
260
|
+
# Get an exclusive (based on PID) access using a futex-protected queue.
# A companion queue file records the PIDs waiting for access, in FIFO order; that queue
# file is itself protected by its own futex. We loop until our PID reaches the head of
# the queue, then yield while holding the main futex.
#
# Parameters::
# * *futex_file* (String): Name of the file to be used as a futex
# * Proc: Code called with access authorized
def with_futex_queue_access_on(futex_file)
  pid = Process.pid
  queue_futex_file = "#{futex_file}.queue"
  # Register ourselves in the queue (at the end)
  Futex.new(queue_futex_file, timeout: FUTEX_TIMEOUT).open do
    access_queue = read_access_queue(queue_futex_file)
    log "[ Futex queue ] - Register our PID in the queue: #{access_queue.join(', ')}"
    write_access_queue(queue_futex_file, access_queue + [pid])
  end
  # Loop until we are first ones in the queue
  retry_futex_queue = true
  while retry_futex_queue
    Futex.new(futex_file, timeout: FUTEX_TIMEOUT).open do
      # Check if we are the first one in the queue
      Futex.new(queue_futex_file, timeout: FUTEX_TIMEOUT).open do
        access_queue = read_access_queue(queue_futex_file)
        idx = access_queue.index(pid)
        log "[ Futex queue ] - We are ##{idx} in the queue: #{access_queue.join(', ')}"
        if idx.nil?
          # We disappeared from the queue! Re-register so we eventually get served.
          log '[ Futex queue ] - !!! Somebody removed us from the queue. Add our PID back.'
          write_access_queue(queue_futex_file, access_queue + [pid])
        elsif idx == 0
          # Access granted: pop ourselves off the head of the queue.
          log '[ Futex queue ] - Exclusive access granted'
          write_access_queue(queue_futex_file, access_queue[1..-1])
          retry_futex_queue = false
        else
          # Just check that the first PID still exists, otherwise remove it from the queue.
          # This way we avoid starvation in case of killed processes.
          first_pid = access_queue.first
          first_pid_exist =
            begin
              # Process.getpgid raises Errno::ESRCH when the PID no longer exists.
              Process.getpgid(first_pid)
              true
            rescue Errno::ESRCH
              false
            end
          unless first_pid_exist
            log "[ Futex queue ] - !!! First PID #{first_pid} does not exist - remove it from the queue"
            write_access_queue(queue_futex_file, access_queue[1..-1])
          end
        end
      end
      # Run the client code while still holding the main futex.
      yield unless retry_futex_queue
    end
    # Randomized back-off before re-checking, to limit contention on the futex.
    sleep(rand(RETRY_QUEUE_WAIT) + 1) if retry_futex_queue
  end
end
|
|
314
|
+
|
|
315
|
+
# Grab the lock to start a new atomic session.
# Make sure the lock is released at the end of the session.
#
# Parameters::
# * Proc: Client code with the session started.
#   The following instance variables are set:
#   * *@expiration_date* (Time): The expiration date to be considered when selecting expired VMs
#   * *@proxmox* (Proxmox): The Proxmox instance
def start
  with_futex_queue_access_on(@config['futex_file']) do
    # Connect to Proxmox's API
    @proxmox = Proxmox::Proxmox.new(
      "#{@config['proxmox_api_url']}/api2/json/",
      # Proxmox uses the hostname as the node name so make the default API node derived from the URL.
      # cf https://pve.proxmox.com/wiki/Renaming_a_PVE_node
      URI.parse(@config['proxmox_api_url']).host.downcase.split('.').first,
      @proxmox_user,
      @proxmox_password,
      @proxmox_realm,
      { verify_ssl: false }
    )
    # Cache of get queries to the API
    @gets_cache = {}
    # Check connectivity before going further
    begin
      nodes_info = api_get('nodes')
      # Get the list of PVE nodes by default
      @config['pve_nodes'] = nodes_info.map { |node_info| node_info['node'] } unless @config['pve_nodes']
    rescue => e
      # Capture the exception explicitly rather than relying on the fragile $! global.
      raise "Unable to connect to Proxmox API #{@config['proxmox_api_url']} with user #{@proxmox_user}: #{e}"
    end
    @expiration_date = Time.now.utc - @config['expiration_period_secs']
    log "Consider expiration date #{@expiration_date.strftime('%F %T')}"
    begin
      yield
    ensure
      # Always clear the session state, even if the client code raised.
      @expiration_date = nil
      @proxmox = nil
    end
  end
end
|
|
356
|
+
|
|
357
|
+
# Compute scores if we were to allocate resources for each possible PVE node.
# Those scores can help in choosing the best PVE node to host those resources.
# The best score is the smallest one.
# The score is computed by simulating resources' consumptions on the node if our container was to be installed in this node.
# The score uses coefficients as to better weigh some criterias more than others (all configured in the config file).
# 2 scores are gathered: 1 with the current PVE node's VMs, and 1 with the node having expired VMs removed.
# If a score is nil, it means the node can't be used (for example when a hard limit has been hit).
# Prerequisites:
# * This method should be called in a #start block
#
# Parameters::
# * *nbr_cpus* (Integer): Wanted CPUs
# * *ram_mb* (Integer): Wanted MB of RAM
# * *disk_gb* (Integer): Wanted GB of disk
# Result::
# * Hash<String, [Float or nil, Float or nil]>: The set of 2 scores, per PVE node name
def pve_scores_for(nbr_cpus, ram_mb, disk_gb)
  Hash[@config['pve_nodes'].map do |pve_node|
    # Get some resource usages stats from the node directly
    status_info = api_get("nodes/#{pve_node}/status")
    load_average = status_info['loadavg'].map { |load_str| Float(load_str) }
    log "[ #{pve_node} ] - Load average: #{load_average.join(', ')}"
    [
      pve_node,
      # If CPU load is too high, don't select the node anyway.
      if load_average.zip(@config['limits']['cpu_loads_thresholds']).all? { |load_current, load_limit| load_current <= load_limit }
        storage_info = api_get("nodes/#{pve_node}/storage").find { |search_storage_info| search_storage_info['storage'] == 'local-lvm' }
        disk_gb_total = storage_info['total'] / (1024 * 1024 * 1024)
        ram_mb_total = status_info['memory']['total'] / (1024 * 1024)
        # Used resources is the sum of the allocated resource for each VM in this PVE node.
        # It is not forcefully the currently used resource.
        # This way we are sure to keep the allocated resources intact for containers not handled by this script.
        disk_gb_used = 0
        ram_mb_used = 0
        # Store the resources used by containers we can recycle in separate variables.
        expired_disk_gb_used = 0
        expired_ram_mb_used = 0
        # Iterate with each (the previous map result was stored in an unused variable).
        api_get("nodes/#{pve_node}/lxc").each do |lxc_info|
          vm_id = Integer(lxc_info['vmid'])
          # Some times the Proxmox API returns maxdisk as a String (but not always) even if it is documented as Integer here: https://pve.proxmox.com/pve-docs/api-viewer/#/nodes/{node}/lxc.
          # TODO: Remove the Integer conversion when Proxmox API will be fixed.
          lxc_disk_gb_used = Integer(lxc_info['maxdisk']) / (1024 * 1024 * 1024)
          lxc_ram_mb_used = lxc_info['maxmem'] / (1024 * 1024)
          if is_vm_expired?(pve_node, vm_id)
            expired_disk_gb_used += lxc_disk_gb_used
            expired_ram_mb_used += lxc_ram_mb_used
          else
            disk_gb_used += lxc_disk_gb_used
            ram_mb_used += lxc_ram_mb_used
          end
        end
        log "[ #{pve_node} ] - RAM MB usage: #{ram_mb_used + expired_ram_mb_used} / #{ram_mb_total} (#{expired_ram_mb_used} MB from expired containers)"
        log "[ #{pve_node} ] - Disk GB usage: #{disk_gb_used + expired_disk_gb_used} / #{disk_gb_total} (#{expired_disk_gb_used} GB from expired containers)"
        # Evaluate the expected percentages of resources' usage if we were to add our new container to this PVE node.
        expected_ram_percent_used = (ram_mb_used + expired_ram_mb_used + ram_mb).to_f / ram_mb_total
        expected_disk_percent_used = (disk_gb_used + expired_disk_gb_used + disk_gb).to_f / disk_gb_total
        expected_ram_percent_used_without_expired = (ram_mb_used + ram_mb).to_f / ram_mb_total
        expected_disk_percent_used_without_expired = (disk_gb_used + disk_gb).to_f / disk_gb_total
        # If we break the limits, don't select this node.
        # Otherwise, store the scores, taking into account coefficients to then choose among possible PVE nodes.
        [
          if expected_ram_percent_used <= @config['limits']['ram_percent_used_max'] &&
             expected_disk_percent_used <= @config['limits']['disk_percent_used_max']
            expected_ram_percent_used * @config['coeff_ram_consumption'] + expected_disk_percent_used * @config['coeff_disk_consumption']
          else
            nil
          end,
          if expected_ram_percent_used_without_expired <= @config['limits']['ram_percent_used_max'] &&
             expected_disk_percent_used_without_expired <= @config['limits']['disk_percent_used_max']
            expected_ram_percent_used_without_expired * @config['coeff_ram_consumption'] + expected_disk_percent_used_without_expired * @config['coeff_disk_consumption']
          else
            nil
          end
        ]
      else
        # CPU load is too high. Don't select this node.
        log "[ #{pve_node} ] - Load average is too high for this PVE node to be selected (thresholds: #{@config['limits']['cpu_loads_thresholds'].join(', ')})"
        [nil, nil]
      end
    ]
  end]
end
|
|
440
|
+
|
|
441
|
+
# Is a given VM expired?
# A VM is only considered for expiration when its ID falls in our managed vm_ids_range.
# It is expired when either:
# * its creation_date metadata is missing or older than @expiration_date, or
# * it is stopped, not flagged as debug, and has been seen stopped for longer than
#   expire_stopped_vm_timeout_secs (tracked in @non_debug_stopped_containers).
# Side effect: updates @non_debug_stopped_containers to remember/forget stopped containers.
#
# Parameters::
# * *pve_node* (String): The PVE node hosting this VM
# * *vm_id* (Integer): The VM ID
# Result::
# * Boolean: Is the given VM expired?
def is_vm_expired?(pve_node, vm_id)
  if vm_id.between?(*@config['vm_ids_range'])
    # Get its reservation date from the notes
    metadata = vm_metadata(pve_node, vm_id)
    # Missing creation_date counts as expired: we can't prove the container is recent.
    if metadata[:creation_date].nil? || Time.parse("#{metadata[:creation_date]} UTC") < @expiration_date
      log "[ #{pve_node}/#{vm_id} ] - [ Expired ] - Creation date is #{metadata[:creation_date]}"
      true
    else
      state = vm_state(pve_node, vm_id)
      if state == 'running' || metadata[:debug] == 'true'
        # Just in case it was previously remembered as a non-debug stopped container, clear it.
        @non_debug_stopped_containers[pve_node].delete(vm_id) if @non_debug_stopped_containers.key?(pve_node)
        log "[ #{pve_node}/#{vm_id} ] - State is #{state} and debug is #{metadata[:debug]}"
        false
      else
        # Check if it is not a left-over from a crash.
        # If it stays not running for long and is not meant for debug purposes, then it is also considered expired.
        # For this, remember previously seen containers that were stopped
        # (keyed by pve_node / vm_id / creation_date so a recycled VM ID starts a fresh timer).
        first_time_seen_as_stopped = @non_debug_stopped_containers.dig pve_node, vm_id, metadata[:creation_date]
        if first_time_seen_as_stopped.nil?
          # It is the first time we see it stopped.
          # Remember it and consider it as non-expired.
          @non_debug_stopped_containers[pve_node] = {} unless @non_debug_stopped_containers.key?(pve_node)
          @non_debug_stopped_containers[pve_node][vm_id] = {} unless @non_debug_stopped_containers[pve_node].key?(vm_id)
          @non_debug_stopped_containers[pve_node][vm_id][metadata[:creation_date]] = Time.now
          log "[ #{pve_node}/#{vm_id} ] - Discovered non-debug container (created on #{metadata[:creation_date]}) as stopped"
          false
        elsif Time.now - first_time_seen_as_stopped >= @config['expire_stopped_vm_timeout_secs']
          # If it is stopped from more than the timeout, then consider it expired
          log "[ #{pve_node}/#{vm_id} ] - [ Expired ] - Non-debug container (created on #{metadata[:creation_date]}) is stopped since #{first_time_seen_as_stopped.strftime('%F %T')} (more than #{@config['expire_stopped_vm_timeout_secs']} seconds ago)"
          true
        else
          log "[ #{pve_node}/#{vm_id} ] - Non-debug container (created on #{metadata[:creation_date]}) is stopped since #{first_time_seen_as_stopped.strftime('%F %T')} (less than #{@config['expire_stopped_vm_timeout_secs']} seconds ago)"
          false
        end
      end
    end
  else
    log "[ #{pve_node}/#{vm_id} ] - Container is not part of our VM ID range."
    false
  end
end
|
|
490
|
+
|
|
491
|
+
# Get the metadata we associate to VMs.
# The metadata is stored as "property: value" lines in the container description,
# after a '===== HPC info =====' marker line.
# It can be empty if no metadata found.
#
# Parameters::
# * *pve_node* (String): The PVE node hosting this VM
# * *vm_id* (Integer): The VM ID
# Result::
# * Hash<Symbol, String>: The metadata
def vm_metadata(pve_node, vm_id)
  lxc_config = api_get("nodes/#{pve_node}/lxc/#{vm_id}/config")
  description_lines = (lxc_config['description'] || '').split("\n")
  marker_idx = description_lines.index('===== HPC info =====')
  # No marker means no metadata has been stored in this container's description.
  return {} if marker_idx.nil?

  Hash[description_lines[marker_idx + 1..-1].map do |metadata_line|
    property, value = metadata_line.split(': ')
    [property.to_sym, value]
  end]
end
|
|
512
|
+
|
|
513
|
+
# Count the number of VMs handled by us currently existing.
# A VM is "handled by us" when its ID falls in the configured vm_ids_range.
#
# Result::
# * Integer: Number of VMs handled by us
def nbr_vms_handled_by_us
  @config['pve_nodes'].sum do |pve_node|
    api_get("nodes/#{pve_node}/lxc").count { |lxc_info| Integer(lxc_info['vmid']).between?(*@config['vm_ids_range']) }
  end
end
|
|
522
|
+
|
|
523
|
+
# Reserve resources for a new container on a PVE node, and assign a new VM ID and IP to it.
# Prerequisites:
# * This method should be called in a #start block
#
# Parameters::
# * *pve_node* (String): Node on which we reserve the resources.
# * *nbr_cpus* (Integer): Wanted CPUs
# * *ram_mb* (Integer): Wanted MB of RAM
# * *disk_gb* (Integer): Wanted GB of disk
# Result::
# * [Integer, String] or Symbol: Reserved resource info ([vm_id, ip]), or Symbol in case of error.
#   Possible error codes returned are:
#   * *no_available_ip*: There is no available IP to be reserved
#   * *no_available_vm_id*: There is no available VM ID to be reserved
def reserve_on(pve_node, nbr_cpus, ram_mb, disk_gb)
  # NOTE(review): nbr_cpus, ram_mb and disk_gb are not used inside this method; kept for interface compatibility.
  # We select a new VM ID and VM IP, bailing out early when either pool is exhausted.
  selected_vm_ip = free_ips.first
  return :no_available_ip if selected_vm_ip.nil?

  selected_vm_id = free_vm_ids.first
  return :no_available_vm_id if selected_vm_id.nil?

  # Success
  log "[ #{pve_node}/#{selected_vm_id} ] - New LXC container reserved with IP #{selected_vm_ip}"
  [selected_vm_id, selected_vm_ip]
end
|
|
555
|
+
|
|
556
|
+
# Destroy expired VMs on a PVE node.
# Only consider VMs that fall in the config VM ID range and are expired
# (both checks are done by is_vm_expired?).
#
# Parameters::
# * *pve_node* (String): PVE node to delete expired VMs from.
def destroy_expired_vms_on(pve_node)
  api_get("nodes/#{pve_node}/lxc").each do |lxc_info|
    vm_id = Integer(lxc_info['vmid'])
    destroy_vm_on(pve_node, vm_id) if is_vm_expired?(pve_node, vm_id)
  end
  # Invalidate the API cache for anything related to this PVE node
  pve_node_paths_regexp = %r{^nodes/#{Regexp.escape(pve_node)}/.+$}
  @gets_cache.delete_if { |path, _result| pve_node_paths_regexp.match?(path) }
end
|
|
570
|
+
|
|
571
|
+
# Destroy a VM on a PVE node.
# Stop it if needed before destroy.
#
# Parameters::
# * *pve_node* (String): PVE node hosting the VM
# * *vm_id* (Integer): The VM ID to destroy
def destroy_vm_on(pve_node, vm_id)
  # A running container must be stopped before it can be deleted.
  running = vm_state(pve_node, vm_id) == 'running'
  if running
    log "[ #{pve_node}/#{vm_id} ] - Stop LXC container"
    wait_for_proxmox_task(pve_node, @proxmox.post("nodes/#{pve_node}/lxc/#{vm_id}/status/stop"))
  end
  log "[ #{pve_node}/#{vm_id} ] - Destroy LXC container"
  wait_for_proxmox_task(pve_node, @proxmox.delete("nodes/#{pve_node}/lxc/#{vm_id}"))
end
|
|
585
|
+
|
|
586
|
+
# Return the list of available IPs
#
# Result::
# * Array<String>: List of available IPs
def free_ips
  # Consider all nodes and all IPs to ensure we won't create any conflict, even outside our allowed range
  used_ips = api_get('nodes').flat_map do |pve_node_info|
    pve_node = pve_node_info['node']
    api_get("nodes/#{pve_node}/lxc").map { |lxc_info| ip_of(pve_node, Integer(lxc_info['vmid'])) }
  end
  # ip_of returns nil for containers without a detectable IP: drop those before subtracting.
  @config['vm_ips_list'] - used_ips.compact
end
|
|
600
|
+
|
|
601
|
+
# Return the list of available VM IDs
#
# Result::
# * Array<Integer>: List of available VM IDs
def free_vm_ids
  # Subtract every VM ID in use on any node from our configured range.
  used_vm_ids = api_get('nodes').flat_map do |pve_node_info|
    api_get("nodes/#{pve_node_info['node']}/lxc").map { |lxc_info| Integer(lxc_info['vmid']) }
  end
  Range.new(*@config['vm_ids_range']).to_a - used_vm_ids
end
|
|
611
|
+
|
|
612
|
+
# Wait for a given Proxmox task completion
#
# Parameters::
# * *pve_node* (String): The PVE node on which the task is run
# * *task* (String): The task ID
def wait_for_proxmox_task(pve_node, task)
  # A task creation failure is signalled by a string starting with 'NOK:' instead of a task ID.
  # Use the idiomatic start_with? rather than slicing the first 4 characters.
  raise "Invalid task: #{task}" if task.start_with?('NOK:')

  # Poll the task status once per second until it leaves the 'running' state.
  while task_status(pve_node, task) == 'running'
    log "[ #{pve_node} ] - Wait for Proxmox task #{task} to complete..."
    sleep 1
  end
  log "[ #{pve_node} ] - Proxmox task #{task} completed."
end
|
|
625
|
+
|
|
626
|
+
# Get task status
#
# Parameters::
# * *pve_node* (String): Node on which the task status is to be queried
# * *task* (String): Task ID to query
# Result::
# * String: The task status, suffixed with ':<exitstatus>' when an exit status is reported
def task_status(pve_node, task)
  status_info = @proxmox.get("nodes/#{pve_node}/tasks/#{task}/status")
  exit_status = status_info['exitstatus']
  if exit_status
    "#{status_info['status']}:#{exit_status}"
  else
    status_info['status'].to_s
  end
end
|
|
637
|
+
|
|
638
|
+
# Get a path from the API and return its JSON result.
# Keep a cache of it, whose lifespan is this ProxmoxWaiter instance.
# The cache is keyed on path, so it can hold any value the API returns (including nil/false).
#
# Parameters::
# * *path* (String): API path to query
# Result::
# * Object: The (possibly cached) JSON result
def api_get(path)
  # Serve from the cache when we already queried this path.
  return @gets_cache[path] if @gets_cache.key?(path)

  @gets_cache[path] = @proxmox.get(path)
end
|
|
647
|
+
|
|
648
|
+
# Get the state of a VM
#
# Parameters::
# * *pve_node* (String): The PVE node having the container
# * *vm_id* (Integer): The VM ID
# Result::
# * String: The state (e.g. 'running' as tested by callers)
def vm_state(pve_node, vm_id)
  current_status_info = api_get("nodes/#{pve_node}/lxc/#{vm_id}/status/current")
  current_status_info['status']
end
|
|
658
|
+
|
|
659
|
+
# Timeout in seconds before giving up on a lock.
# Used by #ip_of when waiting for a container's config lock to be released.
LOCK_TIMEOUT = 30
|
|
661
|
+
|
|
662
|
+
# Get the IP address of a given LXC container.
# Waits (up to LOCK_TIMEOUT seconds) while the container config is locked by a Proxmox task,
# then parses the ip= property out of the net0 configuration entry.
#
# Parameters::
# * *pve_node* (String): The PVE node having the container
# * *vm_id* (Integer): The VM ID
# Result::
# * String or nil: The corresponding IP address, or nil if not found (could be that the container has disappeared, as this method is used also for containers not part of our sync node)
def ip_of(pve_node, vm_id)
  ip_found = nil
  config_path = "nodes/#{pve_node}/lxc/#{vm_id}/config"
  lxc_config = nil
  begin_time = Time.now
  loop do
    lxc_config = api_get(config_path)
    if lxc_config.is_a?(String)
      # The API returned an error string instead of a config hash.
      log "[ #{pve_node}/#{vm_id} ] - Error while checking its config: #{lxc_config}. Might be that the VM has disappeared."
      # Store the error under 'lock' so the timeout branch below can report it uniformly.
      lxc_config = { 'lock' => "Error: #{lxc_config}" }
    elsif lxc_config.key?('lock')
      # The node is currently doing some task. Wait for the lock to be released.
      log "[ #{pve_node}/#{vm_id} ] - Node is being locked (reason: #{lxc_config['lock']}). Wait for the lock to be released..."
      sleep 1
    else
      break
    end
    # Make sure we don't cache the error or the lock
    @gets_cache.delete(config_path)
    # NOTE(review): the error branch does not sleep, so repeated API errors loop tightly until this timeout.
    if Time.now - begin_time > LOCK_TIMEOUT
      log "[ #{pve_node}/#{vm_id} ] - !!! Timeout while waiting for the container to be unlocked (reason: #{lxc_config['lock']})."
      break
    end
  end
  if lxc_config['net0'].nil?
    log "[ #{pve_node}/#{vm_id} ] - !!! Config does not contain net0 information: #{lxc_config}"
  else
    # net0 is a comma-separated list of property=value pairs; extract the ip property (e.g. 'ip=10.0.0.1/32').
    lxc_config['net0'].split(',').each do |net_info|
      property, value = net_info.split('=')
      if property == 'ip'
        ip_found = value.split('/').first
        break
      end
    end
  end
  ip_found
end
|
|
706
|
+
|
|
707
|
+
end
|