hybrid_platforms_conductor 32.7.2 → 32.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. checksums.yaml +4 -4
  2. data/lib/hybrid_platforms_conductor/deployer.rb +12 -1
  3. data/lib/hybrid_platforms_conductor/hpc_plugins/connector/ssh.rb +18 -8
  4. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox.rb +36 -16
  5. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/proxmox_waiter.rb +16 -2
  6. data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_deploy_and_idempotence.rb +11 -9
  7. data/lib/hybrid_platforms_conductor/provisioner.rb +9 -0
  8. data/lib/hybrid_platforms_conductor/tests_runner.rb +34 -30
  9. data/lib/hybrid_platforms_conductor/version.rb +1 -1
  10. data/spec/hybrid_platforms_conductor_test.rb +1 -0
  11. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/connections_spec.rb +54 -0
  12. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioner_spec.rb +74 -10
  13. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/retries_spec.rb +20 -0
  14. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/start_spec.rb +3 -21
  15. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/state_spec.rb +40 -0
  16. data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_ssh_spec.rb +15 -2
  17. data/spec/hybrid_platforms_conductor_test/helpers/cmd_runner_helpers.rb +5 -1
  18. data/spec/hybrid_platforms_conductor_test/helpers/connector_ssh_helpers.rb +12 -7
  19. data/spec/hybrid_platforms_conductor_test/helpers/provisioner_proxmox_helpers.rb +68 -23
  20. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3abcfb2c500d444ed5ea2486bda7150aa451986c502cafdf68afdd802e0af866
4
- data.tar.gz: 06cf63f0c1e5a187ca53e259133f7e14e4e73d6b4e2d7a3b79180b4797ffaf82
3
+ metadata.gz: 378b6da76cab8e20f60c0517f7bbef90933b5d573069437ee16d913ca81c0689
4
+ data.tar.gz: 1924dd81e2740b50ee55758271282729d6df1fd6a45b555f96c321d1b40e54e3
5
5
  SHA512:
6
- metadata.gz: f3c4891e7add29c7b424256888fb959967f5b7ab428bb4564aecab19a2de0c34f7dc4d05cf232cea944a4aa4008bd5a541a2a90786e2d4cf2fdf7750eb439b12
7
- data.tar.gz: 254f4f7c9c3e9638be1632cd27d7eb0aa296a6be5b13167e1926ab0de52bfb9f8490c6b6251bef7cc20876f686fe161d67fb48bace985e743ee80f52359e3093
6
+ metadata.gz: f8a72778375c154e42829a430bec151a7553b79874d1a878e129d80eed22b0f77f094751ee370c84441e7ed658d9e7b87e80c8f950ba64b491e1e62eec1f4a2f
7
+ data.tar.gz: e437a3f4871bde44d6bcbcd8a588dd83ff33c0374cee3769eada500a3acf04606e37144f98d2a49aaf69e0ff8cdd935d3eaa396fea5bb19ac485184df883db24
@@ -311,13 +311,24 @@ module HybridPlatformsConductor
311
311
  environment: environment,
312
312
  logger: @logger,
313
313
  logger_stderr: @logger_stderr,
314
- config: @config,
314
+ config: sub_executable.config,
315
315
  cmd_runner: @cmd_runner,
316
316
  # Here we use the NodesHandler that will be bound to the sub-Deployer only, as the node's metadata might be modified by the Provisioner.
317
317
  nodes_handler: sub_executable.nodes_handler,
318
318
  actions_executor: @actions_executor
319
319
  )
320
320
  instance.with_running_instance(stop_on_exit: true, destroy_on_exit: !reuse_instance, port: 22) do
321
+ # Test-provisioned nodes have SSH Session Exec capabilities
322
+ sub_executable.nodes_handler.override_metadata_of node, :ssh_session_exec, 'true'
323
+ # Test-provisioned nodes use default sudo
324
+ sub_executable.config.sudo_procs.replace(sub_executable.config.sudo_procs.map do |sudo_proc_info|
325
+ {
326
+ nodes_selectors_stack: sudo_proc_info[:nodes_selectors_stack].map do |nodes_selector|
327
+ @nodes_handler.select_nodes(nodes_selector).select { |selected_node| selected_node != node }
328
+ end,
329
+ sudo_proc: sudo_proc_info[:sudo_proc]
330
+ }
331
+ end)
321
332
  actions_executor = sub_executable.actions_executor
322
333
  deployer = sub_executable.deployer
323
334
  # Setup test environment for this container
@@ -505,15 +505,25 @@ module HybridPlatformsConductor
505
505
  if @nodes_handler.get_ssh_session_exec_of(node) == 'false'
506
506
  # Here we have to create a ControlMaster using an interactive session, as the SSH server prohibits ExecSession, and so command executions.
507
507
  # We'll do that using another terminal spawned in the background.
508
- Thread.new do
509
- log_debug "[ ControlMaster - #{ssh_url} ] - Spawn interactive ControlMaster in separate terminal"
510
- @cmd_runner.run_cmd "xterm -e '#{ssh_exec} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url}'", log_to_stdout: log_debug?
511
- log_debug "[ ControlMaster - #{ssh_url} ] - Separate interactive ControlMaster closed"
508
+ if ENV['hpc_interactive'] == 'false'
509
+ error = "Can't spawn interactive ControlMaster to #{node} in non-interactive mode. You may want to change the hpc_interactive env variable."
510
+ if no_exception
511
+ log_error error
512
+ exit_status = :non_interactive
513
+ else
514
+ raise error
515
+ end
516
+ else
517
+ Thread.new do
518
+ log_debug "[ ControlMaster - #{ssh_url} ] - Spawn interactive ControlMaster in separate terminal"
519
+ @cmd_runner.run_cmd "xterm -e '#{ssh_exec} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url}'", log_to_stdout: log_debug?
520
+ log_debug "[ ControlMaster - #{ssh_url} ] - Separate interactive ControlMaster closed"
521
+ end
522
+ out 'External ControlMaster has been spawned.'
523
+ out 'Please login into it, keep its session opened and press enter here when done...'
524
+ $stdin.gets
525
+ exit_status = 0
512
526
  end
513
- out 'External ControlMaster has been spawned.'
514
- out 'Please login into it, keep its session opened and press enter here when done...'
515
- $stdin.gets
516
- exit_status = 0
517
527
  else
518
528
  # Create the control master
519
529
  ssh_control_master_start_cmd = "#{ssh_exec}#{@passwords.key?(node) || @auth_password ? '' : ' -o BatchMode=yes'} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url} true"
@@ -74,13 +74,13 @@ module HybridPlatformsConductor
74
74
  # First check if we already have a test container that corresponds to this node and environment
75
75
  @lxc_details = nil
76
76
  with_proxmox do |proxmox|
77
- proxmox.get('nodes').each do |node_info|
77
+ proxmox_get(proxmox, 'nodes').each do |node_info|
78
78
  if proxmox_test_info[:test_config][:pve_nodes].include?(node_info['node']) && node_info['status'] == 'online'
79
- proxmox.get("nodes/#{node_info['node']}/lxc").each do |lxc_info|
79
+ proxmox_get(proxmox, "nodes/#{node_info['node']}/lxc").each do |lxc_info|
80
80
  vm_id = Integer(lxc_info['vmid'])
81
81
  if vm_id.between?(*proxmox_test_info[:test_config][:vm_ids_range])
82
82
  # Check if the description contains our ID
83
- lxc_config = proxmox.get("nodes/#{node_info['node']}/lxc/#{vm_id}/config")
83
+ lxc_config = proxmox_get(proxmox, "nodes/#{node_info['node']}/lxc/#{vm_id}/config")
84
84
  vm_description_lines = (lxc_config['description'] || '').split("\n")
85
85
  hpc_marker_idx = vm_description_lines.index('===== HPC info =====')
86
86
  unless hpc_marker_idx.nil?
@@ -222,8 +222,8 @@ module HybridPlatformsConductor
222
222
  with_proxmox do |proxmox|
223
223
  vm_id_str = @lxc_details[:vm_id].to_s
224
224
  status =
225
- if proxmox.get("nodes/#{@lxc_details[:pve_node]}/lxc").any? { |data_info| data_info['vmid'] == vm_id_str }
226
- status_info = proxmox.get("nodes/#{@lxc_details[:pve_node]}/lxc/#{@lxc_details[:vm_id]}/status/current")
225
+ if proxmox_get(proxmox, "nodes/#{@lxc_details[:pve_node]}/lxc").any? { |data_info| data_info['vmid'] == vm_id_str }
226
+ status_info = proxmox_get(proxmox, "nodes/#{@lxc_details[:pve_node]}/lxc/#{@lxc_details[:vm_id]}/status/current")
227
227
  # Careful that it is possible that somebody destroyed the VM and so its status is missing
228
228
  status = status_info.key?('status') ? status_info['status'].to_sym : :missing
229
229
  status = :exited if status == :stopped
@@ -292,11 +292,27 @@ module HybridPlatformsConductor
292
292
  end
293
293
  end
294
294
 
295
- # Maximum number of retries to perform on the Proxmox API.
296
- NBR_RETRIES_MAX = 5
297
-
298
- # Minimum seconds to wait between retries
299
- RETRY_WAIT_TIME_SECS = 5
295
+ # Perform a get operation on the API
296
+ # Protect the get API methods with a retry mechanism in case of 5xx errors.
297
+ #
298
+ # Parameters::
299
+ # * *proxmox* (Proxmox): The Proxmox instance
300
+ # * *path* (String): Path to get
301
+ # Result::
302
+ # * Object: API response
303
+ def proxmox_get(proxmox, path)
304
+ response = nil
305
+ idx_try = 0
306
+ loop do
307
+ response = proxmox.get(path)
308
+ break if !(response.is_a?(String)) || !(response =~ /^NOK: error code = 5\d\d$/)
309
+ log_warn "[ #{@node}/#{@environment} ] - Proxmox API call get #{path} returned error #{response} (attempt ##{idx_try}/#{proxmox_test_info[:api_max_retries]})"
310
+ raise "[ #{@node}/#{@environment} ] - Proxmox API call get #{path} returns #{response} continuously (tried #{idx_try + 1} times)" if idx_try >= proxmox_test_info[:api_max_retries]
311
+ idx_try += 1
312
+ sleep proxmox_test_info[:api_wait_between_retries_secs] + rand(5)
313
+ end
314
+ response
315
+ end
300
316
 
301
317
  # Run a Proxmox task.
302
318
  # Handle a retry mechanism in case of 5xx errors.
@@ -313,11 +329,11 @@ module HybridPlatformsConductor
313
329
  while task.nil? do
314
330
  task = proxmox.send(http_method, "nodes/#{pve_node}/#{sub_path}", *args)
315
331
  if task =~ /^NOK: error code = 5\d\d$/
316
- log_warn "[ #{@node}/#{@environment} ] - Proxmox API call #{http_method} nodes/#{pve_node}/#{sub_path} #{args} returned error #{task} (attempt ##{idx_try}/#{NBR_RETRIES_MAX})"
332
+ log_warn "[ #{@node}/#{@environment} ] - Proxmox API call #{http_method} nodes/#{pve_node}/#{sub_path} #{args} returned error #{task} (attempt ##{idx_try}/#{proxmox_test_info[:api_max_retries]})"
317
333
  task = nil
334
+ break if idx_try >= proxmox_test_info[:api_max_retries]
318
335
  idx_try += 1
319
- break if idx_try == NBR_RETRIES_MAX
320
- sleep RETRY_WAIT_TIME_SECS + rand(5)
336
+ sleep proxmox_test_info[:api_wait_between_retries_secs] + rand(5)
321
337
  end
322
338
  end
323
339
  if task.nil?
@@ -358,7 +374,7 @@ module HybridPlatformsConductor
358
374
  # Result::
359
375
  # * String: The task status
360
376
  def task_status(proxmox, pve_node, task)
361
- status_info = proxmox.get("nodes/#{pve_node}/tasks/#{task}/status")
377
+ status_info = proxmox_get(proxmox, "nodes/#{pve_node}/tasks/#{task}/status")
362
378
  "#{status_info['status']}#{status_info['exitstatus'] ? ":#{status_info['exitstatus']}" : ''}"
363
379
  end
364
380
 
@@ -377,7 +393,9 @@ module HybridPlatformsConductor
377
393
  (proxmox_test_info[:test_config].merge(
378
394
  proxmox_api_url: proxmox_test_info[:api_url],
379
395
  futex_file: '/tmp/hpc_proxmox_allocations.futex',
380
- logs_dir: '/tmp/hpc_proxmox_waiter_logs'
396
+ logs_dir: '/tmp/hpc_proxmox_waiter_logs',
397
+ api_max_retries: proxmox_test_info[:api_max_retries],
398
+ api_wait_between_retries_secs: proxmox_test_info[:api_wait_between_retries_secs]
381
399
  )).to_json
382
400
  )
383
401
  result = nil
@@ -486,7 +504,7 @@ module HybridPlatformsConductor
486
504
  # So remaining length is 255 - 13 = 242 characters.
487
505
  MAX_FILE_ID_SIZE = 242
488
506
 
489
- # Get an ID unique for theis node/environment and that can be used in file names.
507
+ # Get an ID unique for this node/environment and that can be used in file names.
490
508
  #
491
509
  # Result::
492
510
  # * String: ID
@@ -506,6 +524,8 @@ module HybridPlatformsConductor
506
524
  # Result::
507
525
  # * Hash<Symbol,Object>: Configuration of the Proxmox instance to be used:
508
526
  # * *api_url* (String): The Proxmox API URL
527
+ # * *api_max_retries* (Integer): Max number of API retries
528
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries
509
529
  # * *sync_node* (String): Node to be used to synchronize Proxmox resources acquisition
510
530
  # * *test_config* (Hash<Symbol,Object>): The test configuration. Check ProxmoxWaiter#initialize (config_file structure) method to get details.
511
531
  # * *vm_config* (Hash<Symbol,Object>): Extra configuration of a created container. Check #request_lxc_creation_for results to get details.
@@ -26,6 +26,8 @@ class ProxmoxWaiter
26
26
  # * *proxmox_api_url* (String): Proxmox API URL.
27
27
  # * *futex_file* (String): Path to the file serving as a futex.
28
28
  # * *logs_dir* (String): Path to the directory containing logs [default: '.']
29
+ # * *api_max_retries* (Integer): Max number of API retries
30
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries
29
31
  # * *pve_nodes* (Array<String>): List of PVE nodes allowed to spawn new containers [default: all]
30
32
  # * *vm_ips_list* (Array<String>): The list of IPs that are available for the Proxomx containers.
31
33
  # * *vm_ids_range* ([Integer, Integer]): Minimum and maximum reservable VM ID
@@ -637,11 +639,23 @@ class ProxmoxWaiter
637
639
 
638
640
  # Get a path from the API it returns its JSON result.
639
641
  # Keep a cache of it, whose lifespan is this ProxmoxWaiter instance.
642
+ # Have a retry mechanism to make sure eventual non-deterministic 5xx errors are not an issue.
640
643
  #
641
644
  # Parameters::
642
645
  # * *path* (String): API path to query
643
- def api_get(path)
644
- @gets_cache[path] = @proxmox.get(path) unless @gets_cache.key?(path)
646
+ # Result::
647
+ # * Object: The API response
648
+ def api_get(path, nbr_retries: 3, wait_between_retry_secs: 10)
649
+ unless @gets_cache.key?(path)
650
+ idx_try = 0
651
+ loop do
652
+ @gets_cache[path] = @proxmox.get(path)
653
+ break unless @gets_cache[path].is_a?(String) && @gets_cache[path] =~ /^NOK: error code = 5\d\d$/
654
+ raise "Proxmox API get #{path} returns #{@gets_cache[path]} continuously (tried #{idx_try + 1} times)" if idx_try >= @config['api_max_retries']
655
+ idx_try += 1
656
+ sleep @config['api_wait_between_retries_secs']
657
+ end
658
+ end
645
659
  @gets_cache[path]
646
660
  end
647
661
 
@@ -54,17 +54,19 @@ module HybridPlatformsConductor
54
54
  instance.stop
55
55
  instance.with_running_instance(port: 22) do
56
56
 
57
- # ===== Deploy removes root access
58
- # Check that we can't connect with root
59
- ssh_ok = false
60
- begin
61
- Net::SSH.start(instance.ip, 'root', password: 'root_pwd', auth_methods: ['password'], verify_host_key: :never) do |ssh|
62
- ssh_ok = ssh.exec!('echo Works').strip == 'Works'
57
+ unless @nodes_handler.get_root_access_allowed_of(@node) == 'true'
58
+ # ===== Deploy removes root access
59
+ # Check that we can't connect with root
60
+ ssh_ok = false
61
+ begin
62
+ Net::SSH.start(instance.ip, 'root', password: 'root_pwd', auth_methods: ['password'], verify_host_key: :never) do |ssh|
63
+ ssh_ok = ssh.exec!('echo Works').strip == 'Works'
64
+ end
65
+ rescue
63
66
  end
64
- rescue
67
+ assert_equal ssh_ok, false, 'Root can still connect on the image after deployment'
68
+ # Even if we can connect using root, run the idempotence test
65
69
  end
66
- assert_equal ssh_ok, false, 'Root can still connect on the image after deployment'
67
- # Even if we can connect using root, run the idempotence test
68
70
 
69
71
  # ===== Idempotence
70
72
  unless ssh_ok
@@ -76,6 +76,15 @@ module HybridPlatformsConductor
76
76
  # Make sure we update it.
77
77
  @nodes_handler.override_metadata_of @node, :host_ip, instance_ip
78
78
  @nodes_handler.invalidate_metadata_of @node, :host_keys
79
+ # Make sure the SSH transformations don't apply to this node
80
+ @config.ssh_connection_transforms.replace(@config.ssh_connection_transforms.map do |ssh_transform_info|
81
+ {
82
+ nodes_selectors_stack: ssh_transform_info[:nodes_selectors_stack].map do |nodes_selector|
83
+ @nodes_handler.select_nodes(nodes_selector).select { |selected_node| selected_node != @node }
84
+ end,
85
+ transform: ssh_transform_info[:transform]
86
+ }
87
+ end)
79
88
  end
80
89
  wait_for_port!(port) if port
81
90
  yield
@@ -428,9 +428,11 @@ module HybridPlatformsConductor
428
428
  end
429
429
  end
430
430
  # Compute the timeout that will be applied, from the max timeout sum for every node that has tests to run
431
- timeout = CONNECTION_TIMEOUT + @cmds_to_run.map do |_node, cmds_list|
432
- cmds_list.inject(0) { |total_timeout, (_cmd, test_info)| test_info[:timeout] + total_timeout }
433
- end.max
431
+ timeout = CONNECTION_TIMEOUT + (
432
+ @cmds_to_run.map do |_node, cmds_list|
433
+ cmds_list.inject(0) { |total_timeout, (_cmd, test_info)| test_info[:timeout] + total_timeout }
434
+ end.max || 0
435
+ )
434
436
  # Run commands on nodes, in grouped way to avoid too many connections, per node
435
437
  # Hash< String, Array<String> >
436
438
  @test_cmds = Hash[@cmds_to_run.map do |node, cmds_list|
@@ -464,33 +466,35 @@ module HybridPlatformsConductor
464
466
  end,
465
467
  test_execution: proc do |test|
466
468
  exit_status, stdout, stderr = @actions_result[test.node]
467
- if exit_status.is_a?(Symbol)
468
- test.error "Error while executing tests: #{exit_status}: #{stderr}"
469
- else
470
- log_debug <<~EOS
471
- ----- Commands for #{test.node}:
472
- #{@test_cmds[test.node][:remote_bash].join("\n")}
473
- ----- STDOUT:
474
- #{stdout}
475
- ----- STDERR:
476
- #{stderr}
477
- -----
478
- EOS
479
- # Skip the first section, as it can contain SSH banners
480
- cmd_stdouts = stdout.split("#{CMD_SEPARATOR}\n")[1..-1]
481
- cmd_stdouts = [] if cmd_stdouts.nil?
482
- cmd_stderrs = stderr.split("#{CMD_SEPARATOR}\n")[1..-1]
483
- cmd_stderrs = [] if cmd_stderrs.nil?
484
- @cmds_to_run[test.node].zip(cmd_stdouts, cmd_stderrs).each do |(cmd, test_info), cmd_stdout, cmd_stderr|
485
- # Find the section that corresponds to this test
486
- if test_info[:test] == test
487
- cmd_stdout = '' if cmd_stdout.nil?
488
- cmd_stderr = '' if cmd_stderr.nil?
489
- stdout_lines = cmd_stdout.split("\n")
490
- # Last line of stdout is the return code
491
- return_code = stdout_lines.empty? ? :command_cant_run : Integer(stdout_lines.last)
492
- test.error "Command '#{cmd}' returned error code #{return_code}", "----- STDOUT:\n#{stdout_lines[0..-2].join("\n")}\n----- STDERR:\n#{cmd_stderr}" unless return_code == 0
493
- test_info[:validator].call(stdout_lines[0..-2], cmd_stderr.split("\n"), return_code)
469
+ unless exit_status.nil?
470
+ if exit_status.is_a?(Symbol)
471
+ test.error "Error while executing tests: #{exit_status}: #{stderr}"
472
+ else
473
+ log_debug <<~EOS
474
+ ----- Commands for #{test.node}:
475
+ #{@test_cmds[test.node][:remote_bash].join("\n")}
476
+ ----- STDOUT:
477
+ #{stdout}
478
+ ----- STDERR:
479
+ #{stderr}
480
+ -----
481
+ EOS
482
+ # Skip the first section, as it can contain SSH banners
483
+ cmd_stdouts = stdout.split("#{CMD_SEPARATOR}\n")[1..-1]
484
+ cmd_stdouts = [] if cmd_stdouts.nil?
485
+ cmd_stderrs = stderr.split("#{CMD_SEPARATOR}\n")[1..-1]
486
+ cmd_stderrs = [] if cmd_stderrs.nil?
487
+ @cmds_to_run[test.node].zip(cmd_stdouts, cmd_stderrs).each do |(cmd, test_info), cmd_stdout, cmd_stderr|
488
+ # Find the section that corresponds to this test
489
+ if test_info[:test] == test
490
+ cmd_stdout = '' if cmd_stdout.nil?
491
+ cmd_stderr = '' if cmd_stderr.nil?
492
+ stdout_lines = cmd_stdout.split("\n")
493
+ # Last line of stdout is the return code
494
+ return_code = stdout_lines.empty? ? :command_cant_run : Integer(stdout_lines.last)
495
+ test.error "Command '#{cmd}' returned error code #{return_code}", "----- STDOUT:\n#{stdout_lines[0..-2].join("\n")}\n----- STDERR:\n#{cmd_stderr}" unless return_code == 0
496
+ test_info[:validator].call(stdout_lines[0..-2], cmd_stderr.split("\n"), return_code)
497
+ end
494
498
  end
495
499
  end
496
500
  end
@@ -1,5 +1,5 @@
1
1
  module HybridPlatformsConductor
2
2
 
3
- VERSION = '32.7.2'
3
+ VERSION = '32.9.0'
4
4
 
5
5
  end
@@ -94,6 +94,7 @@ module HybridPlatformsConductorTest
94
94
  ENV.delete 'hpc_password_for_thycotic'
95
95
  ENV.delete 'hpc_domain_for_thycotic'
96
96
  ENV.delete 'hpc_certificates'
97
+ ENV.delete 'hpc_interactive'
97
98
  # Set the necessary Hybrid Platforms Conductor environment variables
98
99
  ENV['hpc_ssh_user'] = 'test_user'
99
100
  HybridPlatformsConductor::ServicesHandler.packaged_deployments.clear
@@ -44,6 +44,58 @@ describe HybridPlatformsConductor::ActionsExecutor do
44
44
  end
45
45
  end
46
46
 
47
+ it 'can\'t create an SSH master to 1 node not having Session Exec capabilities when hpc_interactive is false' do
48
+ with_test_platform(nodes: { 'node' => { meta: { host_ip: '192.168.42.42', ssh_session_exec: 'false' } } }) do
49
+ ENV['hpc_interactive'] = 'false'
50
+ with_cmd_runner_mocked(
51
+ [
52
+ ['which env', proc { [0, "/usr/bin/env\n", ''] }],
53
+ ['ssh -V 2>&1', proc { [0, "OpenSSH_7.4p1 Debian-10+deb9u7, OpenSSL 1.0.2u 20 Dec 2019\n", ''] }]
54
+ ] + ssh_expected_commands_for(
55
+ { 'node' => { connection: '192.168.42.42', user: 'test_user' } },
56
+ with_control_master_create: false,
57
+ with_control_master_destroy: false
58
+ )
59
+ ) do
60
+ test_connector.ssh_user = 'test_user'
61
+ expect do
62
+ test_connector.with_connection_to(['node']) do
63
+ end
64
+ end.to raise_error 'Can\'t spawn interactive ControlMaster to node in non-interactive mode. You may want to change the hpc_interactive env variable.'
65
+ end
66
+ end
67
+ end
68
+
69
+ it 'fails without creating exception when creating an SSH master to 1 node not having Session Exec capabilities when hpc_interactive is false and we use no_exception' do
70
+ with_test_platform(nodes: {
71
+ 'node1' => { meta: { host_ip: '192.168.42.1' } },
72
+ 'node2' => { meta: { host_ip: '192.168.42.2', ssh_session_exec: 'false' } },
73
+ 'node3' => { meta: { host_ip: '192.168.42.3' } }
74
+ }) do
75
+ ENV['hpc_interactive'] = 'false'
76
+ with_cmd_runner_mocked(
77
+ [
78
+ ['which env', proc { [0, "/usr/bin/env\n", ''] }],
79
+ ['ssh -V 2>&1', proc { [0, "OpenSSH_7.4p1 Debian-10+deb9u7, OpenSSL 1.0.2u 20 Dec 2019\n", ''] }]
80
+ ] + ssh_expected_commands_for(
81
+ 'node1' => { connection: '192.168.42.1', user: 'test_user' },
82
+ 'node3' => { connection: '192.168.42.3', user: 'test_user' }
83
+ ) + ssh_expected_commands_for(
84
+ {
85
+ 'node2' => { connection: '192.168.42.2', user: 'test_user' }
86
+ },
87
+ with_control_master_create: false,
88
+ with_control_master_destroy: false
89
+ )
90
+ ) do
91
+ test_connector.ssh_user = 'test_user'
92
+ test_connector.with_connection_to(%w[node1 node2 node3], no_exception: true) do |connected_nodes|
93
+ expect(connected_nodes.sort).to eq %w[node1 node3].sort
94
+ end
95
+ end
96
+ end
97
+ end
98
+
47
99
  it 'creates SSH master to several nodes' do
48
100
  with_test_platform(nodes: {
49
101
  'node1' => { meta: { host_ip: '192.168.42.1' } },
@@ -123,6 +175,8 @@ describe HybridPlatformsConductor::ActionsExecutor do
123
175
  'node3' => { connection: '192.168.42.3', user: 'test_user' }
124
176
  },
125
177
  # Here the threads for node1's and node3's ControlMasters might not trigger before the one for node2, so they will not destroy it.
178
+ # Sometimes they don't even have time to create the Control Masters that node2 has already failed.
179
+ with_control_master_create_optional: true,
126
180
  with_control_master_destroy_optional: true
127
181
  ) + ssh_expected_commands_for(
128
182
  {
@@ -17,8 +17,8 @@ describe HybridPlatformsConductor::Deployer do
17
17
  block.call
18
18
  end
19
19
  provisioner = nil
20
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
21
- expect(test_deployer.local_environment).to eq true
20
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
21
+ expect(sub_test_deployer.local_environment).to eq true
22
22
  provisioner = test_instance
23
23
  expect(test_instance.node).to eq 'node'
24
24
  expect(test_instance.environment).to match /^#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+$/
@@ -40,8 +40,8 @@ describe HybridPlatformsConductor::Deployer do
40
40
  block.call
41
41
  end
42
42
  provisioner = nil
43
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
44
- expect(test_deployer.local_environment).to eq true
43
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
44
+ expect(sub_test_deployer.local_environment).to eq true
45
45
  provisioner = test_instance
46
46
  expect(test_instance.node).to eq 'node'
47
47
  expect(test_instance.environment).to match /^#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+$/
@@ -50,6 +50,70 @@ describe HybridPlatformsConductor::Deployer do
50
50
  end
51
51
  end
52
52
 
53
+ it 'gives a new test instance ready to be used in place of the node without SSH transformations' do
54
+ with_test_platform(
55
+ {
56
+ nodes: {
57
+ 'node1' => { meta: { host_ip: '192.168.42.1', ssh_session_exec: 'false' } },
58
+ 'node2' => { meta: { host_ip: '192.168.42.2', ssh_session_exec: 'false' } }
59
+ }
60
+ },
61
+ false,
62
+ '
63
+ for_nodes(%w[node1 node2]) do
64
+ transform_ssh_connection do |node, connection, connection_user, gateway, gateway_user|
65
+ ["#{connection}_#{node}", "#{connection_user}_#{node}", "#{gateway}_#{node}", "#{gateway_user}_#{node}"]
66
+ end
67
+ end
68
+ '
69
+ ) do |repository|
70
+ register_plugins(:provisioner, { test_provisioner: HybridPlatformsConductorTest::TestProvisioner })
71
+ File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
72
+ HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created running exited]
73
+ HybridPlatformsConductorTest::TestProvisioner.mocked_ip = '172.17.0.1'
74
+ expect(Socket).to receive(:tcp).with('172.17.0.1', 22, { connect_timeout: 1 }) do |&block|
75
+ block.call
76
+ end
77
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node1', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
78
+ expect(sub_test_deployer.instance_eval { @nodes_handler.get_ssh_session_exec_of('node1') }).to eq 'true'
79
+ expect(sub_test_deployer.instance_eval { @nodes_handler.get_ssh_session_exec_of('node2') }).to eq 'false'
80
+ ssh_transforms = test_instance.instance_eval { @config.ssh_connection_transforms }
81
+ expect(ssh_transforms.size).to eq 1
82
+ expect(ssh_transforms[0][:nodes_selectors_stack]).to eq [%w[node2]]
83
+ end
84
+ end
85
+ end
86
+
87
+ it 'gives a new test instance ready to be used in place of the node without sudo specificities' do
88
+ with_test_platform(
89
+ {
90
+ nodes: {
91
+ 'node1' => { meta: { host_ip: '192.168.42.1' } },
92
+ 'node2' => { meta: { host_ip: '192.168.42.2' } }
93
+ }
94
+ },
95
+ false,
96
+ '
97
+ for_nodes(%w[node1 node2]) do
98
+ sudo_for { |user| "other_sudo --user #{user}" }
99
+ end
100
+ '
101
+ ) do |repository|
102
+ register_plugins(:provisioner, { test_provisioner: HybridPlatformsConductorTest::TestProvisioner })
103
+ File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
104
+ HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created running exited]
105
+ HybridPlatformsConductorTest::TestProvisioner.mocked_ip = '172.17.0.1'
106
+ expect(Socket).to receive(:tcp).with('172.17.0.1', 22, { connect_timeout: 1 }) do |&block|
107
+ block.call
108
+ end
109
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node1', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
110
+ sudo_procs = test_instance.instance_eval { @config.sudo_procs }
111
+ expect(sudo_procs.size).to eq 1
112
+ expect(sudo_procs[0][:nodes_selectors_stack]).to eq [%w[node2]]
113
+ end
114
+ end
115
+ end
116
+
53
117
  it 'does not destroy instances when asked to reuse' do
54
118
  with_test_platform(
55
119
  nodes: { 'node' => { meta: { host_ip: '192.168.42.42' } } }
@@ -62,8 +126,8 @@ describe HybridPlatformsConductor::Deployer do
62
126
  block.call
63
127
  end
64
128
  provisioner = nil
65
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |test_deployer, test_instance|
66
- expect(test_deployer.local_environment).to eq true
129
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |sub_test_deployer, test_instance|
130
+ expect(sub_test_deployer.local_environment).to eq true
67
131
  provisioner = test_instance
68
132
  expect(test_instance.node).to eq 'node'
69
133
  expect(test_instance.environment).to eq "#{`whoami`.strip}_hpc_testing_provisioner"
@@ -84,8 +148,8 @@ describe HybridPlatformsConductor::Deployer do
84
148
  block.call
85
149
  end
86
150
  provisioner = nil
87
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |test_deployer, test_instance|
88
- expect(test_deployer.local_environment).to eq true
151
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |sub_test_deployer, test_instance|
152
+ expect(sub_test_deployer.local_environment).to eq true
89
153
  provisioner = test_instance
90
154
  expect(test_instance.node).to eq 'node'
91
155
  expect(test_instance.environment).to eq "#{`whoami`.strip}_hpc_testing_provisioner"
@@ -102,7 +166,7 @@ describe HybridPlatformsConductor::Deployer do
102
166
  File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
103
167
  HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created created exited exited]
104
168
  expect do
105
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
169
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
106
170
  end
107
171
  end.to raise_error /\[ node\/#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+ \] - Instance fails to be in a state among \(running\) with timeout 1\. Currently in state exited/
108
172
  end
@@ -120,7 +184,7 @@ describe HybridPlatformsConductor::Deployer do
120
184
  raise Errno::ETIMEDOUT, 'Timeout while reading from port 22'
121
185
  end
122
186
  expect do
123
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
187
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
124
188
  end
125
189
  end.to raise_error /\[ node\/#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+ \] - Instance fails to have port 22 opened with timeout 1\./
126
190
  end
@@ -28,6 +28,26 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
28
28
  end
29
29
  end
30
30
 
31
+ it 'retries a few times before ending in error for a 5xx API error' do
32
+ with_sync_node do
33
+ mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => { error_strings: ['NOK: error code = 500'] * 5 } }])
34
+ result = call_reserve_proxmox_container(2, 1024, 4, config: { api_max_retries: 4 })
35
+ expect(result[:error]).not_to eq nil
36
+ expect(result[:error]).to match /Unhandled exception from reserve_proxmox_container: Proxmox API get nodes\/pve_node_name\/lxc returns NOK: error code = 500 continuously \(tried 5 times\)/
37
+ end
38
+ end
39
+
40
+ it 'retries API errors a few times until it gets resolved' do
41
+ with_sync_node do
42
+ mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => { error_strings: ['NOK: error code = 500'] * 3 } }])
43
+ expect(call_reserve_proxmox_container(2, 1024, 4, config: { api_max_retries: 4 })).to eq(
44
+ pve_node: 'pve_node_name',
45
+ vm_id: 1000,
46
+ vm_ip: '192.168.0.100'
47
+ )
48
+ end
49
+ end
50
+
31
51
  end
32
52
 
33
53
  end
@@ -39,16 +39,7 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
39
39
  mock_proxmox_to_start_node(nbr_api_errors: 3)
40
40
  ]
41
41
  instance.create
42
- # To speed up the test, alter the wait time between retries.
43
- old_wait_secs = HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:RETRY_WAIT_TIME_SECS)
44
- begin
45
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
46
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, 1)
47
- instance.start
48
- ensure
49
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
50
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, old_wait_secs)
51
- end
42
+ instance.start
52
43
  end
53
44
  end
54
45
 
@@ -58,19 +49,10 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
58
49
  # 1 - The info on existing containers
59
50
  mock_proxmox_to_get_nodes_info,
60
51
  # 2 - The start of the container - fail too many times
61
- mock_proxmox_to_start_node(nbr_api_errors: HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:NBR_RETRIES_MAX), task_status: nil)
52
+ mock_proxmox_to_start_node(nbr_api_errors: 4, task_status: nil)
62
53
  ]
63
54
  instance.create
64
- # To speed up the test, alter the wait time between retries.
65
- old_wait_secs = HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:RETRY_WAIT_TIME_SECS)
66
- begin
67
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
68
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, 1)
69
- expect { instance.start }.to raise_error '[ node/test ] - Proxmox API call post nodes/pve_node_name/lxc/1024/status/start [] is constantly failing. Giving up.'
70
- ensure
71
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
72
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, old_wait_secs)
73
- end
55
+ expect { instance.start }.to raise_error '[ node/test ] - Proxmox API call post nodes/pve_node_name/lxc/1024/status/start [] is constantly failing. Giving up.'
74
56
  end
75
57
  end
76
58
 
@@ -23,6 +23,46 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
23
23
  end
24
24
  end
25
25
 
26
+
27
+ it '' do
28
+ with_test_proxmox_platform do |instance|
29
+ mock_proxmox_calls_with [
30
+ # 1 - The info on existing containers
31
+ mock_proxmox_to_get_nodes_info,
32
+ # 2 - The start of the container - fail a few times
33
+ mock_proxmox_to_start_node(nbr_api_errors: 2)
34
+ ]
35
+ instance.create
36
+ instance.start
37
+ end
38
+ end
39
+
40
+ it 'retries calls to the API when getting back errors 5xx' do
41
+ with_test_proxmox_platform do |instance|
42
+ mock_proxmox_calls_with [
43
+ # 1 - The info on existing containers
44
+ mock_proxmox_to_get_nodes_info,
45
+ # 2 - The status of the container
46
+ mock_proxmox_to_status_node(nbr_api_errors: 3)
47
+ ]
48
+ instance.create
49
+ expect(instance.state).to eq :created
50
+ end
51
+ end
52
+
53
+ it 'fails to get an instance\'s status when the Proxmox API fails too many times' do
54
+ with_test_proxmox_platform do |instance|
55
+ mock_proxmox_calls_with [
56
+ # 1 - The info on existing containers
57
+ mock_proxmox_to_get_nodes_info,
58
+ # 2 - The status of the container
59
+ mock_proxmox_to_status_node(nbr_api_errors: 4, status: nil)
60
+ ]
61
+ instance.create
62
+ expect { instance.state }.to raise_error '[ node/test ] - Proxmox API call get nodes/pve_node_name/lxc returns NOK: error code = 500 continuously (tried 4 times)'
63
+ end
64
+ end
65
+
26
66
  end
27
67
 
28
68
  end
@@ -69,7 +69,7 @@ describe HybridPlatformsConductor::TestsRunner do
69
69
  'node12' => { 'test_node12.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node12', stdout, stderr, exit_code] } },
70
70
  'node21' => { 'test_node21.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node21', stdout, stderr, exit_code] } },
71
71
  'node22' => { 'test_node22.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node22', stdout, stderr, exit_code] } }
72
- }}
72
+ } }
73
73
  expect(test_tests_runner.run_tests([{ all: true }])).to eq 0
74
74
  expect(ssh_executions.sort).to eq [
75
75
  ['node11', ['stdout11'], ['stderr11'], 0],
@@ -88,7 +88,7 @@ describe HybridPlatformsConductor::TestsRunner do
88
88
  HybridPlatformsConductorTest::TestPlugins::NodeSsh.node_tests = { node_ssh_test: {
89
89
  'node12' => { 'test_node12.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node12', stdout, stderr, exit_code] } },
90
90
  'node22' => { 'test_node22.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node22', stdout, stderr, exit_code] } }
91
- }}
91
+ } }
92
92
  expect(test_tests_runner.run_tests(%w[node12 node22])).to eq 0
93
93
  expect(ssh_executions.sort).to eq [
94
94
  ['node12', ['stdout12'], ['stderr12'], 0],
@@ -97,6 +97,19 @@ describe HybridPlatformsConductor::TestsRunner do
97
97
  end
98
98
  end
99
99
 
100
+ it 'does not execute anything when the tests report no command' do
101
+ with_test_platform_for_node_connection_tests do
102
+ test_tests_runner.tests = [:node_ssh_test]
103
+ ssh_executions = []
104
+ HybridPlatformsConductorTest::TestPlugins::NodeSsh.node_tests = { node_ssh_test: {
105
+ 'node12' => {},
106
+ 'node22' => {}
107
+ } }
108
+ expect(test_tests_runner.run_tests(%w[node12 node22])).to eq 0
109
+ expect(ssh_executions).to eq []
110
+ end
111
+ end
112
+
100
113
  it 'executes several SSH node tests once per node with the correct command, grouping commands' do
101
114
  with_test_platform_for_node_connection_tests do
102
115
  expect_actions_executor_runs([proc do |actions|
@@ -89,7 +89,11 @@ module HybridPlatformsConductorTest
89
89
  remaining_expected_commands.select do |(_expected_command, _command_code, options)|
90
90
  !options[:optional]
91
91
  end
92
- ).to eq([]), "Expected CmdRunner commands were not run:\n#{remaining_expected_commands.map(&:first).join("\n")}"
92
+ ).to eq([]), "Expected CmdRunner commands were not run:\n#{
93
+ remaining_expected_commands.map do |(expected_command, _command_code, options)|
94
+ "#{options[:optional] ? '[Optional] ' : ''}#{expected_command}"
95
+ end.join("\n")
96
+ }"
93
97
  # Un-mock the command runner
94
98
  allow(cmd_runner).to receive(:run_cmd).and_call_original
95
99
  end
@@ -15,6 +15,7 @@ module HybridPlatformsConductorTest
15
15
  # * *times* (Integer): Number of times this connection should be used [default: 1]
16
16
  # * *control_master_create_error* (String or nil): Error to simulate during the SSH ControlMaster creation, or nil for none [default: nil]
17
17
  # * *with_control_master_create* (Boolean): Do we create the control master? [default: true]
18
+ # * *with_control_master_create_optional* (Boolean): If true, then consider the ControlMaster creation to be optional [default: false]
18
19
  # * *with_control_master_check* (Boolean): Do we check the control master? [default: false]
19
20
  # * *with_control_master_destroy* (Boolean): Do we destroy the control master? [default: true]
20
21
  # * *with_control_master_destroy_optional* (Boolean): If true, then consider the ControlMaster destruction to be optional [default: false]
@@ -26,6 +27,7 @@ module HybridPlatformsConductorTest
26
27
  def ssh_expected_commands_for(
27
28
  nodes_connections,
28
29
  with_control_master_create: true,
30
+ with_control_master_create_optional: false,
29
31
  with_control_master_check: false,
30
32
  with_control_master_destroy: true,
31
33
  with_control_master_destroy_optional: false,
@@ -52,13 +54,15 @@ module HybridPlatformsConductorTest
52
54
  if with_session_exec
53
55
  /^.+\/ssh #{with_batch_mode ? '-o BatchMode=yes ' : ''}-o ControlMaster=yes -o ControlPersist=yes hpc\.#{Regexp.escape(node)} true$/
54
56
  else
55
- # Mock the user hitting enter as the Control Master will be created in another thread and the main thread waits for user input.
56
- expect($stdin).to receive(:gets) do
57
- # We have to wait for the Control Master creation thread to actually create the Control Master before hitting Enter.
58
- while !control_master_created do
59
- sleep 0.1
57
+ unless ENV['hpc_interactive'] == 'false'
58
+ # Mock the user hitting enter as the Control Master will be created in another thread and the main thread waits for user input.
59
+ expect($stdin).to receive(:gets) do
60
+ # We have to wait for the Control Master creation thread to actually create the Control Master before hitting Enter.
61
+ while !control_master_created do
62
+ sleep 0.1
63
+ end
64
+ "\n"
60
65
  end
61
- "\n"
62
66
  end
63
67
  /^xterm -e '.+\/ssh -o ControlMaster=yes -o ControlPersist=yes hpc\.#{Regexp.escape(node)}'$/
64
68
  end,
@@ -78,7 +82,8 @@ module HybridPlatformsConductorTest
78
82
  else
79
83
  [255, '', node_connection_info[:control_master_create_error]]
80
84
  end
81
- end
85
+ end,
86
+ { optional: with_control_master_create_optional }
82
87
  ]
83
88
  end
84
89
  if with_control_master_check
@@ -23,6 +23,8 @@ module HybridPlatformsConductorTest
23
23
  test_platform path: '#{repository}'
24
24
  proxmox(
25
25
  api_url: 'https://my-proxmox.my-domain.com:8006',
26
+ api_max_retries: 3,
27
+ api_wait_between_retries_secs: 0,
26
28
  sync_node: 'node',
27
29
  test_config: {
28
30
  pve_nodes: ['pve_node_name'],
@@ -75,12 +77,20 @@ module HybridPlatformsConductorTest
75
77
  # * *proxmox_password* (String or nil): Proxmox password used to connect to Proxmox API [default: nil]
76
78
  # * *proxmox_realm* (String or nil): Proxmox realm used to connect to Proxmox API [default: 'pam']
77
79
  # * *nodes_info* (Array<Hash>): Nodes info returned by the Proxmox API [default: []]
80
+ # * *nbr_api_errors* (Integer): Number of API errors 500 to mock before getting a successful query [defaults: 0]
78
81
  # * *extra_expects* (Proc or nil): Code called for additional expectations on the proxmox instance, or nil if none [default: nil]
79
82
  # * Parameters::
80
83
  # * *proxmox* (Double): The mocked Proxmox instance
81
84
  # Result::
82
85
  # * Proc: Code called in place of Proxmox.new. Signature is the same as Proxmox.new.
83
- def mock_proxmox_to_get_nodes_info(proxmox_user: nil, proxmox_password: nil, proxmox_realm: 'pam', nodes_info: [], extra_expects: nil)
86
+ def mock_proxmox_to_get_nodes_info(
87
+ proxmox_user: nil,
88
+ proxmox_password: nil,
89
+ proxmox_realm: 'pam',
90
+ nodes_info: [],
91
+ nbr_api_errors: 0,
92
+ extra_expects: nil
93
+ )
84
94
  proc do |url, pve_node, user, password, realm, options|
85
95
  expect(url).to eq 'https://my-proxmox.my-domain.com:8006/api2/json/'
86
96
  expect(pve_node).to eq 'my-proxmox'
@@ -97,8 +107,10 @@ module HybridPlatformsConductorTest
97
107
  # Nothing
98
108
  end
99
109
  # Mock checking existing nodes
100
- expect(proxmox).to receive(:get).with('nodes') do
101
- nodes_info
110
+ idx_try = 0
111
+ expect(proxmox).to receive(:get).exactly(nbr_api_errors + 1).times.with('nodes') do
112
+ idx_try += 1
113
+ idx_try <= nbr_api_errors ? 'NOK: error code = 500' : nodes_info
102
114
  end
103
115
  extra_expects.call(proxmox) unless extra_expects.nil?
104
116
  proxmox
@@ -243,13 +255,15 @@ module HybridPlatformsConductorTest
243
255
  # Parameters::
244
256
  # * *proxmox_user* (String or nil): Proxmox user used to connect to Proxmox API [default: nil]
245
257
  # * *proxmox_password* (String or nil): Proxmox password used to connect to Proxmox API [default: nil]
246
- # * *status* (String): Mocked status [default: 'created']
258
+ # * *status* (String or nil): Mocked status, or nil if it should not be asked [default: 'created']
259
+ # * *nbr_api_errors* (Integer): Number of API errors 500 to mock before getting a successful query [defaults: 0]
247
260
  # Result::
248
261
  # * Proc: Code called in place of Proxmox.new. Signature is the same as Proxmox.new.
249
262
  def mock_proxmox_to_status_node(
250
263
  proxmox_user: nil,
251
264
  proxmox_password: nil,
252
- task_status: 'OK'
265
+ status: 'created',
266
+ nbr_api_errors: 0
253
267
  )
254
268
  proc do |url, pve_node, user, password, realm, options|
255
269
  expect(url).to eq 'https://my-proxmox.my-domain.com:8006/api2/json/'
@@ -267,17 +281,25 @@ module HybridPlatformsConductorTest
267
281
  # Nothing
268
282
  end
269
283
  # Mock getting status of a container
270
- expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc') do
271
- [
284
+ idx_try = 0
285
+ expect(proxmox).to receive(:get).exactly(nbr_api_errors + (status.nil? ? 0 : 1)).times.with('nodes/pve_node_name/lxc') do
286
+ idx_try += 1
287
+ if idx_try <= nbr_api_errors
288
+ 'NOK: error code = 500'
289
+ else
290
+ [
291
+ {
292
+ 'vmid' => '1024'
293
+ }
294
+ ]
295
+ end
296
+ end
297
+ unless status.nil?
298
+ expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc/1024/status/current') do
272
299
  {
273
- 'vmid' => '1024'
300
+ 'status' => status
274
301
  }
275
- ]
276
- end
277
- expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc/1024/status/current') do
278
- {
279
- 'status' => 'created'
280
- }
302
+ end
281
303
  end
282
304
  proxmox
283
305
  end
@@ -548,13 +570,17 @@ module HybridPlatformsConductorTest
548
570
  ]
549
571
  when /^nodes\/([^\/]+)\/lxc$/
550
572
  pve_node_name = $1
551
- pve_nodes[pve_node_name][:lxc_containers].map do |vm_id, vm_info|
552
- {
553
- 'vmid' => vm_id.to_s,
554
- 'maxdisk' => vm_info[:maxdisk],
555
- 'maxmem' => vm_info[:maxmem],
556
- 'cpus' => vm_info[:cpus]
557
- }
573
+ if pve_nodes[pve_node_name][:error_strings].nil? || pve_nodes[pve_node_name][:error_strings].empty?
574
+ pve_nodes[pve_node_name][:lxc_containers].map do |vm_id, vm_info|
575
+ {
576
+ 'vmid' => vm_id.to_s,
577
+ 'maxdisk' => vm_info[:maxdisk],
578
+ 'maxmem' => vm_info[:maxmem],
579
+ 'cpus' => vm_info[:cpus]
580
+ }
581
+ end
582
+ else
583
+ pve_nodes[pve_node_name][:error_strings].shift
558
584
  end
559
585
  when /^nodes\/([^\/]+)\/lxc\/([^\/]+)\/config$/
560
586
  pve_node_name = $1
@@ -642,14 +668,26 @@ module HybridPlatformsConductorTest
642
668
  # * *wait_before_retry* (Integer): Specify the number of seconds to wait before retry [default: 0]
643
669
  # * *create* (Hash or nil): Create file content, or nil if none [default: nil]
644
670
  # * *destroy* (Hash or nil): Destroy file content, or nil if none [default: nil]
671
+ # * *api_max_retries* (Integer): Max number of API retries [default: 3]
672
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries [default: 0]
645
673
  # Result::
646
674
  # * Hash: JSON result of the call
647
- def call_reserve_proxmox_container_with(config: {}, max_retries: 1, wait_before_retry: 0, create: nil, destroy: nil)
675
+ def call_reserve_proxmox_container_with(
676
+ config: {},
677
+ max_retries: 1,
678
+ wait_before_retry: 0,
679
+ create: nil,
680
+ destroy: nil,
681
+ api_max_retries: 3,
682
+ api_wait_between_retries_secs: 0
683
+ )
648
684
  # Make sure we set default values in the config
649
685
  config = {
650
686
  proxmox_api_url: 'https://my-proxmox.my-domain.com:8006',
651
687
  futex_file: "#{@repository}/proxmox/allocations.futex",
652
688
  logs_dir: "#{Dir.tmpdir}/hpc_test_proxmox_waiter_logs",
689
+ api_max_retries: api_max_retries,
690
+ api_wait_between_retries_secs: api_wait_between_retries_secs,
653
691
  pve_nodes: ['pve_node_name'],
654
692
  vm_ips_list: %w[
655
693
  192.168.0.100
@@ -716,7 +754,14 @@ module HybridPlatformsConductorTest
716
754
  # * *wait_before_retry* (Integer): Specify the number of seconds to wait before retry [default: 0]
717
755
  # Result::
718
756
  # * Hash: JSON result of the call
719
- def call_reserve_proxmox_container(cpus, ram_mb, disk_gb, config: {}, max_retries: 1, wait_before_retry: 0)
757
+ def call_reserve_proxmox_container(
758
+ cpus,
759
+ ram_mb,
760
+ disk_gb,
761
+ config: {},
762
+ max_retries: 1,
763
+ wait_before_retry: 0
764
+ )
720
765
  call_reserve_proxmox_container_with(
721
766
  config: config,
722
767
  max_retries: max_retries,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hybrid_platforms_conductor
3
3
  version: !ruby/object:Gem::Version
4
- version: 32.7.2
4
+ version: 32.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Muriel Salvan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2021-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: range_operators