hybrid_platforms_conductor 32.7.2 → 32.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. checksums.yaml +4 -4
  2. data/lib/hybrid_platforms_conductor/deployer.rb +12 -1
  3. data/lib/hybrid_platforms_conductor/hpc_plugins/connector/ssh.rb +18 -8
  4. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox.rb +36 -16
  5. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/proxmox_waiter.rb +16 -2
  6. data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_deploy_and_idempotence.rb +11 -9
  7. data/lib/hybrid_platforms_conductor/provisioner.rb +9 -0
  8. data/lib/hybrid_platforms_conductor/tests_runner.rb +34 -30
  9. data/lib/hybrid_platforms_conductor/version.rb +1 -1
  10. data/spec/hybrid_platforms_conductor_test.rb +1 -0
  11. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/connections_spec.rb +54 -0
  12. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioner_spec.rb +74 -10
  13. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/retries_spec.rb +20 -0
  14. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/start_spec.rb +3 -21
  15. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/state_spec.rb +40 -0
  16. data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_ssh_spec.rb +15 -2
  17. data/spec/hybrid_platforms_conductor_test/helpers/cmd_runner_helpers.rb +5 -1
  18. data/spec/hybrid_platforms_conductor_test/helpers/connector_ssh_helpers.rb +12 -7
  19. data/spec/hybrid_platforms_conductor_test/helpers/provisioner_proxmox_helpers.rb +68 -23
  20. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3abcfb2c500d444ed5ea2486bda7150aa451986c502cafdf68afdd802e0af866
4
- data.tar.gz: 06cf63f0c1e5a187ca53e259133f7e14e4e73d6b4e2d7a3b79180b4797ffaf82
3
+ metadata.gz: 378b6da76cab8e20f60c0517f7bbef90933b5d573069437ee16d913ca81c0689
4
+ data.tar.gz: 1924dd81e2740b50ee55758271282729d6df1fd6a45b555f96c321d1b40e54e3
5
5
  SHA512:
6
- metadata.gz: f3c4891e7add29c7b424256888fb959967f5b7ab428bb4564aecab19a2de0c34f7dc4d05cf232cea944a4aa4008bd5a541a2a90786e2d4cf2fdf7750eb439b12
7
- data.tar.gz: 254f4f7c9c3e9638be1632cd27d7eb0aa296a6be5b13167e1926ab0de52bfb9f8490c6b6251bef7cc20876f686fe161d67fb48bace985e743ee80f52359e3093
6
+ metadata.gz: f8a72778375c154e42829a430bec151a7553b79874d1a878e129d80eed22b0f77f094751ee370c84441e7ed658d9e7b87e80c8f950ba64b491e1e62eec1f4a2f
7
+ data.tar.gz: e437a3f4871bde44d6bcbcd8a588dd83ff33c0374cee3769eada500a3acf04606e37144f98d2a49aaf69e0ff8cdd935d3eaa396fea5bb19ac485184df883db24
@@ -311,13 +311,24 @@ module HybridPlatformsConductor
311
311
  environment: environment,
312
312
  logger: @logger,
313
313
  logger_stderr: @logger_stderr,
314
- config: @config,
314
+ config: sub_executable.config,
315
315
  cmd_runner: @cmd_runner,
316
316
  # Here we use the NodesHandler that will be bound to the sub-Deployer only, as the node's metadata might be modified by the Provisioner.
317
317
  nodes_handler: sub_executable.nodes_handler,
318
318
  actions_executor: @actions_executor
319
319
  )
320
320
  instance.with_running_instance(stop_on_exit: true, destroy_on_exit: !reuse_instance, port: 22) do
321
+ # Test-provisioned nodes have SSH Session Exec capabilities
322
+ sub_executable.nodes_handler.override_metadata_of node, :ssh_session_exec, 'true'
323
+ # Test-provisioned nodes use default sudo
324
+ sub_executable.config.sudo_procs.replace(sub_executable.config.sudo_procs.map do |sudo_proc_info|
325
+ {
326
+ nodes_selectors_stack: sudo_proc_info[:nodes_selectors_stack].map do |nodes_selector|
327
+ @nodes_handler.select_nodes(nodes_selector).select { |selected_node| selected_node != node }
328
+ end,
329
+ sudo_proc: sudo_proc_info[:sudo_proc]
330
+ }
331
+ end)
321
332
  actions_executor = sub_executable.actions_executor
322
333
  deployer = sub_executable.deployer
323
334
  # Setup test environment for this container
@@ -505,15 +505,25 @@ module HybridPlatformsConductor
505
505
  if @nodes_handler.get_ssh_session_exec_of(node) == 'false'
506
506
  # Here we have to create a ControlMaster using an interactive session, as the SSH server prohibits ExecSession, and so command executions.
507
507
  # We'll do that using another terminal spawned in the background.
508
- Thread.new do
509
- log_debug "[ ControlMaster - #{ssh_url} ] - Spawn interactive ControlMaster in separate terminal"
510
- @cmd_runner.run_cmd "xterm -e '#{ssh_exec} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url}'", log_to_stdout: log_debug?
511
- log_debug "[ ControlMaster - #{ssh_url} ] - Separate interactive ControlMaster closed"
508
+ if ENV['hpc_interactive'] == 'false'
509
+ error = "Can't spawn interactive ControlMaster to #{node} in non-interactive mode. You may want to change the hpc_interactive env variable."
510
+ if no_exception
511
+ log_error error
512
+ exit_status = :non_interactive
513
+ else
514
+ raise error
515
+ end
516
+ else
517
+ Thread.new do
518
+ log_debug "[ ControlMaster - #{ssh_url} ] - Spawn interactive ControlMaster in separate terminal"
519
+ @cmd_runner.run_cmd "xterm -e '#{ssh_exec} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url}'", log_to_stdout: log_debug?
520
+ log_debug "[ ControlMaster - #{ssh_url} ] - Separate interactive ControlMaster closed"
521
+ end
522
+ out 'External ControlMaster has been spawned.'
523
+ out 'Please login into it, keep its session opened and press enter here when done...'
524
+ $stdin.gets
525
+ exit_status = 0
512
526
  end
513
- out 'External ControlMaster has been spawned.'
514
- out 'Please login into it, keep its session opened and press enter here when done...'
515
- $stdin.gets
516
- exit_status = 0
517
527
  else
518
528
  # Create the control master
519
529
  ssh_control_master_start_cmd = "#{ssh_exec}#{@passwords.key?(node) || @auth_password ? '' : ' -o BatchMode=yes'} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url} true"
@@ -74,13 +74,13 @@ module HybridPlatformsConductor
74
74
  # First check if we already have a test container that corresponds to this node and environment
75
75
  @lxc_details = nil
76
76
  with_proxmox do |proxmox|
77
- proxmox.get('nodes').each do |node_info|
77
+ proxmox_get(proxmox, 'nodes').each do |node_info|
78
78
  if proxmox_test_info[:test_config][:pve_nodes].include?(node_info['node']) && node_info['status'] == 'online'
79
- proxmox.get("nodes/#{node_info['node']}/lxc").each do |lxc_info|
79
+ proxmox_get(proxmox, "nodes/#{node_info['node']}/lxc").each do |lxc_info|
80
80
  vm_id = Integer(lxc_info['vmid'])
81
81
  if vm_id.between?(*proxmox_test_info[:test_config][:vm_ids_range])
82
82
  # Check if the description contains our ID
83
- lxc_config = proxmox.get("nodes/#{node_info['node']}/lxc/#{vm_id}/config")
83
+ lxc_config = proxmox_get(proxmox, "nodes/#{node_info['node']}/lxc/#{vm_id}/config")
84
84
  vm_description_lines = (lxc_config['description'] || '').split("\n")
85
85
  hpc_marker_idx = vm_description_lines.index('===== HPC info =====')
86
86
  unless hpc_marker_idx.nil?
@@ -222,8 +222,8 @@ module HybridPlatformsConductor
222
222
  with_proxmox do |proxmox|
223
223
  vm_id_str = @lxc_details[:vm_id].to_s
224
224
  status =
225
- if proxmox.get("nodes/#{@lxc_details[:pve_node]}/lxc").any? { |data_info| data_info['vmid'] == vm_id_str }
226
- status_info = proxmox.get("nodes/#{@lxc_details[:pve_node]}/lxc/#{@lxc_details[:vm_id]}/status/current")
225
+ if proxmox_get(proxmox, "nodes/#{@lxc_details[:pve_node]}/lxc").any? { |data_info| data_info['vmid'] == vm_id_str }
226
+ status_info = proxmox_get(proxmox, "nodes/#{@lxc_details[:pve_node]}/lxc/#{@lxc_details[:vm_id]}/status/current")
227
227
  # Careful that it is possible that somebody destroyed the VM and so its status is missing
228
228
  status = status_info.key?('status') ? status_info['status'].to_sym : :missing
229
229
  status = :exited if status == :stopped
@@ -292,11 +292,27 @@ module HybridPlatformsConductor
292
292
  end
293
293
  end
294
294
 
295
- # Maximum number of retries to perform on the Proxmox API.
296
- NBR_RETRIES_MAX = 5
297
-
298
- # Minimum seconds to wait between retries
299
- RETRY_WAIT_TIME_SECS = 5
295
+ # Perform a get operation on the API
296
+ # Protect the get API methods with a retry mechanism in case of 5xx errors.
297
+ #
298
+ # Parameters::
299
+ # * *proxmox* (Proxmox): The Proxmox instance
300
+ # * *path* (String): Path to get
301
+ # Result::
302
+ # * Object: API response
303
+ def proxmox_get(proxmox, path)
304
+ response = nil
305
+ idx_try = 0
306
+ loop do
307
+ response = proxmox.get(path)
308
+ break if !(response.is_a?(String)) || !(response =~ /^NOK: error code = 5\d\d$/)
309
+ log_warn "[ #{@node}/#{@environment} ] - Proxmox API call get #{path} returned error #{response} (attempt ##{idx_try}/#{proxmox_test_info[:api_max_retries]})"
310
+ raise "[ #{@node}/#{@environment} ] - Proxmox API call get #{path} returns #{response} continuously (tried #{idx_try + 1} times)" if idx_try >= proxmox_test_info[:api_max_retries]
311
+ idx_try += 1
312
+ sleep proxmox_test_info[:api_wait_between_retries_secs] + rand(5)
313
+ end
314
+ response
315
+ end
300
316
 
301
317
  # Run a Proxmox task.
302
318
  # Handle a retry mechanism in case of 5xx errors.
@@ -313,11 +329,11 @@ module HybridPlatformsConductor
313
329
  while task.nil? do
314
330
  task = proxmox.send(http_method, "nodes/#{pve_node}/#{sub_path}", *args)
315
331
  if task =~ /^NOK: error code = 5\d\d$/
316
- log_warn "[ #{@node}/#{@environment} ] - Proxmox API call #{http_method} nodes/#{pve_node}/#{sub_path} #{args} returned error #{task} (attempt ##{idx_try}/#{NBR_RETRIES_MAX})"
332
+ log_warn "[ #{@node}/#{@environment} ] - Proxmox API call #{http_method} nodes/#{pve_node}/#{sub_path} #{args} returned error #{task} (attempt ##{idx_try}/#{proxmox_test_info[:api_max_retries]})"
317
333
  task = nil
334
+ break if idx_try >= proxmox_test_info[:api_max_retries]
318
335
  idx_try += 1
319
- break if idx_try == NBR_RETRIES_MAX
320
- sleep RETRY_WAIT_TIME_SECS + rand(5)
336
+ sleep proxmox_test_info[:api_wait_between_retries_secs] + rand(5)
321
337
  end
322
338
  end
323
339
  if task.nil?
@@ -358,7 +374,7 @@ module HybridPlatformsConductor
358
374
  # Result::
359
375
  # * String: The task status
360
376
  def task_status(proxmox, pve_node, task)
361
- status_info = proxmox.get("nodes/#{pve_node}/tasks/#{task}/status")
377
+ status_info = proxmox_get(proxmox, "nodes/#{pve_node}/tasks/#{task}/status")
362
378
  "#{status_info['status']}#{status_info['exitstatus'] ? ":#{status_info['exitstatus']}" : ''}"
363
379
  end
364
380
 
@@ -377,7 +393,9 @@ module HybridPlatformsConductor
377
393
  (proxmox_test_info[:test_config].merge(
378
394
  proxmox_api_url: proxmox_test_info[:api_url],
379
395
  futex_file: '/tmp/hpc_proxmox_allocations.futex',
380
- logs_dir: '/tmp/hpc_proxmox_waiter_logs'
396
+ logs_dir: '/tmp/hpc_proxmox_waiter_logs',
397
+ api_max_retries: proxmox_test_info[:api_max_retries],
398
+ api_wait_between_retries_secs: proxmox_test_info[:api_wait_between_retries_secs]
381
399
  )).to_json
382
400
  )
383
401
  result = nil
@@ -486,7 +504,7 @@ module HybridPlatformsConductor
486
504
  # So remaining length is 255 - 13 = 242 characters.
487
505
  MAX_FILE_ID_SIZE = 242
488
506
 
489
- # Get an ID unique for theis node/environment and that can be used in file names.
507
+ # Get an ID unique for this node/environment and that can be used in file names.
490
508
  #
491
509
  # Result::
492
510
  # * String: ID
@@ -506,6 +524,8 @@ module HybridPlatformsConductor
506
524
  # Result::
507
525
  # * Hash<Symbol,Object>: Configuration of the Proxmox instance to be used:
508
526
  # * *api_url* (String): The Proxmox API URL
527
+ # * *api_max_retries* (Integer): Max number of API retries
528
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries
509
529
  # * *sync_node* (String): Node to be used to synchronize Proxmox resources acquisition
510
530
  # * *test_config* (Hash<Symbol,Object>): The test configuration. Check ProxmoxWaiter#initialize (config_file structure) method to get details.
511
531
  # * *vm_config* (Hash<Symbol,Object>): Extra configuration of a created container. Check #request_lxc_creation_for results to get details.
@@ -26,6 +26,8 @@ class ProxmoxWaiter
26
26
  # * *proxmox_api_url* (String): Proxmox API URL.
27
27
  # * *futex_file* (String): Path to the file serving as a futex.
28
28
  # * *logs_dir* (String): Path to the directory containing logs [default: '.']
29
+ # * *api_max_retries* (Integer): Max number of API retries
30
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries
29
31
  # * *pve_nodes* (Array<String>): List of PVE nodes allowed to spawn new containers [default: all]
30
32
  # * *vm_ips_list* (Array<String>): The list of IPs that are available for the Proxomx containers.
31
33
  # * *vm_ids_range* ([Integer, Integer]): Minimum and maximum reservable VM ID
@@ -637,11 +639,23 @@ class ProxmoxWaiter
637
639
 
638
640
  # Get a path from the API it returns its JSON result.
639
641
  # Keep a cache of it, whose lifespan is this ProxmoxWaiter instance.
642
+ # Have a retry mechanism to make sure eventual non-deterministic 5xx errors are not an issue.
640
643
  #
641
644
  # Parameters::
642
645
  # * *path* (String): API path to query
643
- def api_get(path)
644
- @gets_cache[path] = @proxmox.get(path) unless @gets_cache.key?(path)
646
+ # Result::
647
+ # * Object: The API response
648
+ def api_get(path, nbr_retries: 3, wait_between_retry_secs: 10)
649
+ unless @gets_cache.key?(path)
650
+ idx_try = 0
651
+ loop do
652
+ @gets_cache[path] = @proxmox.get(path)
653
+ break unless @gets_cache[path].is_a?(String) && @gets_cache[path] =~ /^NOK: error code = 5\d\d$/
654
+ raise "Proxmox API get #{path} returns #{@gets_cache[path]} continuously (tried #{idx_try + 1} times)" if idx_try >= @config['api_max_retries']
655
+ idx_try += 1
656
+ sleep @config['api_wait_between_retries_secs']
657
+ end
658
+ end
645
659
  @gets_cache[path]
646
660
  end
647
661
 
@@ -54,17 +54,19 @@ module HybridPlatformsConductor
54
54
  instance.stop
55
55
  instance.with_running_instance(port: 22) do
56
56
 
57
- # ===== Deploy removes root access
58
- # Check that we can't connect with root
59
- ssh_ok = false
60
- begin
61
- Net::SSH.start(instance.ip, 'root', password: 'root_pwd', auth_methods: ['password'], verify_host_key: :never) do |ssh|
62
- ssh_ok = ssh.exec!('echo Works').strip == 'Works'
57
+ unless @nodes_handler.get_root_access_allowed_of(@node) == 'true'
58
+ # ===== Deploy removes root access
59
+ # Check that we can't connect with root
60
+ ssh_ok = false
61
+ begin
62
+ Net::SSH.start(instance.ip, 'root', password: 'root_pwd', auth_methods: ['password'], verify_host_key: :never) do |ssh|
63
+ ssh_ok = ssh.exec!('echo Works').strip == 'Works'
64
+ end
65
+ rescue
63
66
  end
64
- rescue
67
+ assert_equal ssh_ok, false, 'Root can still connect on the image after deployment'
68
+ # Even if we can connect using root, run the idempotence test
65
69
  end
66
- assert_equal ssh_ok, false, 'Root can still connect on the image after deployment'
67
- # Even if we can connect using root, run the idempotence test
68
70
 
69
71
  # ===== Idempotence
70
72
  unless ssh_ok
@@ -76,6 +76,15 @@ module HybridPlatformsConductor
76
76
  # Make sure we update it.
77
77
  @nodes_handler.override_metadata_of @node, :host_ip, instance_ip
78
78
  @nodes_handler.invalidate_metadata_of @node, :host_keys
79
+ # Make sure the SSH transformations don't apply to this node
80
+ @config.ssh_connection_transforms.replace(@config.ssh_connection_transforms.map do |ssh_transform_info|
81
+ {
82
+ nodes_selectors_stack: ssh_transform_info[:nodes_selectors_stack].map do |nodes_selector|
83
+ @nodes_handler.select_nodes(nodes_selector).select { |selected_node| selected_node != @node }
84
+ end,
85
+ transform: ssh_transform_info[:transform]
86
+ }
87
+ end)
79
88
  end
80
89
  wait_for_port!(port) if port
81
90
  yield
@@ -428,9 +428,11 @@ module HybridPlatformsConductor
428
428
  end
429
429
  end
430
430
  # Compute the timeout that will be applied, from the max timeout sum for every node that has tests to run
431
- timeout = CONNECTION_TIMEOUT + @cmds_to_run.map do |_node, cmds_list|
432
- cmds_list.inject(0) { |total_timeout, (_cmd, test_info)| test_info[:timeout] + total_timeout }
433
- end.max
431
+ timeout = CONNECTION_TIMEOUT + (
432
+ @cmds_to_run.map do |_node, cmds_list|
433
+ cmds_list.inject(0) { |total_timeout, (_cmd, test_info)| test_info[:timeout] + total_timeout }
434
+ end.max || 0
435
+ )
434
436
  # Run commands on nodes, in grouped way to avoid too many connections, per node
435
437
  # Hash< String, Array<String> >
436
438
  @test_cmds = Hash[@cmds_to_run.map do |node, cmds_list|
@@ -464,33 +466,35 @@ module HybridPlatformsConductor
464
466
  end,
465
467
  test_execution: proc do |test|
466
468
  exit_status, stdout, stderr = @actions_result[test.node]
467
- if exit_status.is_a?(Symbol)
468
- test.error "Error while executing tests: #{exit_status}: #{stderr}"
469
- else
470
- log_debug <<~EOS
471
- ----- Commands for #{test.node}:
472
- #{@test_cmds[test.node][:remote_bash].join("\n")}
473
- ----- STDOUT:
474
- #{stdout}
475
- ----- STDERR:
476
- #{stderr}
477
- -----
478
- EOS
479
- # Skip the first section, as it can contain SSH banners
480
- cmd_stdouts = stdout.split("#{CMD_SEPARATOR}\n")[1..-1]
481
- cmd_stdouts = [] if cmd_stdouts.nil?
482
- cmd_stderrs = stderr.split("#{CMD_SEPARATOR}\n")[1..-1]
483
- cmd_stderrs = [] if cmd_stderrs.nil?
484
- @cmds_to_run[test.node].zip(cmd_stdouts, cmd_stderrs).each do |(cmd, test_info), cmd_stdout, cmd_stderr|
485
- # Find the section that corresponds to this test
486
- if test_info[:test] == test
487
- cmd_stdout = '' if cmd_stdout.nil?
488
- cmd_stderr = '' if cmd_stderr.nil?
489
- stdout_lines = cmd_stdout.split("\n")
490
- # Last line of stdout is the return code
491
- return_code = stdout_lines.empty? ? :command_cant_run : Integer(stdout_lines.last)
492
- test.error "Command '#{cmd}' returned error code #{return_code}", "----- STDOUT:\n#{stdout_lines[0..-2].join("\n")}\n----- STDERR:\n#{cmd_stderr}" unless return_code == 0
493
- test_info[:validator].call(stdout_lines[0..-2], cmd_stderr.split("\n"), return_code)
469
+ unless exit_status.nil?
470
+ if exit_status.is_a?(Symbol)
471
+ test.error "Error while executing tests: #{exit_status}: #{stderr}"
472
+ else
473
+ log_debug <<~EOS
474
+ ----- Commands for #{test.node}:
475
+ #{@test_cmds[test.node][:remote_bash].join("\n")}
476
+ ----- STDOUT:
477
+ #{stdout}
478
+ ----- STDERR:
479
+ #{stderr}
480
+ -----
481
+ EOS
482
+ # Skip the first section, as it can contain SSH banners
483
+ cmd_stdouts = stdout.split("#{CMD_SEPARATOR}\n")[1..-1]
484
+ cmd_stdouts = [] if cmd_stdouts.nil?
485
+ cmd_stderrs = stderr.split("#{CMD_SEPARATOR}\n")[1..-1]
486
+ cmd_stderrs = [] if cmd_stderrs.nil?
487
+ @cmds_to_run[test.node].zip(cmd_stdouts, cmd_stderrs).each do |(cmd, test_info), cmd_stdout, cmd_stderr|
488
+ # Find the section that corresponds to this test
489
+ if test_info[:test] == test
490
+ cmd_stdout = '' if cmd_stdout.nil?
491
+ cmd_stderr = '' if cmd_stderr.nil?
492
+ stdout_lines = cmd_stdout.split("\n")
493
+ # Last line of stdout is the return code
494
+ return_code = stdout_lines.empty? ? :command_cant_run : Integer(stdout_lines.last)
495
+ test.error "Command '#{cmd}' returned error code #{return_code}", "----- STDOUT:\n#{stdout_lines[0..-2].join("\n")}\n----- STDERR:\n#{cmd_stderr}" unless return_code == 0
496
+ test_info[:validator].call(stdout_lines[0..-2], cmd_stderr.split("\n"), return_code)
497
+ end
494
498
  end
495
499
  end
496
500
  end
@@ -1,5 +1,5 @@
1
1
  module HybridPlatformsConductor
2
2
 
3
- VERSION = '32.7.2'
3
+ VERSION = '32.9.0'
4
4
 
5
5
  end
@@ -94,6 +94,7 @@ module HybridPlatformsConductorTest
94
94
  ENV.delete 'hpc_password_for_thycotic'
95
95
  ENV.delete 'hpc_domain_for_thycotic'
96
96
  ENV.delete 'hpc_certificates'
97
+ ENV.delete 'hpc_interactive'
97
98
  # Set the necessary Hybrid Platforms Conductor environment variables
98
99
  ENV['hpc_ssh_user'] = 'test_user'
99
100
  HybridPlatformsConductor::ServicesHandler.packaged_deployments.clear
@@ -44,6 +44,58 @@ describe HybridPlatformsConductor::ActionsExecutor do
44
44
  end
45
45
  end
46
46
 
47
+ it 'can\'t create an SSH master to 1 node not having Session Exec capabilities when hpc_interactive is false' do
48
+ with_test_platform(nodes: { 'node' => { meta: { host_ip: '192.168.42.42', ssh_session_exec: 'false' } } }) do
49
+ ENV['hpc_interactive'] = 'false'
50
+ with_cmd_runner_mocked(
51
+ [
52
+ ['which env', proc { [0, "/usr/bin/env\n", ''] }],
53
+ ['ssh -V 2>&1', proc { [0, "OpenSSH_7.4p1 Debian-10+deb9u7, OpenSSL 1.0.2u 20 Dec 2019\n", ''] }]
54
+ ] + ssh_expected_commands_for(
55
+ { 'node' => { connection: '192.168.42.42', user: 'test_user' } },
56
+ with_control_master_create: false,
57
+ with_control_master_destroy: false
58
+ )
59
+ ) do
60
+ test_connector.ssh_user = 'test_user'
61
+ expect do
62
+ test_connector.with_connection_to(['node']) do
63
+ end
64
+ end.to raise_error 'Can\'t spawn interactive ControlMaster to node in non-interactive mode. You may want to change the hpc_interactive env variable.'
65
+ end
66
+ end
67
+ end
68
+
69
+ it 'fails without creating exception when creating an SSH master to 1 node not having Session Exec capabilities when hpc_interactive is false and we use no_exception' do
70
+ with_test_platform(nodes: {
71
+ 'node1' => { meta: { host_ip: '192.168.42.1' } },
72
+ 'node2' => { meta: { host_ip: '192.168.42.2', ssh_session_exec: 'false' } },
73
+ 'node3' => { meta: { host_ip: '192.168.42.3' } }
74
+ }) do
75
+ ENV['hpc_interactive'] = 'false'
76
+ with_cmd_runner_mocked(
77
+ [
78
+ ['which env', proc { [0, "/usr/bin/env\n", ''] }],
79
+ ['ssh -V 2>&1', proc { [0, "OpenSSH_7.4p1 Debian-10+deb9u7, OpenSSL 1.0.2u 20 Dec 2019\n", ''] }]
80
+ ] + ssh_expected_commands_for(
81
+ 'node1' => { connection: '192.168.42.1', user: 'test_user' },
82
+ 'node3' => { connection: '192.168.42.3', user: 'test_user' }
83
+ ) + ssh_expected_commands_for(
84
+ {
85
+ 'node2' => { connection: '192.168.42.2', user: 'test_user' }
86
+ },
87
+ with_control_master_create: false,
88
+ with_control_master_destroy: false
89
+ )
90
+ ) do
91
+ test_connector.ssh_user = 'test_user'
92
+ test_connector.with_connection_to(%w[node1 node2 node3], no_exception: true) do |connected_nodes|
93
+ expect(connected_nodes.sort).to eq %w[node1 node3].sort
94
+ end
95
+ end
96
+ end
97
+ end
98
+
47
99
  it 'creates SSH master to several nodes' do
48
100
  with_test_platform(nodes: {
49
101
  'node1' => { meta: { host_ip: '192.168.42.1' } },
@@ -123,6 +175,8 @@ describe HybridPlatformsConductor::ActionsExecutor do
123
175
  'node3' => { connection: '192.168.42.3', user: 'test_user' }
124
176
  },
125
177
  # Here the threads for node1's and node3's ControlMasters might not trigger before the one for node2, so they will not destroy it.
178
+ # Sometimes they don't even have time to create the Control Masters that node2 has already failed.
179
+ with_control_master_create_optional: true,
126
180
  with_control_master_destroy_optional: true
127
181
  ) + ssh_expected_commands_for(
128
182
  {
@@ -17,8 +17,8 @@ describe HybridPlatformsConductor::Deployer do
17
17
  block.call
18
18
  end
19
19
  provisioner = nil
20
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
21
- expect(test_deployer.local_environment).to eq true
20
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
21
+ expect(sub_test_deployer.local_environment).to eq true
22
22
  provisioner = test_instance
23
23
  expect(test_instance.node).to eq 'node'
24
24
  expect(test_instance.environment).to match /^#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+$/
@@ -40,8 +40,8 @@ describe HybridPlatformsConductor::Deployer do
40
40
  block.call
41
41
  end
42
42
  provisioner = nil
43
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
44
- expect(test_deployer.local_environment).to eq true
43
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
44
+ expect(sub_test_deployer.local_environment).to eq true
45
45
  provisioner = test_instance
46
46
  expect(test_instance.node).to eq 'node'
47
47
  expect(test_instance.environment).to match /^#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+$/
@@ -50,6 +50,70 @@ describe HybridPlatformsConductor::Deployer do
50
50
  end
51
51
  end
52
52
 
53
+ it 'gives a new test instance ready to be used in place of the node without SSH transformations' do
54
+ with_test_platform(
55
+ {
56
+ nodes: {
57
+ 'node1' => { meta: { host_ip: '192.168.42.1', ssh_session_exec: 'false' } },
58
+ 'node2' => { meta: { host_ip: '192.168.42.2', ssh_session_exec: 'false' } }
59
+ }
60
+ },
61
+ false,
62
+ '
63
+ for_nodes(%w[node1 node2]) do
64
+ transform_ssh_connection do |node, connection, connection_user, gateway, gateway_user|
65
+ ["#{connection}_#{node}", "#{connection_user}_#{node}", "#{gateway}_#{node}", "#{gateway_user}_#{node}"]
66
+ end
67
+ end
68
+ '
69
+ ) do |repository|
70
+ register_plugins(:provisioner, { test_provisioner: HybridPlatformsConductorTest::TestProvisioner })
71
+ File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
72
+ HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created running exited]
73
+ HybridPlatformsConductorTest::TestProvisioner.mocked_ip = '172.17.0.1'
74
+ expect(Socket).to receive(:tcp).with('172.17.0.1', 22, { connect_timeout: 1 }) do |&block|
75
+ block.call
76
+ end
77
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node1', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
78
+ expect(sub_test_deployer.instance_eval { @nodes_handler.get_ssh_session_exec_of('node1') }).to eq 'true'
79
+ expect(sub_test_deployer.instance_eval { @nodes_handler.get_ssh_session_exec_of('node2') }).to eq 'false'
80
+ ssh_transforms = test_instance.instance_eval { @config.ssh_connection_transforms }
81
+ expect(ssh_transforms.size).to eq 1
82
+ expect(ssh_transforms[0][:nodes_selectors_stack]).to eq [%w[node2]]
83
+ end
84
+ end
85
+ end
86
+
87
+ it 'gives a new test instance ready to be used in place of the node without sudo specificities' do
88
+ with_test_platform(
89
+ {
90
+ nodes: {
91
+ 'node1' => { meta: { host_ip: '192.168.42.1' } },
92
+ 'node2' => { meta: { host_ip: '192.168.42.2' } }
93
+ }
94
+ },
95
+ false,
96
+ '
97
+ for_nodes(%w[node1 node2]) do
98
+ sudo_for { |user| "other_sudo --user #{user}" }
99
+ end
100
+ '
101
+ ) do |repository|
102
+ register_plugins(:provisioner, { test_provisioner: HybridPlatformsConductorTest::TestProvisioner })
103
+ File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
104
+ HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created running exited]
105
+ HybridPlatformsConductorTest::TestProvisioner.mocked_ip = '172.17.0.1'
106
+ expect(Socket).to receive(:tcp).with('172.17.0.1', 22, { connect_timeout: 1 }) do |&block|
107
+ block.call
108
+ end
109
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node1', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
110
+ sudo_procs = test_instance.instance_eval { @config.sudo_procs }
111
+ expect(sudo_procs.size).to eq 1
112
+ expect(sudo_procs[0][:nodes_selectors_stack]).to eq [%w[node2]]
113
+ end
114
+ end
115
+ end
116
+
53
117
  it 'does not destroy instances when asked to reuse' do
54
118
  with_test_platform(
55
119
  nodes: { 'node' => { meta: { host_ip: '192.168.42.42' } } }
@@ -62,8 +126,8 @@ describe HybridPlatformsConductor::Deployer do
62
126
  block.call
63
127
  end
64
128
  provisioner = nil
65
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |test_deployer, test_instance|
66
- expect(test_deployer.local_environment).to eq true
129
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |sub_test_deployer, test_instance|
130
+ expect(sub_test_deployer.local_environment).to eq true
67
131
  provisioner = test_instance
68
132
  expect(test_instance.node).to eq 'node'
69
133
  expect(test_instance.environment).to eq "#{`whoami`.strip}_hpc_testing_provisioner"
@@ -84,8 +148,8 @@ describe HybridPlatformsConductor::Deployer do
84
148
  block.call
85
149
  end
86
150
  provisioner = nil
87
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |test_deployer, test_instance|
88
- expect(test_deployer.local_environment).to eq true
151
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |sub_test_deployer, test_instance|
152
+ expect(sub_test_deployer.local_environment).to eq true
89
153
  provisioner = test_instance
90
154
  expect(test_instance.node).to eq 'node'
91
155
  expect(test_instance.environment).to eq "#{`whoami`.strip}_hpc_testing_provisioner"
@@ -102,7 +166,7 @@ describe HybridPlatformsConductor::Deployer do
102
166
  File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
103
167
  HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created created exited exited]
104
168
  expect do
105
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
169
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
106
170
  end
107
171
  end.to raise_error /\[ node\/#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+ \] - Instance fails to be in a state among \(running\) with timeout 1\. Currently in state exited/
108
172
  end
@@ -120,7 +184,7 @@ describe HybridPlatformsConductor::Deployer do
120
184
  raise Errno::ETIMEDOUT, 'Timeout while reading from port 22'
121
185
  end
122
186
  expect do
123
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
187
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
124
188
  end
125
189
  end.to raise_error /\[ node\/#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+ \] - Instance fails to have port 22 opened with timeout 1\./
126
190
  end
@@ -28,6 +28,26 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
28
28
  end
29
29
  end
30
30
 
31
+ it 'retries a few times before ending in error for a 5xx API error' do
32
+ with_sync_node do
33
+ mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => { error_strings: ['NOK: error code = 500'] * 5 } }])
34
+ result = call_reserve_proxmox_container(2, 1024, 4, config: { api_max_retries: 4 })
35
+ expect(result[:error]).not_to eq nil
36
+ expect(result[:error]).to match /Unhandled exception from reserve_proxmox_container: Proxmox API get nodes\/pve_node_name\/lxc returns NOK: error code = 500 continuously \(tried 5 times\)/
37
+ end
38
+ end
39
+
40
+ it 'retries API errors a few times until it gets resolved' do
41
+ with_sync_node do
42
+ mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => { error_strings: ['NOK: error code = 500'] * 3 } }])
43
+ expect(call_reserve_proxmox_container(2, 1024, 4, config: { api_max_retries: 4 })).to eq(
44
+ pve_node: 'pve_node_name',
45
+ vm_id: 1000,
46
+ vm_ip: '192.168.0.100'
47
+ )
48
+ end
49
+ end
50
+
31
51
  end
32
52
 
33
53
  end
@@ -39,16 +39,7 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
39
39
  mock_proxmox_to_start_node(nbr_api_errors: 3)
40
40
  ]
41
41
  instance.create
42
- # To speed up the test, alter the wait time between retries.
43
- old_wait_secs = HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:RETRY_WAIT_TIME_SECS)
44
- begin
45
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
46
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, 1)
47
- instance.start
48
- ensure
49
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
50
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, old_wait_secs)
51
- end
42
+ instance.start
52
43
  end
53
44
  end
54
45
 
@@ -58,19 +49,10 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
58
49
  # 1 - The info on existing containers
59
50
  mock_proxmox_to_get_nodes_info,
60
51
  # 2 - The start of the container - fail too many times
61
- mock_proxmox_to_start_node(nbr_api_errors: HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:NBR_RETRIES_MAX), task_status: nil)
52
+ mock_proxmox_to_start_node(nbr_api_errors: 4, task_status: nil)
62
53
  ]
63
54
  instance.create
64
- # To speed up the test, alter the wait time between retries.
65
- old_wait_secs = HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:RETRY_WAIT_TIME_SECS)
66
- begin
67
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
68
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, 1)
69
- expect { instance.start }.to raise_error '[ node/test ] - Proxmox API call post nodes/pve_node_name/lxc/1024/status/start [] is constantly failing. Giving up.'
70
- ensure
71
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
72
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, old_wait_secs)
73
- end
55
+ expect { instance.start }.to raise_error '[ node/test ] - Proxmox API call post nodes/pve_node_name/lxc/1024/status/start [] is constantly failing. Giving up.'
74
56
  end
75
57
  end
76
58
 
@@ -23,6 +23,46 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
23
23
  end
24
24
  end
25
25
 
26
+
27
+ it '' do
28
+ with_test_proxmox_platform do |instance|
29
+ mock_proxmox_calls_with [
30
+ # 1 - The info on existing containers
31
+ mock_proxmox_to_get_nodes_info,
32
+ # 2 - The start of the container - fail a few times
33
+ mock_proxmox_to_start_node(nbr_api_errors: 2)
34
+ ]
35
+ instance.create
36
+ instance.start
37
+ end
38
+ end
39
+
40
+ it 'retries calls to the API when getting back errors 5xx' do
41
+ with_test_proxmox_platform do |instance|
42
+ mock_proxmox_calls_with [
43
+ # 1 - The info on existing containers
44
+ mock_proxmox_to_get_nodes_info,
45
+ # 2 - The status of the container
46
+ mock_proxmox_to_status_node(nbr_api_errors: 3)
47
+ ]
48
+ instance.create
49
+ expect(instance.state).to eq :created
50
+ end
51
+ end
52
+
53
+ it 'fails to get an instance\'s status when the Proxmox API fails too many times' do
54
+ with_test_proxmox_platform do |instance|
55
+ mock_proxmox_calls_with [
56
+ # 1 - The info on existing containers
57
+ mock_proxmox_to_get_nodes_info,
58
+ # 2 - The status of the container
59
+ mock_proxmox_to_status_node(nbr_api_errors: 4, status: nil)
60
+ ]
61
+ instance.create
62
+ expect { instance.state }.to raise_error '[ node/test ] - Proxmox API call get nodes/pve_node_name/lxc returns NOK: error code = 500 continuously (tried 4 times)'
63
+ end
64
+ end
65
+
26
66
  end
27
67
 
28
68
  end
@@ -69,7 +69,7 @@ describe HybridPlatformsConductor::TestsRunner do
69
69
  'node12' => { 'test_node12.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node12', stdout, stderr, exit_code] } },
70
70
  'node21' => { 'test_node21.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node21', stdout, stderr, exit_code] } },
71
71
  'node22' => { 'test_node22.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node22', stdout, stderr, exit_code] } }
72
- }}
72
+ } }
73
73
  expect(test_tests_runner.run_tests([{ all: true }])).to eq 0
74
74
  expect(ssh_executions.sort).to eq [
75
75
  ['node11', ['stdout11'], ['stderr11'], 0],
@@ -88,7 +88,7 @@ describe HybridPlatformsConductor::TestsRunner do
88
88
  HybridPlatformsConductorTest::TestPlugins::NodeSsh.node_tests = { node_ssh_test: {
89
89
  'node12' => { 'test_node12.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node12', stdout, stderr, exit_code] } },
90
90
  'node22' => { 'test_node22.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node22', stdout, stderr, exit_code] } }
91
- }}
91
+ } }
92
92
  expect(test_tests_runner.run_tests(%w[node12 node22])).to eq 0
93
93
  expect(ssh_executions.sort).to eq [
94
94
  ['node12', ['stdout12'], ['stderr12'], 0],
@@ -97,6 +97,19 @@ describe HybridPlatformsConductor::TestsRunner do
97
97
  end
98
98
  end
99
99
 
100
+ it 'does not execute anything when the tests report no command' do
101
+ with_test_platform_for_node_connection_tests do
102
+ test_tests_runner.tests = [:node_ssh_test]
103
+ ssh_executions = []
104
+ HybridPlatformsConductorTest::TestPlugins::NodeSsh.node_tests = { node_ssh_test: {
105
+ 'node12' => {},
106
+ 'node22' => {}
107
+ } }
108
+ expect(test_tests_runner.run_tests(%w[node12 node22])).to eq 0
109
+ expect(ssh_executions).to eq []
110
+ end
111
+ end
112
+
100
113
  it 'executes several SSH node tests once per node with the correct command, grouping commands' do
101
114
  with_test_platform_for_node_connection_tests do
102
115
  expect_actions_executor_runs([proc do |actions|
@@ -89,7 +89,11 @@ module HybridPlatformsConductorTest
89
89
  remaining_expected_commands.select do |(_expected_command, _command_code, options)|
90
90
  !options[:optional]
91
91
  end
92
- ).to eq([]), "Expected CmdRunner commands were not run:\n#{remaining_expected_commands.map(&:first).join("\n")}"
92
+ ).to eq([]), "Expected CmdRunner commands were not run:\n#{
93
+ remaining_expected_commands.map do |(expected_command, _command_code, options)|
94
+ "#{options[:optional] ? '[Optional] ' : ''}#{expected_command}"
95
+ end.join("\n")
96
+ }"
93
97
  # Un-mock the command runner
94
98
  allow(cmd_runner).to receive(:run_cmd).and_call_original
95
99
  end
@@ -15,6 +15,7 @@ module HybridPlatformsConductorTest
15
15
  # * *times* (Integer): Number of times this connection should be used [default: 1]
16
16
  # * *control_master_create_error* (String or nil): Error to simulate during the SSH ControlMaster creation, or nil for none [default: nil]
17
17
  # * *with_control_master_create* (Boolean): Do we create the control master? [default: true]
18
+ # * *with_control_master_create_optional* (Boolean): If true, then consider the ControlMaster creation to be optional [default: false]
18
19
  # * *with_control_master_check* (Boolean): Do we check the control master? [default: false]
19
20
  # * *with_control_master_destroy* (Boolean): Do we destroy the control master? [default: true]
20
21
  # * *with_control_master_destroy_optional* (Boolean): If true, then consider the ControlMaster destruction to be optional [default: false]
@@ -26,6 +27,7 @@ module HybridPlatformsConductorTest
26
27
  def ssh_expected_commands_for(
27
28
  nodes_connections,
28
29
  with_control_master_create: true,
30
+ with_control_master_create_optional: false,
29
31
  with_control_master_check: false,
30
32
  with_control_master_destroy: true,
31
33
  with_control_master_destroy_optional: false,
@@ -52,13 +54,15 @@ module HybridPlatformsConductorTest
52
54
  if with_session_exec
53
55
  /^.+\/ssh #{with_batch_mode ? '-o BatchMode=yes ' : ''}-o ControlMaster=yes -o ControlPersist=yes hpc\.#{Regexp.escape(node)} true$/
54
56
  else
55
- # Mock the user hitting enter as the Control Master will be created in another thread and the main thread waits for user input.
56
- expect($stdin).to receive(:gets) do
57
- # We have to wait for the Control Master creation thread to actually create the Control Master before hitting Enter.
58
- while !control_master_created do
59
- sleep 0.1
57
+ unless ENV['hpc_interactive'] == 'false'
58
+ # Mock the user hitting enter as the Control Master will be created in another thread and the main thread waits for user input.
59
+ expect($stdin).to receive(:gets) do
60
+ # We have to wait for the Control Master creation thread to actually create the Control Master before hitting Enter.
61
+ while !control_master_created do
62
+ sleep 0.1
63
+ end
64
+ "\n"
60
65
  end
61
- "\n"
62
66
  end
63
67
  /^xterm -e '.+\/ssh -o ControlMaster=yes -o ControlPersist=yes hpc\.#{Regexp.escape(node)}'$/
64
68
  end,
@@ -78,7 +82,8 @@ module HybridPlatformsConductorTest
78
82
  else
79
83
  [255, '', node_connection_info[:control_master_create_error]]
80
84
  end
81
- end
85
+ end,
86
+ { optional: with_control_master_create_optional }
82
87
  ]
83
88
  end
84
89
  if with_control_master_check
@@ -23,6 +23,8 @@ module HybridPlatformsConductorTest
23
23
  test_platform path: '#{repository}'
24
24
  proxmox(
25
25
  api_url: 'https://my-proxmox.my-domain.com:8006',
26
+ api_max_retries: 3,
27
+ api_wait_between_retries_secs: 0,
26
28
  sync_node: 'node',
27
29
  test_config: {
28
30
  pve_nodes: ['pve_node_name'],
@@ -75,12 +77,20 @@ module HybridPlatformsConductorTest
75
77
  # * *proxmox_password* (String or nil): Proxmox password used to connect to Proxmox API [default: nil]
76
78
  # * *proxmox_realm* (String or nil): Proxmox realm used to connect to Proxmox API [default: 'pam']
77
79
  # * *nodes_info* (Array<Hash>): Nodes info returned by the Proxmox API [default: []]
80
+ # * *nbr_api_errors* (Integer): Number of API errors 500 to mock before getting a successful query [defaults: 0]
78
81
  # * *extra_expects* (Proc or nil): Code called for additional expectations on the proxmox instance, or nil if none [default: nil]
79
82
  # * Parameters::
80
83
  # * *proxmox* (Double): The mocked Proxmox instance
81
84
  # Result::
82
85
  # * Proc: Code called in place of Proxmox.new. Signature is the same as Proxmox.new.
83
- def mock_proxmox_to_get_nodes_info(proxmox_user: nil, proxmox_password: nil, proxmox_realm: 'pam', nodes_info: [], extra_expects: nil)
86
+ def mock_proxmox_to_get_nodes_info(
87
+ proxmox_user: nil,
88
+ proxmox_password: nil,
89
+ proxmox_realm: 'pam',
90
+ nodes_info: [],
91
+ nbr_api_errors: 0,
92
+ extra_expects: nil
93
+ )
84
94
  proc do |url, pve_node, user, password, realm, options|
85
95
  expect(url).to eq 'https://my-proxmox.my-domain.com:8006/api2/json/'
86
96
  expect(pve_node).to eq 'my-proxmox'
@@ -97,8 +107,10 @@ module HybridPlatformsConductorTest
97
107
  # Nothing
98
108
  end
99
109
  # Mock checking existing nodes
100
- expect(proxmox).to receive(:get).with('nodes') do
101
- nodes_info
110
+ idx_try = 0
111
+ expect(proxmox).to receive(:get).exactly(nbr_api_errors + 1).times.with('nodes') do
112
+ idx_try += 1
113
+ idx_try <= nbr_api_errors ? 'NOK: error code = 500' : nodes_info
102
114
  end
103
115
  extra_expects.call(proxmox) unless extra_expects.nil?
104
116
  proxmox
@@ -243,13 +255,15 @@ module HybridPlatformsConductorTest
243
255
  # Parameters::
244
256
  # * *proxmox_user* (String or nil): Proxmox user used to connect to Proxmox API [default: nil]
245
257
  # * *proxmox_password* (String or nil): Proxmox password used to connect to Proxmox API [default: nil]
246
- # * *status* (String): Mocked status [default: 'created']
258
+ # * *status* (String or nil): Mocked status, or nil if it should not be asked [default: 'created']
259
+ # * *nbr_api_errors* (Integer): Number of API errors 500 to mock before getting a successful query [defaults: 0]
247
260
  # Result::
248
261
  # * Proc: Code called in place of Proxmox.new. Signature is the same as Proxmox.new.
249
262
  def mock_proxmox_to_status_node(
250
263
  proxmox_user: nil,
251
264
  proxmox_password: nil,
252
- task_status: 'OK'
265
+ status: 'created',
266
+ nbr_api_errors: 0
253
267
  )
254
268
  proc do |url, pve_node, user, password, realm, options|
255
269
  expect(url).to eq 'https://my-proxmox.my-domain.com:8006/api2/json/'
@@ -267,17 +281,25 @@ module HybridPlatformsConductorTest
267
281
  # Nothing
268
282
  end
269
283
  # Mock getting status of a container
270
- expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc') do
271
- [
284
+ idx_try = 0
285
+ expect(proxmox).to receive(:get).exactly(nbr_api_errors + (status.nil? ? 0 : 1)).times.with('nodes/pve_node_name/lxc') do
286
+ idx_try += 1
287
+ if idx_try <= nbr_api_errors
288
+ 'NOK: error code = 500'
289
+ else
290
+ [
291
+ {
292
+ 'vmid' => '1024'
293
+ }
294
+ ]
295
+ end
296
+ end
297
+ unless status.nil?
298
+ expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc/1024/status/current') do
272
299
  {
273
- 'vmid' => '1024'
300
+ 'status' => status
274
301
  }
275
- ]
276
- end
277
- expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc/1024/status/current') do
278
- {
279
- 'status' => 'created'
280
- }
302
+ end
281
303
  end
282
304
  proxmox
283
305
  end
@@ -548,13 +570,17 @@ module HybridPlatformsConductorTest
548
570
  ]
549
571
  when /^nodes\/([^\/]+)\/lxc$/
550
572
  pve_node_name = $1
551
- pve_nodes[pve_node_name][:lxc_containers].map do |vm_id, vm_info|
552
- {
553
- 'vmid' => vm_id.to_s,
554
- 'maxdisk' => vm_info[:maxdisk],
555
- 'maxmem' => vm_info[:maxmem],
556
- 'cpus' => vm_info[:cpus]
557
- }
573
+ if pve_nodes[pve_node_name][:error_strings].nil? || pve_nodes[pve_node_name][:error_strings].empty?
574
+ pve_nodes[pve_node_name][:lxc_containers].map do |vm_id, vm_info|
575
+ {
576
+ 'vmid' => vm_id.to_s,
577
+ 'maxdisk' => vm_info[:maxdisk],
578
+ 'maxmem' => vm_info[:maxmem],
579
+ 'cpus' => vm_info[:cpus]
580
+ }
581
+ end
582
+ else
583
+ pve_nodes[pve_node_name][:error_strings].shift
558
584
  end
559
585
  when /^nodes\/([^\/]+)\/lxc\/([^\/]+)\/config$/
560
586
  pve_node_name = $1
@@ -642,14 +668,26 @@ module HybridPlatformsConductorTest
642
668
  # * *wait_before_retry* (Integer): Specify the number of seconds to wait before retry [default: 0]
643
669
  # * *create* (Hash or nil): Create file content, or nil if none [default: nil]
644
670
  # * *destroy* (Hash or nil): Destroy file content, or nil if none [default: nil]
671
+ # * *api_max_retries* (Integer): Max number of API retries [default: 3]
672
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries [default: 0]
645
673
  # Result::
646
674
  # * Hash: JSON result of the call
647
- def call_reserve_proxmox_container_with(config: {}, max_retries: 1, wait_before_retry: 0, create: nil, destroy: nil)
675
+ def call_reserve_proxmox_container_with(
676
+ config: {},
677
+ max_retries: 1,
678
+ wait_before_retry: 0,
679
+ create: nil,
680
+ destroy: nil,
681
+ api_max_retries: 3,
682
+ api_wait_between_retries_secs: 0
683
+ )
648
684
  # Make sure we set default values in the config
649
685
  config = {
650
686
  proxmox_api_url: 'https://my-proxmox.my-domain.com:8006',
651
687
  futex_file: "#{@repository}/proxmox/allocations.futex",
652
688
  logs_dir: "#{Dir.tmpdir}/hpc_test_proxmox_waiter_logs",
689
+ api_max_retries: api_max_retries,
690
+ api_wait_between_retries_secs: api_wait_between_retries_secs,
653
691
  pve_nodes: ['pve_node_name'],
654
692
  vm_ips_list: %w[
655
693
  192.168.0.100
@@ -716,7 +754,14 @@ module HybridPlatformsConductorTest
716
754
  # * *wait_before_retry* (Integer): Specify the number of seconds to wait before retry [default: 0]
717
755
  # Result::
718
756
  # * Hash: JSON result of the call
719
- def call_reserve_proxmox_container(cpus, ram_mb, disk_gb, config: {}, max_retries: 1, wait_before_retry: 0)
757
+ def call_reserve_proxmox_container(
758
+ cpus,
759
+ ram_mb,
760
+ disk_gb,
761
+ config: {},
762
+ max_retries: 1,
763
+ wait_before_retry: 0
764
+ )
720
765
  call_reserve_proxmox_container_with(
721
766
  config: config,
722
767
  max_retries: max_retries,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hybrid_platforms_conductor
3
3
  version: !ruby/object:Gem::Version
4
- version: 32.7.2
4
+ version: 32.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Muriel Salvan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-12 00:00:00.000000000 Z
11
+ date: 2021-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: range_operators