hybrid_platforms_conductor 32.7.2 → 32.9.0

Files changed (20)
  1. checksums.yaml +4 -4
  2. data/lib/hybrid_platforms_conductor/deployer.rb +12 -1
  3. data/lib/hybrid_platforms_conductor/hpc_plugins/connector/ssh.rb +18 -8
  4. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox.rb +36 -16
  5. data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/proxmox_waiter.rb +16 -2
  6. data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_deploy_and_idempotence.rb +11 -9
  7. data/lib/hybrid_platforms_conductor/provisioner.rb +9 -0
  8. data/lib/hybrid_platforms_conductor/tests_runner.rb +34 -30
  9. data/lib/hybrid_platforms_conductor/version.rb +1 -1
  10. data/spec/hybrid_platforms_conductor_test.rb +1 -0
  11. data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/connections_spec.rb +54 -0
  12. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioner_spec.rb +74 -10
  13. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/retries_spec.rb +20 -0
  14. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/start_spec.rb +3 -21
  15. data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/state_spec.rb +40 -0
  16. data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_ssh_spec.rb +15 -2
  17. data/spec/hybrid_platforms_conductor_test/helpers/cmd_runner_helpers.rb +5 -1
  18. data/spec/hybrid_platforms_conductor_test/helpers/connector_ssh_helpers.rb +12 -7
  19. data/spec/hybrid_platforms_conductor_test/helpers/provisioner_proxmox_helpers.rb +68 -23
  20. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 3abcfb2c500d444ed5ea2486bda7150aa451986c502cafdf68afdd802e0af866
- data.tar.gz: 06cf63f0c1e5a187ca53e259133f7e14e4e73d6b4e2d7a3b79180b4797ffaf82
+ metadata.gz: 378b6da76cab8e20f60c0517f7bbef90933b5d573069437ee16d913ca81c0689
+ data.tar.gz: 1924dd81e2740b50ee55758271282729d6df1fd6a45b555f96c321d1b40e54e3
  SHA512:
- metadata.gz: f3c4891e7add29c7b424256888fb959967f5b7ab428bb4564aecab19a2de0c34f7dc4d05cf232cea944a4aa4008bd5a541a2a90786e2d4cf2fdf7750eb439b12
- data.tar.gz: 254f4f7c9c3e9638be1632cd27d7eb0aa296a6be5b13167e1926ab0de52bfb9f8490c6b6251bef7cc20876f686fe161d67fb48bace985e743ee80f52359e3093
+ metadata.gz: f8a72778375c154e42829a430bec151a7553b79874d1a878e129d80eed22b0f77f094751ee370c84441e7ed658d9e7b87e80c8f950ba64b491e1e62eec1f4a2f
+ data.tar.gz: e437a3f4871bde44d6bcbcd8a588dd83ff33c0374cee3769eada500a3acf04606e37144f98d2a49aaf69e0ff8cdd935d3eaa396fea5bb19ac485184df883db24
data/lib/hybrid_platforms_conductor/deployer.rb CHANGED
@@ -311,13 +311,24 @@ module HybridPlatformsConductor
  environment: environment,
  logger: @logger,
  logger_stderr: @logger_stderr,
- config: @config,
+ config: sub_executable.config,
  cmd_runner: @cmd_runner,
  # Here we use the NodesHandler that will be bound to the sub-Deployer only, as the node's metadata might be modified by the Provisioner.
  nodes_handler: sub_executable.nodes_handler,
  actions_executor: @actions_executor
  )
  instance.with_running_instance(stop_on_exit: true, destroy_on_exit: !reuse_instance, port: 22) do
+ # Test-provisioned nodes have SSH Session Exec capabilities
+ sub_executable.nodes_handler.override_metadata_of node, :ssh_session_exec, 'true'
+ # Test-provisioned nodes use default sudo
+ sub_executable.config.sudo_procs.replace(sub_executable.config.sudo_procs.map do |sudo_proc_info|
+ {
+ nodes_selectors_stack: sudo_proc_info[:nodes_selectors_stack].map do |nodes_selector|
+ @nodes_handler.select_nodes(nodes_selector).select { |selected_node| selected_node != node }
+ end,
+ sudo_proc: sudo_proc_info[:sudo_proc]
+ }
+ end)
  actions_executor = sub_executable.actions_executor
  deployer = sub_executable.deployer
  # Setup test environment for this container
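Note: to make the selector remapping above concrete, here is a minimal sketch with made-up node names. It simplifies by assuming the selectors are already plain node lists (in the real code, NodesHandler#select_nodes first resolves each selector):

    # Hypothetical data, for illustration only
    sudo_proc_info = {
      nodes_selectors_stack: [%w[node1 node2]],
      sudo_proc: proc { |user| "other_sudo --user #{user}" }
    }
    node = 'node1'
    # Drop the test-provisioned node from every selector, so its custom sudo no longer applies to it
    filtered_stack = sudo_proc_info[:nodes_selectors_stack].map do |nodes_selector|
      nodes_selector.select { |selected_node| selected_node != node }
    end
    filtered_stack # => [["node2"]]

The provisioner spec further down this diff checks exactly that outcome (a remaining selectors stack of [%w[node2]]).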
data/lib/hybrid_platforms_conductor/hpc_plugins/connector/ssh.rb CHANGED
@@ -505,15 +505,25 @@ module HybridPlatformsConductor
  if @nodes_handler.get_ssh_session_exec_of(node) == 'false'
  # Here we have to create a ControlMaster using an interactive session, as the SSH server prohibits ExecSession, and so command executions.
  # We'll do that using another terminal spawned in the background.
- Thread.new do
- log_debug "[ ControlMaster - #{ssh_url} ] - Spawn interactive ControlMaster in separate terminal"
- @cmd_runner.run_cmd "xterm -e '#{ssh_exec} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url}'", log_to_stdout: log_debug?
- log_debug "[ ControlMaster - #{ssh_url} ] - Separate interactive ControlMaster closed"
+ if ENV['hpc_interactive'] == 'false'
+ error = "Can't spawn interactive ControlMaster to #{node} in non-interactive mode. You may want to change the hpc_interactive env variable."
+ if no_exception
+ log_error error
+ exit_status = :non_interactive
+ else
+ raise error
+ end
+ else
+ Thread.new do
+ log_debug "[ ControlMaster - #{ssh_url} ] - Spawn interactive ControlMaster in separate terminal"
+ @cmd_runner.run_cmd "xterm -e '#{ssh_exec} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url}'", log_to_stdout: log_debug?
+ log_debug "[ ControlMaster - #{ssh_url} ] - Separate interactive ControlMaster closed"
+ end
+ out 'External ControlMaster has been spawned.'
+ out 'Please login into it, keep its session opened and press enter here when done...'
+ $stdin.gets
+ exit_status = 0
  end
- out 'External ControlMaster has been spawned.'
- out 'Please login into it, keep its session opened and press enter here when done...'
- $stdin.gets
- exit_status = 0
  else
  # Create the control master
  ssh_control_master_start_cmd = "#{ssh_exec}#{@passwords.key?(node) || @auth_password ? '' : ' -o BatchMode=yes'} -o ControlMaster=yes -o ControlPersist=yes #{ssh_url} true"
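Note: the new behaviour is driven by the hpc_interactive environment variable, which the connector reads straight from ENV as the hunk above shows. A hedged sketch of how a non-interactive caller (for example a CI job) might opt out of the xterm fallback, using only what this diff exposes:

    # Illustration only: declare the non-interactive context up front
    ENV['hpc_interactive'] = 'false'
    # For a node whose ssh_session_exec metadata is 'false', with_connection_to then either
    # raises "Can't spawn interactive ControlMaster to <node> in non-interactive mode..." or,
    # when called with no_exception: true, skips that node and reports a :non_interactive status.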
data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox.rb CHANGED
@@ -74,13 +74,13 @@ module HybridPlatformsConductor
  # First check if we already have a test container that corresponds to this node and environment
  @lxc_details = nil
  with_proxmox do |proxmox|
- proxmox.get('nodes').each do |node_info|
+ proxmox_get(proxmox, 'nodes').each do |node_info|
  if proxmox_test_info[:test_config][:pve_nodes].include?(node_info['node']) && node_info['status'] == 'online'
- proxmox.get("nodes/#{node_info['node']}/lxc").each do |lxc_info|
+ proxmox_get(proxmox, "nodes/#{node_info['node']}/lxc").each do |lxc_info|
  vm_id = Integer(lxc_info['vmid'])
  if vm_id.between?(*proxmox_test_info[:test_config][:vm_ids_range])
  # Check if the description contains our ID
- lxc_config = proxmox.get("nodes/#{node_info['node']}/lxc/#{vm_id}/config")
+ lxc_config = proxmox_get(proxmox, "nodes/#{node_info['node']}/lxc/#{vm_id}/config")
  vm_description_lines = (lxc_config['description'] || '').split("\n")
  hpc_marker_idx = vm_description_lines.index('===== HPC info =====')
  unless hpc_marker_idx.nil?
@@ -222,8 +222,8 @@ module HybridPlatformsConductor
  with_proxmox do |proxmox|
  vm_id_str = @lxc_details[:vm_id].to_s
  status =
- if proxmox.get("nodes/#{@lxc_details[:pve_node]}/lxc").any? { |data_info| data_info['vmid'] == vm_id_str }
- status_info = proxmox.get("nodes/#{@lxc_details[:pve_node]}/lxc/#{@lxc_details[:vm_id]}/status/current")
+ if proxmox_get(proxmox, "nodes/#{@lxc_details[:pve_node]}/lxc").any? { |data_info| data_info['vmid'] == vm_id_str }
+ status_info = proxmox_get(proxmox, "nodes/#{@lxc_details[:pve_node]}/lxc/#{@lxc_details[:vm_id]}/status/current")
  # Careful that it is possible that somebody destroyed the VM and so its status is missing
  status = status_info.key?('status') ? status_info['status'].to_sym : :missing
  status = :exited if status == :stopped
@@ -292,11 +292,27 @@ module HybridPlatformsConductor
  end
  end

- # Maximum number of retries to perform on the Proxmox API.
- NBR_RETRIES_MAX = 5
-
- # Minimum seconds to wait between retries
- RETRY_WAIT_TIME_SECS = 5
+ # Perform a get operation on the API
+ # Protect the get API methods with a retry mechanism in case of 5xx errors.
+ #
+ # Parameters::
+ # * *proxmox* (Proxmox): The Proxmox instance
+ # * *path* (String): Path to get
+ # Result::
+ # * Object: API response
+ def proxmox_get(proxmox, path)
+ response = nil
+ idx_try = 0
+ loop do
+ response = proxmox.get(path)
+ break if !(response.is_a?(String)) || !(response =~ /^NOK: error code = 5\d\d$/)
+ log_warn "[ #{@node}/#{@environment} ] - Proxmox API call get #{path} returned error #{response} (attempt ##{idx_try}/#{proxmox_test_info[:api_max_retries]})"
+ raise "[ #{@node}/#{@environment} ] - Proxmox API call get #{path} returns #{response} continuously (tried #{idx_try + 1} times)" if idx_try >= proxmox_test_info[:api_max_retries]
+ idx_try += 1
+ sleep proxmox_test_info[:api_wait_between_retries_secs] + rand(5)
+ end
+ response
+ end

  # Run a Proxmox task.
  # Handle a retry mechanism in case of 5xx errors.
@@ -313,11 +329,11 @@ module HybridPlatformsConductor
  while task.nil? do
  task = proxmox.send(http_method, "nodes/#{pve_node}/#{sub_path}", *args)
  if task =~ /^NOK: error code = 5\d\d$/
- log_warn "[ #{@node}/#{@environment} ] - Proxmox API call #{http_method} nodes/#{pve_node}/#{sub_path} #{args} returned error #{task} (attempt ##{idx_try}/#{NBR_RETRIES_MAX})"
+ log_warn "[ #{@node}/#{@environment} ] - Proxmox API call #{http_method} nodes/#{pve_node}/#{sub_path} #{args} returned error #{task} (attempt ##{idx_try}/#{proxmox_test_info[:api_max_retries]})"
  task = nil
+ break if idx_try >= proxmox_test_info[:api_max_retries]
  idx_try += 1
- break if idx_try == NBR_RETRIES_MAX
- sleep RETRY_WAIT_TIME_SECS + rand(5)
+ sleep proxmox_test_info[:api_wait_between_retries_secs] + rand(5)
  end
  end
  if task.nil?
@@ -358,7 +374,7 @@ module HybridPlatformsConductor
  # Result::
  # * String: The task status
  def task_status(proxmox, pve_node, task)
- status_info = proxmox.get("nodes/#{pve_node}/tasks/#{task}/status")
+ status_info = proxmox_get(proxmox, "nodes/#{pve_node}/tasks/#{task}/status")
  "#{status_info['status']}#{status_info['exitstatus'] ? ":#{status_info['exitstatus']}" : ''}"
  end

@@ -377,7 +393,9 @@ module HybridPlatformsConductor
  (proxmox_test_info[:test_config].merge(
  proxmox_api_url: proxmox_test_info[:api_url],
  futex_file: '/tmp/hpc_proxmox_allocations.futex',
- logs_dir: '/tmp/hpc_proxmox_waiter_logs'
+ logs_dir: '/tmp/hpc_proxmox_waiter_logs',
+ api_max_retries: proxmox_test_info[:api_max_retries],
+ api_wait_between_retries_secs: proxmox_test_info[:api_wait_between_retries_secs]
  )).to_json
  )
  result = nil
@@ -486,7 +504,7 @@ module HybridPlatformsConductor
  # So remaining length is 255 - 13 = 242 characters.
  MAX_FILE_ID_SIZE = 242

- # Get an ID unique for theis node/environment and that can be used in file names.
+ # Get an ID unique for this node/environment and that can be used in file names.
  #
  # Result::
  # * String: ID
@@ -506,6 +524,8 @@ module HybridPlatformsConductor
  # Result::
  # * Hash<Symbol,Object>: Configuration of the Proxmox instance to be used:
  # * *api_url* (String): The Proxmox API URL
+ # * *api_max_retries* (Integer): Max number of API retries
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries
  # * *sync_node* (String): Node to be used to synchronize Proxmox resources acquisition
  # * *test_config* (Hash<Symbol,Object>): The test configuration. Check ProxmoxWaiter#initialize (config_file structure) method to get details.
  # * *vm_config* (Hash<Symbol,Object>): Extra configuration of a created container. Check #request_lxc_creation_for results to get details.
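Note: the retry count and the wait between retries now come from the Proxmox provisioner configuration rather than from class constants. A minimal sketch of the corresponding DSL entries, mirroring the test helper configuration further down this diff (the values shown are illustrative):

    proxmox(
      api_url: 'https://my-proxmox.my-domain.com:8006',
      api_max_retries: 3,                 # give up after 3 retries on "NOK: error code = 5xx" responses
      api_wait_between_retries_secs: 0,   # seconds slept between retries (the provisioner adds a small random jitter)
      sync_node: 'node',
      test_config: {
        # pve_nodes, vm_ids_range, vm_ips_list, etc.
      }
    )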
data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/proxmox_waiter.rb CHANGED
@@ -26,6 +26,8 @@ class ProxmoxWaiter
  # * *proxmox_api_url* (String): Proxmox API URL.
  # * *futex_file* (String): Path to the file serving as a futex.
  # * *logs_dir* (String): Path to the directory containing logs [default: '.']
+ # * *api_max_retries* (Integer): Max number of API retries
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries
  # * *pve_nodes* (Array<String>): List of PVE nodes allowed to spawn new containers [default: all]
  # * *vm_ips_list* (Array<String>): The list of IPs that are available for the Proxomx containers.
  # * *vm_ids_range* ([Integer, Integer]): Minimum and maximum reservable VM ID
@@ -637,11 +639,23 @@ class ProxmoxWaiter

  # Get a path from the API it returns its JSON result.
  # Keep a cache of it, whose lifespan is this ProxmoxWaiter instance.
+ # Have a retry mechanism to make sure eventual non-deterministic 5xx errors are not an issue.
  #
  # Parameters::
  # * *path* (String): API path to query
- def api_get(path)
- @gets_cache[path] = @proxmox.get(path) unless @gets_cache.key?(path)
+ # Result::
+ # * Object: The API response
+ def api_get(path, nbr_retries: 3, wait_between_retry_secs: 10)
+ unless @gets_cache.key?(path)
+ idx_try = 0
+ loop do
+ @gets_cache[path] = @proxmox.get(path)
+ break unless @gets_cache[path].is_a?(String) && @gets_cache[path] =~ /^NOK: error code = 5\d\d$/
+ raise "Proxmox API get #{path} returns #{@gets_cache[path]} continuously (tried #{idx_try + 1} times)" if idx_try >= @config['api_max_retries']
+ idx_try += 1
+ sleep @config['api_wait_between_retries_secs']
+ end
+ end
  @gets_cache[path]
  end

data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_deploy_and_idempotence.rb CHANGED
@@ -54,17 +54,19 @@ module HybridPlatformsConductor
  instance.stop
  instance.with_running_instance(port: 22) do

- # ===== Deploy removes root access
- # Check that we can't connect with root
- ssh_ok = false
- begin
- Net::SSH.start(instance.ip, 'root', password: 'root_pwd', auth_methods: ['password'], verify_host_key: :never) do |ssh|
- ssh_ok = ssh.exec!('echo Works').strip == 'Works'
+ unless @nodes_handler.get_root_access_allowed_of(@node) == 'true'
+ # ===== Deploy removes root access
+ # Check that we can't connect with root
+ ssh_ok = false
+ begin
+ Net::SSH.start(instance.ip, 'root', password: 'root_pwd', auth_methods: ['password'], verify_host_key: :never) do |ssh|
+ ssh_ok = ssh.exec!('echo Works').strip == 'Works'
+ end
+ rescue
  end
- rescue
+ assert_equal ssh_ok, false, 'Root can still connect on the image after deployment'
+ # Even if we can connect using root, run the idempotence test
  end
- assert_equal ssh_ok, false, 'Root can still connect on the image after deployment'
- # Even if we can connect using root, run the idempotence test

  # ===== Idempotence
  unless ssh_ok
data/lib/hybrid_platforms_conductor/provisioner.rb CHANGED
@@ -76,6 +76,15 @@ module HybridPlatformsConductor
  # Make sure we update it.
  @nodes_handler.override_metadata_of @node, :host_ip, instance_ip
  @nodes_handler.invalidate_metadata_of @node, :host_keys
+ # Make sure the SSH transformations don't apply to this node
+ @config.ssh_connection_transforms.replace(@config.ssh_connection_transforms.map do |ssh_transform_info|
+ {
+ nodes_selectors_stack: ssh_transform_info[:nodes_selectors_stack].map do |nodes_selector|
+ @nodes_handler.select_nodes(nodes_selector).select { |selected_node| selected_node != @node }
+ end,
+ transform: ssh_transform_info[:transform]
+ }
+ end)
  end
  wait_for_port!(port) if port
  yield
data/lib/hybrid_platforms_conductor/tests_runner.rb CHANGED
@@ -428,9 +428,11 @@ module HybridPlatformsConductor
  end
  end
  # Compute the timeout that will be applied, from the max timeout sum for every node that has tests to run
- timeout = CONNECTION_TIMEOUT + @cmds_to_run.map do |_node, cmds_list|
- cmds_list.inject(0) { |total_timeout, (_cmd, test_info)| test_info[:timeout] + total_timeout }
- end.max
+ timeout = CONNECTION_TIMEOUT + (
+ @cmds_to_run.map do |_node, cmds_list|
+ cmds_list.inject(0) { |total_timeout, (_cmd, test_info)| test_info[:timeout] + total_timeout }
+ end.max || 0
+ )
  # Run commands on nodes, in grouped way to avoid too many connections, per node
  # Hash< String, Array<String> >
  @test_cmds = Hash[@cmds_to_run.map do |node, cmds_list|
@@ -464,33 +466,35 @@ module HybridPlatformsConductor
  end,
  test_execution: proc do |test|
  exit_status, stdout, stderr = @actions_result[test.node]
- if exit_status.is_a?(Symbol)
- test.error "Error while executing tests: #{exit_status}: #{stderr}"
- else
- log_debug <<~EOS
- ----- Commands for #{test.node}:
- #{@test_cmds[test.node][:remote_bash].join("\n")}
- ----- STDOUT:
- #{stdout}
- ----- STDERR:
- #{stderr}
- -----
- EOS
- # Skip the first section, as it can contain SSH banners
- cmd_stdouts = stdout.split("#{CMD_SEPARATOR}\n")[1..-1]
- cmd_stdouts = [] if cmd_stdouts.nil?
- cmd_stderrs = stderr.split("#{CMD_SEPARATOR}\n")[1..-1]
- cmd_stderrs = [] if cmd_stderrs.nil?
- @cmds_to_run[test.node].zip(cmd_stdouts, cmd_stderrs).each do |(cmd, test_info), cmd_stdout, cmd_stderr|
- # Find the section that corresponds to this test
- if test_info[:test] == test
- cmd_stdout = '' if cmd_stdout.nil?
- cmd_stderr = '' if cmd_stderr.nil?
- stdout_lines = cmd_stdout.split("\n")
- # Last line of stdout is the return code
- return_code = stdout_lines.empty? ? :command_cant_run : Integer(stdout_lines.last)
- test.error "Command '#{cmd}' returned error code #{return_code}", "----- STDOUT:\n#{stdout_lines[0..-2].join("\n")}\n----- STDERR:\n#{cmd_stderr}" unless return_code == 0
- test_info[:validator].call(stdout_lines[0..-2], cmd_stderr.split("\n"), return_code)
+ unless exit_status.nil?
+ if exit_status.is_a?(Symbol)
+ test.error "Error while executing tests: #{exit_status}: #{stderr}"
+ else
+ log_debug <<~EOS
+ ----- Commands for #{test.node}:
+ #{@test_cmds[test.node][:remote_bash].join("\n")}
+ ----- STDOUT:
+ #{stdout}
+ ----- STDERR:
+ #{stderr}
+ -----
+ EOS
+ # Skip the first section, as it can contain SSH banners
+ cmd_stdouts = stdout.split("#{CMD_SEPARATOR}\n")[1..-1]
+ cmd_stdouts = [] if cmd_stdouts.nil?
+ cmd_stderrs = stderr.split("#{CMD_SEPARATOR}\n")[1..-1]
+ cmd_stderrs = [] if cmd_stderrs.nil?
+ @cmds_to_run[test.node].zip(cmd_stdouts, cmd_stderrs).each do |(cmd, test_info), cmd_stdout, cmd_stderr|
+ # Find the section that corresponds to this test
+ if test_info[:test] == test
+ cmd_stdout = '' if cmd_stdout.nil?
+ cmd_stderr = '' if cmd_stderr.nil?
+ stdout_lines = cmd_stdout.split("\n")
+ # Last line of stdout is the return code
+ return_code = stdout_lines.empty? ? :command_cant_run : Integer(stdout_lines.last)
+ test.error "Command '#{cmd}' returned error code #{return_code}", "----- STDOUT:\n#{stdout_lines[0..-2].join("\n")}\n----- STDERR:\n#{cmd_stderr}" unless return_code == 0
+ test_info[:validator].call(stdout_lines[0..-2], cmd_stderr.split("\n"), return_code)
+ end
  end
  end
  end
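Note: the new `|| 0` guard in the timeout computation matters because Ruby's Enumerable#max returns nil for an empty collection. A one-line illustration of the failure mode it avoids (assumed scenario: no test command to run on any node):

    [].max   # => nil
    # Before the fix: CONNECTION_TIMEOUT + nil  -> TypeError (nil can't be coerced into Integer)
    # After the fix:  (... .max || 0) falls back to 0, so the timeout is just CONNECTION_TIMEOUT.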
data/lib/hybrid_platforms_conductor/version.rb CHANGED
@@ -1,5 +1,5 @@
  module HybridPlatformsConductor

- VERSION = '32.7.2'
+ VERSION = '32.9.0'

  end
data/spec/hybrid_platforms_conductor_test.rb CHANGED
@@ -94,6 +94,7 @@ module HybridPlatformsConductorTest
  ENV.delete 'hpc_password_for_thycotic'
  ENV.delete 'hpc_domain_for_thycotic'
  ENV.delete 'hpc_certificates'
+ ENV.delete 'hpc_interactive'
  # Set the necessary Hybrid Platforms Conductor environment variables
  ENV['hpc_ssh_user'] = 'test_user'
  HybridPlatformsConductor::ServicesHandler.packaged_deployments.clear
data/spec/hybrid_platforms_conductor_test/api/actions_executor/connectors/ssh/connections_spec.rb CHANGED
@@ -44,6 +44,58 @@ describe HybridPlatformsConductor::ActionsExecutor do
  end
  end

+ it 'can\'t create an SSH master to 1 node not having Session Exec capabilities when hpc_interactive is false' do
+ with_test_platform(nodes: { 'node' => { meta: { host_ip: '192.168.42.42', ssh_session_exec: 'false' } } }) do
+ ENV['hpc_interactive'] = 'false'
+ with_cmd_runner_mocked(
+ [
+ ['which env', proc { [0, "/usr/bin/env\n", ''] }],
+ ['ssh -V 2>&1', proc { [0, "OpenSSH_7.4p1 Debian-10+deb9u7, OpenSSL 1.0.2u 20 Dec 2019\n", ''] }]
+ ] + ssh_expected_commands_for(
+ { 'node' => { connection: '192.168.42.42', user: 'test_user' } },
+ with_control_master_create: false,
+ with_control_master_destroy: false
+ )
+ ) do
+ test_connector.ssh_user = 'test_user'
+ expect do
+ test_connector.with_connection_to(['node']) do
+ end
+ end.to raise_error 'Can\'t spawn interactive ControlMaster to node in non-interactive mode. You may want to change the hpc_interactive env variable.'
+ end
+ end
+ end
+
+ it 'fails without creating exception when creating an SSH master to 1 node not having Session Exec capabilities when hpc_interactive is false and we use no_exception' do
+ with_test_platform(nodes: {
+ 'node1' => { meta: { host_ip: '192.168.42.1' } },
+ 'node2' => { meta: { host_ip: '192.168.42.2', ssh_session_exec: 'false' } },
+ 'node3' => { meta: { host_ip: '192.168.42.3' } }
+ }) do
+ ENV['hpc_interactive'] = 'false'
+ with_cmd_runner_mocked(
+ [
+ ['which env', proc { [0, "/usr/bin/env\n", ''] }],
+ ['ssh -V 2>&1', proc { [0, "OpenSSH_7.4p1 Debian-10+deb9u7, OpenSSL 1.0.2u 20 Dec 2019\n", ''] }]
+ ] + ssh_expected_commands_for(
+ 'node1' => { connection: '192.168.42.1', user: 'test_user' },
+ 'node3' => { connection: '192.168.42.3', user: 'test_user' }
+ ) + ssh_expected_commands_for(
+ {
+ 'node2' => { connection: '192.168.42.2', user: 'test_user' }
+ },
+ with_control_master_create: false,
+ with_control_master_destroy: false
+ )
+ ) do
+ test_connector.ssh_user = 'test_user'
+ test_connector.with_connection_to(%w[node1 node2 node3], no_exception: true) do |connected_nodes|
+ expect(connected_nodes.sort).to eq %w[node1 node3].sort
+ end
+ end
+ end
+ end
+
  it 'creates SSH master to several nodes' do
  with_test_platform(nodes: {
  'node1' => { meta: { host_ip: '192.168.42.1' } },
@@ -123,6 +175,8 @@ describe HybridPlatformsConductor::ActionsExecutor do
  'node3' => { connection: '192.168.42.3', user: 'test_user' }
  },
  # Here the threads for node1's and node3's ControlMasters might not trigger before the one for node2, so they will not destroy it.
+ # Sometimes they don't even have time to create the Control Masters that node2 has already failed.
+ with_control_master_create_optional: true,
  with_control_master_destroy_optional: true
  ) + ssh_expected_commands_for(
  {
data/spec/hybrid_platforms_conductor_test/api/deployer/provisioner_spec.rb CHANGED
@@ -17,8 +17,8 @@ describe HybridPlatformsConductor::Deployer do
  block.call
  end
  provisioner = nil
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
- expect(test_deployer.local_environment).to eq true
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
+ expect(sub_test_deployer.local_environment).to eq true
  provisioner = test_instance
  expect(test_instance.node).to eq 'node'
  expect(test_instance.environment).to match /^#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+$/
@@ -40,8 +40,8 @@ describe HybridPlatformsConductor::Deployer do
  block.call
  end
  provisioner = nil
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
- expect(test_deployer.local_environment).to eq true
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
+ expect(sub_test_deployer.local_environment).to eq true
  provisioner = test_instance
  expect(test_instance.node).to eq 'node'
  expect(test_instance.environment).to match /^#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+$/
@@ -50,6 +50,70 @@ describe HybridPlatformsConductor::Deployer do
  end
  end

+ it 'gives a new test instance ready to be used in place of the node without SSH transformations' do
+ with_test_platform(
+ {
+ nodes: {
+ 'node1' => { meta: { host_ip: '192.168.42.1', ssh_session_exec: 'false' } },
+ 'node2' => { meta: { host_ip: '192.168.42.2', ssh_session_exec: 'false' } }
+ }
+ },
+ false,
+ '
+ for_nodes(%w[node1 node2]) do
+ transform_ssh_connection do |node, connection, connection_user, gateway, gateway_user|
+ ["#{connection}_#{node}", "#{connection_user}_#{node}", "#{gateway}_#{node}", "#{gateway_user}_#{node}"]
+ end
+ end
+ '
+ ) do |repository|
+ register_plugins(:provisioner, { test_provisioner: HybridPlatformsConductorTest::TestProvisioner })
+ File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
+ HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created running exited]
+ HybridPlatformsConductorTest::TestProvisioner.mocked_ip = '172.17.0.1'
+ expect(Socket).to receive(:tcp).with('172.17.0.1', 22, { connect_timeout: 1 }) do |&block|
+ block.call
+ end
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node1', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
+ expect(sub_test_deployer.instance_eval { @nodes_handler.get_ssh_session_exec_of('node1') }).to eq 'true'
+ expect(sub_test_deployer.instance_eval { @nodes_handler.get_ssh_session_exec_of('node2') }).to eq 'false'
+ ssh_transforms = test_instance.instance_eval { @config.ssh_connection_transforms }
+ expect(ssh_transforms.size).to eq 1
+ expect(ssh_transforms[0][:nodes_selectors_stack]).to eq [%w[node2]]
+ end
+ end
+ end
+
+ it 'gives a new test instance ready to be used in place of the node without sudo specificities' do
+ with_test_platform(
+ {
+ nodes: {
+ 'node1' => { meta: { host_ip: '192.168.42.1' } },
+ 'node2' => { meta: { host_ip: '192.168.42.2' } }
+ }
+ },
+ false,
+ '
+ for_nodes(%w[node1 node2]) do
+ sudo_for { |user| "other_sudo --user #{user}" }
+ end
+ '
+ ) do |repository|
+ register_plugins(:provisioner, { test_provisioner: HybridPlatformsConductorTest::TestProvisioner })
+ File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
+ HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created running exited]
+ HybridPlatformsConductorTest::TestProvisioner.mocked_ip = '172.17.0.1'
+ expect(Socket).to receive(:tcp).with('172.17.0.1', 22, { connect_timeout: 1 }) do |&block|
+ block.call
+ end
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node1', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
+ sudo_procs = test_instance.instance_eval { @config.sudo_procs }
+ expect(sudo_procs.size).to eq 1
+ expect(sudo_procs[0][:nodes_selectors_stack]).to eq [%w[node2]]
+ end
+ end
+ end
+
  it 'does not destroy instances when asked to reuse' do
  with_test_platform(
  nodes: { 'node' => { meta: { host_ip: '192.168.42.42' } } }
@@ -62,8 +126,8 @@ describe HybridPlatformsConductor::Deployer do
  block.call
  end
  provisioner = nil
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |test_deployer, test_instance|
- expect(test_deployer.local_environment).to eq true
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |sub_test_deployer, test_instance|
+ expect(sub_test_deployer.local_environment).to eq true
  provisioner = test_instance
  expect(test_instance.node).to eq 'node'
  expect(test_instance.environment).to eq "#{`whoami`.strip}_hpc_testing_provisioner"
@@ -84,8 +148,8 @@ describe HybridPlatformsConductor::Deployer do
  block.call
  end
  provisioner = nil
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |test_deployer, test_instance|
- expect(test_deployer.local_environment).to eq true
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner', reuse_instance: true) do |sub_test_deployer, test_instance|
+ expect(sub_test_deployer.local_environment).to eq true
  provisioner = test_instance
  expect(test_instance.node).to eq 'node'
  expect(test_instance.environment).to eq "#{`whoami`.strip}_hpc_testing_provisioner"
@@ -102,7 +166,7 @@ describe HybridPlatformsConductor::Deployer do
  File.write("#{test_config.hybrid_platforms_dir}/dummy_secrets.json", '{}')
  HybridPlatformsConductorTest::TestProvisioner.mocked_states = %i[created created created exited exited]
  expect do
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
  end
  end.to raise_error /\[ node\/#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+ \] - Instance fails to be in a state among \(running\) with timeout 1\. Currently in state exited/
  end
@@ -120,7 +184,7 @@ describe HybridPlatformsConductor::Deployer do
  raise Errno::ETIMEDOUT, 'Timeout while reading from port 22'
  end
  expect do
- test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |test_deployer, test_instance|
+ test_deployer.with_test_provisioned_instance(:test_provisioner, 'node', environment: 'hpc_testing_provisioner') do |sub_test_deployer, test_instance|
  end
  end.to raise_error /\[ node\/#{Regexp.escape(`whoami`.strip)}_hpc_testing_provisioner_\d+_\d+_\w+ \] - Instance fails to have port 22 opened with timeout 1\./
  end
data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/retries_spec.rb CHANGED
@@ -28,6 +28,26 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
  end
  end

+ it 'retries a few times before ending in error for a 5xx API error' do
+ with_sync_node do
+ mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => { error_strings: ['NOK: error code = 500'] * 5 } }])
+ result = call_reserve_proxmox_container(2, 1024, 4, config: { api_max_retries: 4 })
+ expect(result[:error]).not_to eq nil
+ expect(result[:error]).to match /Unhandled exception from reserve_proxmox_container: Proxmox API get nodes\/pve_node_name\/lxc returns NOK: error code = 500 continuously \(tried 5 times\)/
+ end
+ end
+
+ it 'retries API errors a few times until it gets resolved' do
+ with_sync_node do
+ mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => { error_strings: ['NOK: error code = 500'] * 3 } }])
+ expect(call_reserve_proxmox_container(2, 1024, 4, config: { api_max_retries: 4 })).to eq(
+ pve_node: 'pve_node_name',
+ vm_id: 1000,
+ vm_ip: '192.168.0.100'
+ )
+ end
+ end
+
  end

  end
data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/start_spec.rb CHANGED
@@ -39,16 +39,7 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
  mock_proxmox_to_start_node(nbr_api_errors: 3)
  ]
  instance.create
- # To speed up the test, alter the wait time between retries.
- old_wait_secs = HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:RETRY_WAIT_TIME_SECS)
- begin
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, 1)
- instance.start
- ensure
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, old_wait_secs)
- end
+ instance.start
  end
  end

@@ -58,19 +49,10 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
  # 1 - The info on existing containers
  mock_proxmox_to_get_nodes_info,
  # 2 - The start of the container - fail too many times
- mock_proxmox_to_start_node(nbr_api_errors: HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:NBR_RETRIES_MAX), task_status: nil)
+ mock_proxmox_to_start_node(nbr_api_errors: 4, task_status: nil)
  ]
  instance.create
- # To speed up the test, alter the wait time between retries.
- old_wait_secs = HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:RETRY_WAIT_TIME_SECS)
- begin
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, 1)
- expect { instance.start }.to raise_error '[ node/test ] - Proxmox API call post nodes/pve_node_name/lxc/1024/status/start [] is constantly failing. Giving up.'
- ensure
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
- HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, old_wait_secs)
- end
+ expect { instance.start }.to raise_error '[ node/test ] - Proxmox API call post nodes/pve_node_name/lxc/1024/status/start [] is constantly failing. Giving up.'
  end
  end

data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/state_spec.rb CHANGED
@@ -23,6 +23,46 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
  end
  end

+
+ it '' do
+ with_test_proxmox_platform do |instance|
+ mock_proxmox_calls_with [
+ # 1 - The info on existing containers
+ mock_proxmox_to_get_nodes_info,
+ # 2 - The start of the container - fail a few times
+ mock_proxmox_to_start_node(nbr_api_errors: 2)
+ ]
+ instance.create
+ instance.start
+ end
+ end
+
+ it 'retries calls to the API when getting back errors 5xx' do
+ with_test_proxmox_platform do |instance|
+ mock_proxmox_calls_with [
+ # 1 - The info on existing containers
+ mock_proxmox_to_get_nodes_info,
+ # 2 - The status of the container
+ mock_proxmox_to_status_node(nbr_api_errors: 3)
+ ]
+ instance.create
+ expect(instance.state).to eq :created
+ end
+ end
+
+ it 'fails to get an instance\'s status when the Proxmox API fails too many times' do
+ with_test_proxmox_platform do |instance|
+ mock_proxmox_calls_with [
+ # 1 - The info on existing containers
+ mock_proxmox_to_get_nodes_info,
+ # 2 - The status of the container
+ mock_proxmox_to_status_node(nbr_api_errors: 4, status: nil)
+ ]
+ instance.create
+ expect { instance.state }.to raise_error '[ node/test ] - Proxmox API call get nodes/pve_node_name/lxc returns NOK: error code = 500 continuously (tried 4 times)'
+ end
+ end
+
  end

  end
data/spec/hybrid_platforms_conductor_test/api/tests_runner/node_ssh_spec.rb CHANGED
@@ -69,7 +69,7 @@ describe HybridPlatformsConductor::TestsRunner do
  'node12' => { 'test_node12.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node12', stdout, stderr, exit_code] } },
  'node21' => { 'test_node21.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node21', stdout, stderr, exit_code] } },
  'node22' => { 'test_node22.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node22', stdout, stderr, exit_code] } }
- }}
+ } }
  expect(test_tests_runner.run_tests([{ all: true }])).to eq 0
  expect(ssh_executions.sort).to eq [
  ['node11', ['stdout11'], ['stderr11'], 0],
@@ -88,7 +88,7 @@ describe HybridPlatformsConductor::TestsRunner do
  HybridPlatformsConductorTest::TestPlugins::NodeSsh.node_tests = { node_ssh_test: {
  'node12' => { 'test_node12.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node12', stdout, stderr, exit_code] } },
  'node22' => { 'test_node22.sh' => proc { |stdout, stderr, exit_code| ssh_executions << ['node22', stdout, stderr, exit_code] } }
- }}
+ } }
  expect(test_tests_runner.run_tests(%w[node12 node22])).to eq 0
  expect(ssh_executions.sort).to eq [
  ['node12', ['stdout12'], ['stderr12'], 0],
@@ -97,6 +97,19 @@ describe HybridPlatformsConductor::TestsRunner do
  end
  end

+ it 'does not execute anything when the tests report no command' do
+ with_test_platform_for_node_connection_tests do
+ test_tests_runner.tests = [:node_ssh_test]
+ ssh_executions = []
+ HybridPlatformsConductorTest::TestPlugins::NodeSsh.node_tests = { node_ssh_test: {
+ 'node12' => {},
+ 'node22' => {}
+ } }
+ expect(test_tests_runner.run_tests(%w[node12 node22])).to eq 0
+ expect(ssh_executions).to eq []
+ end
+ end
+
  it 'executes several SSH node tests once per node with the correct command, grouping commands' do
  with_test_platform_for_node_connection_tests do
  expect_actions_executor_runs([proc do |actions|
data/spec/hybrid_platforms_conductor_test/helpers/cmd_runner_helpers.rb CHANGED
@@ -89,7 +89,11 @@ module HybridPlatformsConductorTest
  remaining_expected_commands.select do |(_expected_command, _command_code, options)|
  !options[:optional]
  end
- ).to eq([]), "Expected CmdRunner commands were not run:\n#{remaining_expected_commands.map(&:first).join("\n")}"
+ ).to eq([]), "Expected CmdRunner commands were not run:\n#{
+ remaining_expected_commands.map do |(expected_command, _command_code, options)|
+ "#{options[:optional] ? '[Optional] ' : ''}#{expected_command}"
+ end.join("\n")
+ }"
  # Un-mock the command runner
  allow(cmd_runner).to receive(:run_cmd).and_call_original
  end
data/spec/hybrid_platforms_conductor_test/helpers/connector_ssh_helpers.rb CHANGED
@@ -15,6 +15,7 @@ module HybridPlatformsConductorTest
  # * *times* (Integer): Number of times this connection should be used [default: 1]
  # * *control_master_create_error* (String or nil): Error to simulate during the SSH ControlMaster creation, or nil for none [default: nil]
  # * *with_control_master_create* (Boolean): Do we create the control master? [default: true]
+ # * *with_control_master_create_optional* (Boolean): If true, then consider the ControlMaster creation to be optional [default: false]
  # * *with_control_master_check* (Boolean): Do we check the control master? [default: false]
  # * *with_control_master_destroy* (Boolean): Do we destroy the control master? [default: true]
  # * *with_control_master_destroy_optional* (Boolean): If true, then consider the ControlMaster destruction to be optional [default: false]
@@ -26,6 +27,7 @@ module HybridPlatformsConductorTest
  def ssh_expected_commands_for(
  nodes_connections,
  with_control_master_create: true,
+ with_control_master_create_optional: false,
  with_control_master_check: false,
  with_control_master_destroy: true,
  with_control_master_destroy_optional: false,
@@ -52,13 +54,15 @@ module HybridPlatformsConductorTest
  if with_session_exec
  /^.+\/ssh #{with_batch_mode ? '-o BatchMode=yes ' : ''}-o ControlMaster=yes -o ControlPersist=yes hpc\.#{Regexp.escape(node)} true$/
  else
- # Mock the user hitting enter as the Control Master will be created in another thread and the main thread waits for user input.
- expect($stdin).to receive(:gets) do
- # We have to wait for the Control Master creation thread to actually create the Control Master before hitting Enter.
- while !control_master_created do
- sleep 0.1
+ unless ENV['hpc_interactive'] == 'false'
+ # Mock the user hitting enter as the Control Master will be created in another thread and the main thread waits for user input.
+ expect($stdin).to receive(:gets) do
+ # We have to wait for the Control Master creation thread to actually create the Control Master before hitting Enter.
+ while !control_master_created do
+ sleep 0.1
+ end
+ "\n"
  end
- "\n"
  end
  /^xterm -e '.+\/ssh -o ControlMaster=yes -o ControlPersist=yes hpc\.#{Regexp.escape(node)}'$/
  end,
@@ -78,7 +82,8 @@ module HybridPlatformsConductorTest
  else
  [255, '', node_connection_info[:control_master_create_error]]
  end
- end
+ end,
+ { optional: with_control_master_create_optional }
  ]
  end
  if with_control_master_check
data/spec/hybrid_platforms_conductor_test/helpers/provisioner_proxmox_helpers.rb CHANGED
@@ -23,6 +23,8 @@ module HybridPlatformsConductorTest
  test_platform path: '#{repository}'
  proxmox(
  api_url: 'https://my-proxmox.my-domain.com:8006',
+ api_max_retries: 3,
+ api_wait_between_retries_secs: 0,
  sync_node: 'node',
  test_config: {
  pve_nodes: ['pve_node_name'],
@@ -75,12 +77,20 @@ module HybridPlatformsConductorTest
  # * *proxmox_user* (String or nil): Proxmox user used to connect to Proxmox API [default: nil]
  # * *proxmox_password* (String or nil): Proxmox password used to connect to Proxmox API [default: nil]
  # * *proxmox_realm* (String or nil): Proxmox realm used to connect to Proxmox API [default: 'pam']
  # * *nodes_info* (Array<Hash>): Nodes info returned by the Proxmox API [default: []]
+ # * *nbr_api_errors* (Integer): Number of API errors 500 to mock before getting a successful query [defaults: 0]
  # * *extra_expects* (Proc or nil): Code called for additional expectations on the proxmox instance, or nil if none [default: nil]
  # * Parameters::
  # * *proxmox* (Double): The mocked Proxmox instance
  # Result::
  # * Proc: Code called in place of Proxmox.new. Signature is the same as Proxmox.new.
- def mock_proxmox_to_get_nodes_info(proxmox_user: nil, proxmox_password: nil, proxmox_realm: 'pam', nodes_info: [], extra_expects: nil)
+ def mock_proxmox_to_get_nodes_info(
+ proxmox_user: nil,
+ proxmox_password: nil,
+ proxmox_realm: 'pam',
+ nodes_info: [],
+ nbr_api_errors: 0,
+ extra_expects: nil
+ )
  proc do |url, pve_node, user, password, realm, options|
  expect(url).to eq 'https://my-proxmox.my-domain.com:8006/api2/json/'
  expect(pve_node).to eq 'my-proxmox'
@@ -97,8 +107,10 @@ module HybridPlatformsConductorTest
  # Nothing
  end
  # Mock checking existing nodes
- expect(proxmox).to receive(:get).with('nodes') do
- nodes_info
+ idx_try = 0
+ expect(proxmox).to receive(:get).exactly(nbr_api_errors + 1).times.with('nodes') do
+ idx_try += 1
+ idx_try <= nbr_api_errors ? 'NOK: error code = 500' : nodes_info
  end
  extra_expects.call(proxmox) unless extra_expects.nil?
  proxmox
@@ -243,13 +255,15 @@ module HybridPlatformsConductorTest
  # Parameters::
  # * *proxmox_user* (String or nil): Proxmox user used to connect to Proxmox API [default: nil]
  # * *proxmox_password* (String or nil): Proxmox password used to connect to Proxmox API [default: nil]
- # * *status* (String): Mocked status [default: 'created']
+ # * *status* (String or nil): Mocked status, or nil if it should not be asked [default: 'created']
+ # * *nbr_api_errors* (Integer): Number of API errors 500 to mock before getting a successful query [defaults: 0]
  # Result::
  # * Proc: Code called in place of Proxmox.new. Signature is the same as Proxmox.new.
  def mock_proxmox_to_status_node(
  proxmox_user: nil,
  proxmox_password: nil,
- task_status: 'OK'
+ status: 'created',
+ nbr_api_errors: 0
  )
  proc do |url, pve_node, user, password, realm, options|
  expect(url).to eq 'https://my-proxmox.my-domain.com:8006/api2/json/'
@@ -267,17 +281,25 @@ module HybridPlatformsConductorTest
  # Nothing
  end
  # Mock getting status of a container
- expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc') do
- [
+ idx_try = 0
+ expect(proxmox).to receive(:get).exactly(nbr_api_errors + (status.nil? ? 0 : 1)).times.with('nodes/pve_node_name/lxc') do
+ idx_try += 1
+ if idx_try <= nbr_api_errors
+ 'NOK: error code = 500'
+ else
+ [
+ {
+ 'vmid' => '1024'
+ }
+ ]
+ end
+ end
+ unless status.nil?
+ expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc/1024/status/current') do
  {
- 'vmid' => '1024'
+ 'status' => status
  }
- ]
- end
- expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc/1024/status/current') do
- {
- 'status' => 'created'
- }
+ end
  end
  proxmox
  end
@@ -548,13 +570,17 @@ module HybridPlatformsConductorTest
  ]
  when /^nodes\/([^\/]+)\/lxc$/
  pve_node_name = $1
- pve_nodes[pve_node_name][:lxc_containers].map do |vm_id, vm_info|
- {
- 'vmid' => vm_id.to_s,
- 'maxdisk' => vm_info[:maxdisk],
- 'maxmem' => vm_info[:maxmem],
- 'cpus' => vm_info[:cpus]
- }
+ if pve_nodes[pve_node_name][:error_strings].nil? || pve_nodes[pve_node_name][:error_strings].empty?
+ pve_nodes[pve_node_name][:lxc_containers].map do |vm_id, vm_info|
+ {
+ 'vmid' => vm_id.to_s,
+ 'maxdisk' => vm_info[:maxdisk],
+ 'maxmem' => vm_info[:maxmem],
+ 'cpus' => vm_info[:cpus]
+ }
+ end
+ else
+ pve_nodes[pve_node_name][:error_strings].shift
  end
  when /^nodes\/([^\/]+)\/lxc\/([^\/]+)\/config$/
  pve_node_name = $1
@@ -642,14 +668,26 @@ module HybridPlatformsConductorTest
  # * *wait_before_retry* (Integer): Specify the number of seconds to wait before retry [default: 0]
  # * *create* (Hash or nil): Create file content, or nil if none [default: nil]
  # * *destroy* (Hash or nil): Destroy file content, or nil if none [default: nil]
+ # * *api_max_retries* (Integer): Max number of API retries [default: 3]
+ # * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries [default: 0]
  # Result::
  # * Hash: JSON result of the call
- def call_reserve_proxmox_container_with(config: {}, max_retries: 1, wait_before_retry: 0, create: nil, destroy: nil)
+ def call_reserve_proxmox_container_with(
+ config: {},
+ max_retries: 1,
+ wait_before_retry: 0,
+ create: nil,
+ destroy: nil,
+ api_max_retries: 3,
+ api_wait_between_retries_secs: 0
+ )
  # Make sure we set default values in the config
  config = {
  proxmox_api_url: 'https://my-proxmox.my-domain.com:8006',
  futex_file: "#{@repository}/proxmox/allocations.futex",
  logs_dir: "#{Dir.tmpdir}/hpc_test_proxmox_waiter_logs",
+ api_max_retries: api_max_retries,
+ api_wait_between_retries_secs: api_wait_between_retries_secs,
  pve_nodes: ['pve_node_name'],
  vm_ips_list: %w[
  192.168.0.100
@@ -716,7 +754,14 @@ module HybridPlatformsConductorTest
  # * *wait_before_retry* (Integer): Specify the number of seconds to wait before retry [default: 0]
  # Result::
  # * Hash: JSON result of the call
- def call_reserve_proxmox_container(cpus, ram_mb, disk_gb, config: {}, max_retries: 1, wait_before_retry: 0)
+ def call_reserve_proxmox_container(
+ cpus,
+ ram_mb,
+ disk_gb,
+ config: {},
+ max_retries: 1,
+ wait_before_retry: 0
+ )
  call_reserve_proxmox_container_with(
  config: config,
  max_retries: max_retries,
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: hybrid_platforms_conductor
  version: !ruby/object:Gem::Version
- version: 32.7.2
+ version: 32.9.0
  platform: ruby
  authors:
  - Muriel Salvan
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-03-12 00:00:00.000000000 Z
+ date: 2021-03-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: range_operators