hybrid_platforms_conductor 32.8.2 → 32.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/hybrid_platforms_conductor/common_config_dsl/idempotence_tests.rb +23 -1
- data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox.rb +50 -18
- data/lib/hybrid_platforms_conductor/hpc_plugins/provisioner/proxmox/proxmox_waiter.rb +45 -3
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/check_deploy_and_idempotence.rb +4 -1
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/divergence.rb +16 -1
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/executables.rb +27 -13
- data/lib/hybrid_platforms_conductor/hpc_plugins/test/idempotence.rb +4 -1
- data/lib/hybrid_platforms_conductor/version.rb +1 -1
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/retries_spec.rb +64 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/reserve_proxmox_container/vm_ids_assignment_spec.rb +92 -0
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/start_spec.rb +3 -21
- data/spec/hybrid_platforms_conductor_test/api/deployer/provisioners/proxmox/state_spec.rb +26 -0
- data/spec/hybrid_platforms_conductor_test/helpers/provisioner_proxmox_helpers.rb +107 -25
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2d3d8ff58b63ff1dbcadaeba8bea4c5e6469ace3ef68d9b39ab1eabc8f68a209
|
4
|
+
data.tar.gz: 3b02a1707c32c436e6d353775e38d116bba42e16cc35a2854a69f052377c85a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f32880a9ab54d037caf3f9400bac31fcbc8993618c0859d22ff4d07585b8962683e295ee4a09847606a377aa343c78b09cddbc20026e34baeb03265409c66271
|
7
|
+
data.tar.gz: 2545b32a9aa07ea5cad6a01ba418d19eaa2ce28dcdd66146af07b98de6da44c30a9da8c35de33a4b054078f80f8813ceb05c12899b2f0c323b59ef383dc267da
|
@@ -11,13 +11,24 @@ module HybridPlatformsConductor
|
|
11
11
|
# Array< Hash<Symbol, Object> >
|
12
12
|
attr_reader :ignored_idempotence_tasks
|
13
13
|
|
14
|
-
#
|
14
|
+
# List of ignored tasks info. Each info has the following properties:
|
15
|
+
# * *nodes_selectors_stack* (Array<Object>): Stack of nodes selectors impacted by this rule
|
16
|
+
# * *ignored_tasks* (Hash<String, String>): List of task names for which we ignore divergence errors, with the corresponding descriptive reason for ignore.
|
17
|
+
# Array< Hash<Symbol, Object> >
|
18
|
+
attr_reader :ignored_divergent_tasks
|
19
|
+
|
20
|
+
# Initialize the DSL
|
15
21
|
def init_idempotence_tests
|
16
22
|
# List of ignored tasks info. Each info has the following properties:
|
17
23
|
# * *nodes_selectors_stack* (Array<Object>): Stack of nodes selectors impacted by this rule
|
18
24
|
# * *ignored_tasks* (Hash<String, String>): List of task names for which we ignore idempotence errors, with the corresponding descriptive reason for ignore.
|
19
25
|
# Array< Hash<Symbol, Object> >
|
20
26
|
@ignored_idempotence_tasks = []
|
27
|
+
# List of ignored tasks info. Each info has the following properties:
|
28
|
+
# * *nodes_selectors_stack* (Array<Object>): Stack of nodes selectors impacted by this rule
|
29
|
+
# * *ignored_tasks* (Hash<String, String>): List of task names for which we ignore divergence errors, with the corresponding descriptive reason for ignore.
|
30
|
+
# Array< Hash<Symbol, Object> >
|
31
|
+
@ignored_divergent_tasks = []
|
21
32
|
end
|
22
33
|
|
23
34
|
# Ignore idempotence errors on a set of tasks
|
@@ -31,6 +42,17 @@ module HybridPlatformsConductor
|
|
31
42
|
}
|
32
43
|
end
|
33
44
|
|
45
|
+
# Ignore divergence errors on a set of tasks
|
46
|
+
#
|
47
|
+
# Parameters::
|
48
|
+
# * *tasks_to_ignore* (Hash<String, String>): Set of tasks to ignore, along with the reason
|
49
|
+
def ignore_divergent_tasks(tasks_to_ignore)
|
50
|
+
@ignored_divergent_tasks << {
|
51
|
+
ignored_tasks: tasks_to_ignore,
|
52
|
+
nodes_selectors_stack: current_nodes_selectors_stack,
|
53
|
+
}
|
54
|
+
end
|
55
|
+
|
34
56
|
end
|
35
57
|
|
36
58
|
end
|
@@ -18,11 +18,19 @@ module HybridPlatformsConductor
|
|
18
18
|
attr_accessor *%i[logger logger_stderr]
|
19
19
|
|
20
20
|
def check_response(response)
|
21
|
-
|
22
|
-
|
21
|
+
msg = "Response from Proxmox API: #{response} - #{response.net_http_res.message}"
|
22
|
+
log_debug msg
|
23
|
+
log_warn msg if response.code >= 400 && !log_debug?
|
23
24
|
super
|
24
25
|
end
|
25
26
|
|
27
|
+
# Re-authenticate the Proxmox instance
|
28
|
+
# This can be useful when the API returns errors due to invalidated tokens
|
29
|
+
def reauthenticate
|
30
|
+
log_debug 'Force re-authentication to Proxmox'
|
31
|
+
@auth_params = create_ticket
|
32
|
+
end
|
33
|
+
|
26
34
|
end
|
27
35
|
::Proxmox::Proxmox.prepend ProxmoxPatches
|
28
36
|
|
@@ -74,13 +82,13 @@ module HybridPlatformsConductor
|
|
74
82
|
# First check if we already have a test container that corresponds to this node and environment
|
75
83
|
@lxc_details = nil
|
76
84
|
with_proxmox do |proxmox|
|
77
|
-
proxmox
|
85
|
+
proxmox_get(proxmox, 'nodes').each do |node_info|
|
78
86
|
if proxmox_test_info[:test_config][:pve_nodes].include?(node_info['node']) && node_info['status'] == 'online'
|
79
|
-
proxmox
|
87
|
+
proxmox_get(proxmox, "nodes/#{node_info['node']}/lxc").each do |lxc_info|
|
80
88
|
vm_id = Integer(lxc_info['vmid'])
|
81
89
|
if vm_id.between?(*proxmox_test_info[:test_config][:vm_ids_range])
|
82
90
|
# Check if the description contains our ID
|
83
|
-
lxc_config = proxmox
|
91
|
+
lxc_config = proxmox_get(proxmox, "nodes/#{node_info['node']}/lxc/#{vm_id}/config")
|
84
92
|
vm_description_lines = (lxc_config['description'] || '').split("\n")
|
85
93
|
hpc_marker_idx = vm_description_lines.index('===== HPC info =====')
|
86
94
|
unless hpc_marker_idx.nil?
|
@@ -222,8 +230,8 @@ module HybridPlatformsConductor
|
|
222
230
|
with_proxmox do |proxmox|
|
223
231
|
vm_id_str = @lxc_details[:vm_id].to_s
|
224
232
|
status =
|
225
|
-
if proxmox
|
226
|
-
status_info = proxmox
|
233
|
+
if proxmox_get(proxmox, "nodes/#{@lxc_details[:pve_node]}/lxc").any? { |data_info| data_info['vmid'] == vm_id_str }
|
234
|
+
status_info = proxmox_get(proxmox, "nodes/#{@lxc_details[:pve_node]}/lxc/#{@lxc_details[:vm_id]}/status/current")
|
227
235
|
# Careful that it is possible that somebody destroyed the VM and so its status is missing
|
228
236
|
status = status_info.key?('status') ? status_info['status'].to_sym : :missing
|
229
237
|
status = :exited if status == :stopped
|
@@ -292,11 +300,29 @@ module HybridPlatformsConductor
|
|
292
300
|
end
|
293
301
|
end
|
294
302
|
|
295
|
-
#
|
296
|
-
|
297
|
-
|
298
|
-
#
|
299
|
-
|
303
|
+
# Perform a get operation on the API
|
304
|
+
# Protect the get API methods with a retry mechanism in case of 5xx errors.
|
305
|
+
#
|
306
|
+
# Parameters::
|
307
|
+
# * *proxmox* (Proxmox): The Proxmox instance
|
308
|
+
# * *path* (String): Path to get
|
309
|
+
# Result::
|
310
|
+
# * Object: API response
|
311
|
+
def proxmox_get(proxmox, path)
|
312
|
+
response = nil
|
313
|
+
idx_try = 0
|
314
|
+
loop do
|
315
|
+
response = proxmox.get(path)
|
316
|
+
break if !(response.is_a?(String)) || !(response =~ /^NOK: error code = 5\d\d$/)
|
317
|
+
log_warn "[ #{@node}/#{@environment} ] - Proxmox API call get #{path} returned error #{response} (attempt ##{idx_try}/#{proxmox_test_info[:api_max_retries]})"
|
318
|
+
raise "[ #{@node}/#{@environment} ] - Proxmox API call get #{path} returns #{response} continuously (tried #{idx_try + 1} times)" if idx_try >= proxmox_test_info[:api_max_retries]
|
319
|
+
idx_try += 1
|
320
|
+
# We have to reauthenticate: 500 errors raised by Proxmox are often due to the token being wrongly invalidated
|
321
|
+
proxmox.reauthenticate
|
322
|
+
sleep proxmox_test_info[:api_wait_between_retries_secs] + rand(5)
|
323
|
+
end
|
324
|
+
response
|
325
|
+
end
|
300
326
|
|
301
327
|
# Run a Proxmox task.
|
302
328
|
# Handle a retry mechanism in case of 5xx errors.
|
@@ -313,11 +339,13 @@ module HybridPlatformsConductor
|
|
313
339
|
while task.nil? do
|
314
340
|
task = proxmox.send(http_method, "nodes/#{pve_node}/#{sub_path}", *args)
|
315
341
|
if task =~ /^NOK: error code = 5\d\d$/
|
316
|
-
log_warn "[ #{@node}/#{@environment} ] - Proxmox API call #{http_method} nodes/#{pve_node}/#{sub_path} #{args} returned error #{task} (attempt ##{idx_try}/#{
|
342
|
+
log_warn "[ #{@node}/#{@environment} ] - Proxmox API call #{http_method} nodes/#{pve_node}/#{sub_path} #{args} returned error #{task} (attempt ##{idx_try}/#{proxmox_test_info[:api_max_retries]})"
|
317
343
|
task = nil
|
344
|
+
break if idx_try >= proxmox_test_info[:api_max_retries]
|
318
345
|
idx_try += 1
|
319
|
-
|
320
|
-
|
346
|
+
# We have to reauthenticate: 500 errors raised by Proxmox are often due to the token being wrongly invalidated
|
347
|
+
proxmox.reauthenticate
|
348
|
+
sleep proxmox_test_info[:api_wait_between_retries_secs] + rand(5)
|
321
349
|
end
|
322
350
|
end
|
323
351
|
if task.nil?
|
@@ -358,7 +386,7 @@ module HybridPlatformsConductor
|
|
358
386
|
# Result::
|
359
387
|
# * String: The task status
|
360
388
|
def task_status(proxmox, pve_node, task)
|
361
|
-
status_info = proxmox
|
389
|
+
status_info = proxmox_get(proxmox, "nodes/#{pve_node}/tasks/#{task}/status")
|
362
390
|
"#{status_info['status']}#{status_info['exitstatus'] ? ":#{status_info['exitstatus']}" : ''}"
|
363
391
|
end
|
364
392
|
|
@@ -377,7 +405,9 @@ module HybridPlatformsConductor
|
|
377
405
|
(proxmox_test_info[:test_config].merge(
|
378
406
|
proxmox_api_url: proxmox_test_info[:api_url],
|
379
407
|
futex_file: '/tmp/hpc_proxmox_allocations.futex',
|
380
|
-
logs_dir: '/tmp/hpc_proxmox_waiter_logs'
|
408
|
+
logs_dir: '/tmp/hpc_proxmox_waiter_logs',
|
409
|
+
api_max_retries: proxmox_test_info[:api_max_retries],
|
410
|
+
api_wait_between_retries_secs: proxmox_test_info[:api_wait_between_retries_secs]
|
381
411
|
)).to_json
|
382
412
|
)
|
383
413
|
result = nil
|
@@ -486,7 +516,7 @@ module HybridPlatformsConductor
|
|
486
516
|
# So remaining length is 255 - 13 = 242 characters.
|
487
517
|
MAX_FILE_ID_SIZE = 242
|
488
518
|
|
489
|
-
# Get an ID unique for
|
519
|
+
# Get an ID unique for this node/environment and that can be used in file names.
|
490
520
|
#
|
491
521
|
# Result::
|
492
522
|
# * String: ID
|
@@ -506,6 +536,8 @@ module HybridPlatformsConductor
|
|
506
536
|
# Result::
|
507
537
|
# * Hash<Symbol,Object>: Configuration of the Proxmox instance to be used:
|
508
538
|
# * *api_url* (String): The Proxmox API URL
|
539
|
+
# * *api_max_retries* (Integer): Max number of API retries
|
540
|
+
# * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries
|
509
541
|
# * *sync_node* (String): Node to be used to synchronize Proxmox resources acquisition
|
510
542
|
# * *test_config* (Hash<Symbol,Object>): The test configuration. Check ProxmoxWaiter#initialize (config_file structure) method to get details.
|
511
543
|
# * *vm_config* (Hash<Symbol,Object>): Extra configuration of a created container. Check #request_lxc_creation_for results to get details.
|
@@ -26,6 +26,8 @@ class ProxmoxWaiter
|
|
26
26
|
# * *proxmox_api_url* (String): Proxmox API URL.
|
27
27
|
# * *futex_file* (String): Path to the file serving as a futex.
|
28
28
|
# * *logs_dir* (String): Path to the directory containing logs [default: '.']
|
29
|
+
# * *api_max_retries* (Integer): Max number of API retries
|
30
|
+
# * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries
|
29
31
|
# * *pve_nodes* (Array<String>): List of PVE nodes allowed to spawn new containers [default: all]
|
30
32
|
# * *vm_ips_list* (Array<String>): The list of IPs that are available for the Proxmox containers.
|
31
33
|
# * *vm_ids_range* ([Integer, Integer]): Minimum and maximum reservable VM ID
|
@@ -603,10 +605,35 @@ class ProxmoxWaiter
|
|
603
605
|
# Result::
|
604
606
|
# * Array<Integer>: List of available VM IDs
|
605
607
|
def free_vm_ids
|
606
|
-
Range.new(*@config['vm_ids_range']).to_a -
|
608
|
+
vm_ids = Range.new(*@config['vm_ids_range']).to_a -
|
607
609
|
api_get('nodes').map do |pve_node_info|
|
608
610
|
api_get("nodes/#{pve_node_info['node']}/lxc").map { |lxc_info| Integer(lxc_info['vmid']) }
|
609
611
|
end.flatten
|
612
|
+
# Make sure the vm_ids that are available don't have any leftovers in the cgroups.
|
613
|
+
# This can happen with some Proxmox bugs, and makes the API return 500 errors.
|
614
|
+
# cf. https://forum.proxmox.com/threads/lxc-console-cleanup-error.38293/
|
615
|
+
# TODO: Remove this once Proxmox has solved the issue with leftovers of destroyed VMs.
|
616
|
+
(vm_ids.map(&:to_s) & vm_ids_in_cgroups).each do |vm_id_str|
|
617
|
+
# We have a vm_id that is available but still has some leftovers in cgroups.
|
618
|
+
# Clean those to avoid 500 errors in API.
|
619
|
+
log "Found VMID #{vm_id_str} with leftovers in cgroups. Cleaning those."
|
620
|
+
Dir.glob("/sys/fs/cgroup/*/lxc/#{vm_id_str}") do |cgroup_dir|
|
621
|
+
log "Removing #{cgroup_dir}"
|
622
|
+
FileUtils.rm_rf cgroup_dir
|
623
|
+
end
|
624
|
+
end
|
625
|
+
vm_ids
|
626
|
+
end
|
627
|
+
|
628
|
+
# Return the list of VM IDs present in cgroups
|
629
|
+
#
|
630
|
+
# Result::
|
631
|
+
# * Array<String>: List of VM IDs as strings (as some are not Integers like '1010-1')
|
632
|
+
def vm_ids_in_cgroups
|
633
|
+
Dir.glob('/sys/fs/cgroup/*/lxc/*').map do |file|
|
634
|
+
basename = File.basename(file)
|
635
|
+
basename =~ /^\d.+$/ ? basename : nil
|
636
|
+
end.compact.sort.uniq
|
610
637
|
end
|
611
638
|
|
612
639
|
# Wait for a given Proxmox task completion
|
@@ -637,11 +664,26 @@ class ProxmoxWaiter
|
|
637
664
|
|
638
665
|
# Get a path from the API and return its JSON result.
|
639
666
|
# Keep a cache of it, whose lifespan is this ProxmoxWaiter instance.
|
667
|
+
# Have a retry mechanism to make sure possible non-deterministic 5xx errors are not an issue.
|
640
668
|
#
|
641
669
|
# Parameters::
|
642
670
|
# * *path* (String): API path to query
|
643
|
-
|
644
|
-
|
671
|
+
# Result::
|
672
|
+
# * Object: The API response
|
673
|
+
def api_get(path, nbr_retries: 3, wait_between_retry_secs: 10)
|
674
|
+
unless @gets_cache.key?(path)
|
675
|
+
idx_try = 0
|
676
|
+
loop do
|
677
|
+
@gets_cache[path] = @proxmox.get(path)
|
678
|
+
break unless @gets_cache[path].is_a?(String) && @gets_cache[path] =~ /^NOK: error code = 5\d\d$/
|
679
|
+
raise "Proxmox API get #{path} returns #{@gets_cache[path]} continuously (tried #{idx_try + 1} times)" if idx_try >= @config['api_max_retries']
|
680
|
+
idx_try += 1
|
681
|
+
# We have to reauthenticate: 500 errors raised by Proxmox are often due to the token being wrongly invalidated
|
682
|
+
# TODO: Provide a way to do it properly in the official gem
|
683
|
+
@proxmox.instance_variable_set(:@auth_params, @proxmox.send(:create_ticket))
|
684
|
+
sleep @config['api_wait_between_retries_secs']
|
685
|
+
end
|
686
|
+
end
|
645
687
|
@gets_cache[path]
|
646
688
|
end
|
647
689
|
|
@@ -81,7 +81,10 @@ module HybridPlatformsConductor
|
|
81
81
|
exit_status, stdout, stderr = deployer.deploy_on(@node)[@node]
|
82
82
|
assert_equal exit_status, 0, "Check-node after deployment returned error code #{exit_status}", log_debug? ? nil : deployer.stdouts_to_s
|
83
83
|
# Check that the output of the check-node returns no changes.
|
84
|
-
ignored_tasks =
|
84
|
+
ignored_tasks = (
|
85
|
+
@nodes_handler.select_confs_for_node(@node, @config.ignored_idempotence_tasks) +
|
86
|
+
@nodes_handler.select_confs_for_node(@node, @config.ignored_divergent_tasks)
|
87
|
+
).inject({}) do |merged_ignored_tasks, conf|
|
85
88
|
merged_ignored_tasks.merge(conf[:ignored_tasks])
|
86
89
|
end
|
87
90
|
@deployer.parse_deploy_output(@node, stdout, stderr).each do |task_info|
|
@@ -11,8 +11,23 @@ module HybridPlatformsConductor
|
|
11
11
|
|
12
12
|
# Check my_test_plugin.rb.sample documentation for signature details.
|
13
13
|
def test_on_check_node(stdout, stderr, exit_status)
|
14
|
+
# Check that the output of the check-node returns no changes.
|
15
|
+
ignored_tasks = @nodes_handler.select_confs_for_node(@node, @config.ignored_divergent_tasks).inject({}) do |merged_ignored_tasks, conf|
|
16
|
+
merged_ignored_tasks.merge(conf[:ignored_tasks])
|
17
|
+
end
|
14
18
|
@deployer.parse_deploy_output(@node, stdout, stderr).each do |task_info|
|
15
|
-
|
19
|
+
if task_info[:status] == :changed
|
20
|
+
if ignored_tasks.key?(task_info[:name])
|
21
|
+
# It was expected that this task is divergent
|
22
|
+
log_debug "Task #{task_info[:name]} was expected to be divergent. Reason: #{ignored_tasks[task_info[:name]]}"
|
23
|
+
else
|
24
|
+
extra_details = task_info.slice(*(task_info.keys - %i[name status diffs]))
|
25
|
+
error_details = []
|
26
|
+
error_details << "----- Changes:\n#{task_info[:diffs].strip}\n-----" if task_info[:diffs]
|
27
|
+
error_details << "----- Additional details:\n#{JSON.pretty_generate(extra_details)}\n-----" unless extra_details.empty?
|
28
|
+
error "Task #{task_info[:name]} has diverged", error_details.empty? ? nil : error_details.join("\n")
|
29
|
+
end
|
30
|
+
end
|
16
31
|
end
|
17
32
|
end
|
18
33
|
|
@@ -11,24 +11,38 @@ module HybridPlatformsConductor
|
|
11
11
|
|
12
12
|
# Check my_test_plugin.rb.sample documentation for signature details.
|
13
13
|
def test
|
14
|
-
|
15
|
-
example_node = example_platform.known_nodes.first
|
16
|
-
[
|
17
|
-
"#{CmdRunner.executables_prefix}check-node --node #{example_node} --show-commands",
|
18
|
-
"#{CmdRunner.executables_prefix}deploy --node #{example_node} --show-commands --why-run",
|
14
|
+
tests = [
|
19
15
|
"#{CmdRunner.executables_prefix}dump_nodes_json --help",
|
20
16
|
"#{CmdRunner.executables_prefix}free_ips",
|
21
17
|
"#{CmdRunner.executables_prefix}free_veids",
|
22
|
-
"#{CmdRunner.executables_prefix}get_impacted_nodes --platform #{example_platform.name} --show-commands",
|
23
|
-
"#{CmdRunner.executables_prefix}last_deploys --node #{example_node} --show-commands",
|
24
|
-
"#{CmdRunner.executables_prefix}nodes_to_deploy --node #{example_node} --show-commands",
|
25
|
-
"#{CmdRunner.executables_prefix}report --node #{example_node} --format stdout",
|
26
|
-
"#{CmdRunner.executables_prefix}run --node #{example_node} --show-commands --interactive",
|
27
18
|
"#{CmdRunner.executables_prefix}setup --help",
|
28
19
|
"#{CmdRunner.executables_prefix}ssh_config",
|
29
|
-
"#{CmdRunner.executables_prefix}test --help"
|
30
|
-
|
31
|
-
|
20
|
+
"#{CmdRunner.executables_prefix}test --help"
|
21
|
+
]
|
22
|
+
example_platform = PlatformsHandler.new(
|
23
|
+
logger: @logger,
|
24
|
+
logger_stderr: @logger_stderr,
|
25
|
+
config: @config,
|
26
|
+
cmd_runner: @cmd_runner
|
27
|
+
).known_platforms.first
|
28
|
+
unless example_platform.nil?
|
29
|
+
tests.concat [
|
30
|
+
"#{CmdRunner.executables_prefix}get_impacted_nodes --platform #{example_platform.name} --show-commands",
|
31
|
+
]
|
32
|
+
example_node = example_platform.known_nodes.first
|
33
|
+
unless example_node.nil?
|
34
|
+
tests.concat [
|
35
|
+
"#{CmdRunner.executables_prefix}check-node --node #{example_node} --show-commands",
|
36
|
+
"#{CmdRunner.executables_prefix}deploy --node #{example_node} --show-commands --why-run",
|
37
|
+
"#{CmdRunner.executables_prefix}last_deploys --node #{example_node} --show-commands",
|
38
|
+
"#{CmdRunner.executables_prefix}nodes_to_deploy --node #{example_node} --show-commands",
|
39
|
+
"#{CmdRunner.executables_prefix}report --node #{example_node} --format stdout",
|
40
|
+
"#{CmdRunner.executables_prefix}run --node #{example_node} --show-commands --interactive",
|
41
|
+
"#{CmdRunner.executables_prefix}topograph --from \"--node #{example_node}\" --to \"--node #{example_node}\" --skip-run --output graphviz:graph.gv"
|
42
|
+
]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
tests.sort.each do |cmd|
|
32
46
|
log_debug "Testing #{cmd}"
|
33
47
|
exit_status, stdout, _stderr = @cmd_runner.run_cmd "#{cmd} 2>&1", no_exception: true, log_to_stdout: log_debug?
|
34
48
|
assert_equal(exit_status, 0, "Command #{cmd} returned code #{exit_status}:\n#{stdout}")
|
@@ -43,7 +43,10 @@ module HybridPlatformsConductor
|
|
43
43
|
assert_equal tested_node, @node, "Wrong node being tested: #{tested_node} should be #{@node}"
|
44
44
|
assert_equal exit_status, 0, "Check-node returned error code #{exit_status}"
|
45
45
|
# Check that the output of the check-node returns no changes.
|
46
|
-
ignored_tasks =
|
46
|
+
ignored_tasks = (
|
47
|
+
@nodes_handler.select_confs_for_node(@node, @config.ignored_idempotence_tasks) +
|
48
|
+
@nodes_handler.select_confs_for_node(@node, @config.ignored_divergent_tasks)
|
49
|
+
).inject({}) do |merged_ignored_tasks, conf|
|
47
50
|
merged_ignored_tasks.merge(conf[:ignored_tasks])
|
48
51
|
end
|
49
52
|
@deployer.parse_deploy_output(@node, stdout, stderr).each do |task_info|
|
@@ -10,6 +10,7 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
|
|
10
10
|
with_sync_node do
|
11
11
|
mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => {} }] * 5)
|
12
12
|
expect(call_reserve_proxmox_container(2, 128 * 1024, 4, max_retries: 5)).to eq(error: 'not_enough_resources')
|
13
|
+
expect_proxmox_actions_to_be []
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
@@ -25,6 +26,69 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
|
|
25
26
|
vm_id: 1000,
|
26
27
|
vm_ip: '192.168.0.100'
|
27
28
|
)
|
29
|
+
expect_proxmox_actions_to_be [
|
30
|
+
[
|
31
|
+
:post,
|
32
|
+
'nodes/pve_node_name/lxc',
|
33
|
+
{
|
34
|
+
'ostemplate' => 'test_template.iso',
|
35
|
+
'hostname' => 'test.hostname.my-domain.com',
|
36
|
+
'description' => /node: test_node\nenvironment: test_env/,
|
37
|
+
'cores' => 2,
|
38
|
+
'cpulimit' => 2,
|
39
|
+
'memory' => 1024,
|
40
|
+
'rootfs' => 'local-lvm:4',
|
41
|
+
'net0' => 'name=eth0,bridge=vmbr0,gw=172.16.16.16,ip=192.168.0.100/32',
|
42
|
+
'vmid' => 1000
|
43
|
+
}
|
44
|
+
]
|
45
|
+
]
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'retries a few times before ending in error for a 5xx API error' do
|
50
|
+
with_sync_node do
|
51
|
+
mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => { error_strings: ['NOK: error code = 500'] * 5 } }])
|
52
|
+
result = call_reserve_proxmox_container(2, 1024, 4, config: { api_max_retries: 4 })
|
53
|
+
expect(result[:error]).not_to eq nil
|
54
|
+
expect(result[:error]).to match /Unhandled exception from reserve_proxmox_container: Proxmox API get nodes\/pve_node_name\/lxc returns NOK: error code = 500 continuously \(tried 5 times\)/
|
55
|
+
expect_proxmox_actions_to_be [
|
56
|
+
[:create_ticket],
|
57
|
+
[:create_ticket],
|
58
|
+
[:create_ticket],
|
59
|
+
[:create_ticket]
|
60
|
+
]
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'retries API errors a few times until it gets resolved' do
|
65
|
+
with_sync_node do
|
66
|
+
mock_proxmox(mocked_pve_nodes: [{ 'pve_node_name' => { error_strings: ['NOK: error code = 500'] * 3 } }])
|
67
|
+
expect(call_reserve_proxmox_container(2, 1024, 4, config: { api_max_retries: 4 })).to eq(
|
68
|
+
pve_node: 'pve_node_name',
|
69
|
+
vm_id: 1000,
|
70
|
+
vm_ip: '192.168.0.100'
|
71
|
+
)
|
72
|
+
expect_proxmox_actions_to_be [
|
73
|
+
[:create_ticket],
|
74
|
+
[:create_ticket],
|
75
|
+
[:create_ticket],
|
76
|
+
[
|
77
|
+
:post,
|
78
|
+
'nodes/pve_node_name/lxc',
|
79
|
+
{
|
80
|
+
'ostemplate' => 'test_template.iso',
|
81
|
+
'hostname' => 'test.hostname.my-domain.com',
|
82
|
+
'description' => /node: test_node\nenvironment: test_env/,
|
83
|
+
'cores' => 2,
|
84
|
+
'cpulimit' => 2,
|
85
|
+
'memory' => 1024,
|
86
|
+
'rootfs' => 'local-lvm:4',
|
87
|
+
'net0' => 'name=eth0,bridge=vmbr0,gw=172.16.16.16,ip=192.168.0.100/32',
|
88
|
+
'vmid' => 1000
|
89
|
+
}
|
90
|
+
]
|
91
|
+
]
|
28
92
|
end
|
29
93
|
end
|
30
94
|
|
@@ -60,6 +60,98 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
|
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
63
|
+
it 'makes sure to remove cgroup files that are leftovers of removed containers' do
|
64
|
+
with_sync_node(leftovers: [
|
65
|
+
'/sys/fs/cgroup/memory/lxc/1003'
|
66
|
+
]) do
|
67
|
+
mock_proxmox(mocked_pve_nodes: {
|
68
|
+
'pve_node_name' => {
|
69
|
+
lxc_containers: {
|
70
|
+
1000 => { ip: '192.168.1.100' },
|
71
|
+
1001 => { ip: '192.168.1.101' }
|
72
|
+
}
|
73
|
+
}
|
74
|
+
})
|
75
|
+
expect(call_reserve_proxmox_container(2, 1024, 1, config: { vm_ids_range: [1000, 1100] })).to eq(
|
76
|
+
pve_node: 'pve_node_name',
|
77
|
+
vm_id: 1002,
|
78
|
+
vm_ip: '192.168.0.100'
|
79
|
+
)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'makes sure to remove cgroup files that are leftovers of removed containers even when they are reusing the VM ID' do
|
84
|
+
with_sync_node(leftovers: [
|
85
|
+
'/sys/fs/cgroup/memory/lxc/1002'
|
86
|
+
]) do
|
87
|
+
mock_proxmox(mocked_pve_nodes: {
|
88
|
+
'pve_node_name' => {
|
89
|
+
lxc_containers: {
|
90
|
+
1000 => { ip: '192.168.1.100' },
|
91
|
+
1001 => { ip: '192.168.1.101' }
|
92
|
+
}
|
93
|
+
}
|
94
|
+
})
|
95
|
+
expect(call_reserve_proxmox_container(2, 1024, 1, config: { vm_ids_range: [1000, 1100] })).to eq(
|
96
|
+
pve_node: 'pve_node_name',
|
97
|
+
vm_id: 1002,
|
98
|
+
vm_ip: '192.168.0.100'
|
99
|
+
)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'makes sure to remove cgroup files that are leftovers of removed containers when several cgroups contain files' do
|
104
|
+
with_sync_node(leftovers: [
|
105
|
+
'/sys/fs/cgroup/memory/lxc/1003',
|
106
|
+
'/sys/fs/cgroup/network/lxc/1003',
|
107
|
+
'/sys/fs/cgroup/cpu/lxc/1003'
|
108
|
+
]) do
|
109
|
+
mock_proxmox(mocked_pve_nodes: {
|
110
|
+
'pve_node_name' => {
|
111
|
+
lxc_containers: {
|
112
|
+
1000 => { ip: '192.168.1.100' },
|
113
|
+
1001 => { ip: '192.168.1.101' }
|
114
|
+
}
|
115
|
+
}
|
116
|
+
})
|
117
|
+
expect(call_reserve_proxmox_container(2, 1024, 1, config: { vm_ids_range: [1000, 1100] })).to eq(
|
118
|
+
pve_node: 'pve_node_name',
|
119
|
+
vm_id: 1002,
|
120
|
+
vm_ip: '192.168.0.100'
|
121
|
+
)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'makes sure to remove only cgroup files that are leftovers of removed containers inside our VM ID range' do
|
126
|
+
with_sync_node(
|
127
|
+
leftovers: [
|
128
|
+
'/sys/fs/cgroup/memory/lxc/100',
|
129
|
+
'/sys/fs/cgroup/memory/lxc/1001',
|
130
|
+
'/sys/fs/cgroup/memory/lxc/1002',
|
131
|
+
'/sys/fs/cgroup/memory/lxc/1003'
|
132
|
+
],
|
133
|
+
expect_remaining_leftovers: [
|
134
|
+
'/sys/fs/cgroup/memory/lxc/100',
|
135
|
+
'/sys/fs/cgroup/memory/lxc/1001'
|
136
|
+
]
|
137
|
+
) do
|
138
|
+
mock_proxmox(mocked_pve_nodes: {
|
139
|
+
'pve_node_name' => {
|
140
|
+
lxc_containers: {
|
141
|
+
100 => { ip: '192.168.1.10' },
|
142
|
+
1000 => { ip: '192.168.1.100' },
|
143
|
+
1001 => { ip: '192.168.1.101' }
|
144
|
+
}
|
145
|
+
}
|
146
|
+
})
|
147
|
+
expect(call_reserve_proxmox_container(2, 1024, 1, config: { vm_ids_range: [1000, 1100] })).to eq(
|
148
|
+
pve_node: 'pve_node_name',
|
149
|
+
vm_id: 1002,
|
150
|
+
vm_ip: '192.168.0.100'
|
151
|
+
)
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
63
155
|
end
|
64
156
|
|
65
157
|
end
|
@@ -39,16 +39,7 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
|
|
39
39
|
mock_proxmox_to_start_node(nbr_api_errors: 3)
|
40
40
|
]
|
41
41
|
instance.create
|
42
|
-
|
43
|
-
old_wait_secs = HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:RETRY_WAIT_TIME_SECS)
|
44
|
-
begin
|
45
|
-
HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
|
46
|
-
HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, 1)
|
47
|
-
instance.start
|
48
|
-
ensure
|
49
|
-
HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
|
50
|
-
HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, old_wait_secs)
|
51
|
-
end
|
42
|
+
instance.start
|
52
43
|
end
|
53
44
|
end
|
54
45
|
|
@@ -58,19 +49,10 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
|
|
58
49
|
# 1 - The info on existing containers
|
59
50
|
mock_proxmox_to_get_nodes_info,
|
60
51
|
# 2 - The start of the container - fail too many times
|
61
|
-
mock_proxmox_to_start_node(nbr_api_errors:
|
52
|
+
mock_proxmox_to_start_node(nbr_api_errors: 4, task_status: nil)
|
62
53
|
]
|
63
54
|
instance.create
|
64
|
-
|
65
|
-
old_wait_secs = HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_get(:RETRY_WAIT_TIME_SECS)
|
66
|
-
begin
|
67
|
-
HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
|
68
|
-
HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, 1)
|
69
|
-
expect { instance.start }.to raise_error '[ node/test ] - Proxmox API call post nodes/pve_node_name/lxc/1024/status/start [] is constantly failing. Giving up.'
|
70
|
-
ensure
|
71
|
-
HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.send(:remove_const, :RETRY_WAIT_TIME_SECS)
|
72
|
-
HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox.const_set(:RETRY_WAIT_TIME_SECS, old_wait_secs)
|
73
|
-
end
|
55
|
+
expect { instance.start }.to raise_error '[ node/test ] - Proxmox API call post nodes/pve_node_name/lxc/1024/status/start [] is constantly failing. Giving up.'
|
74
56
|
end
|
75
57
|
end
|
76
58
|
|
@@ -23,6 +23,32 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
+
it 'retries calls to the API when getting back errors 5xx' do
|
27
|
+
with_test_proxmox_platform do |instance|
|
28
|
+
mock_proxmox_calls_with [
|
29
|
+
# 1 - The info on existing containers
|
30
|
+
mock_proxmox_to_get_nodes_info,
|
31
|
+
# 2 - The status of the container
|
32
|
+
mock_proxmox_to_status_node(nbr_api_errors: 3)
|
33
|
+
]
|
34
|
+
instance.create
|
35
|
+
expect(instance.state).to eq :created
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'fails to get an instance\'s status when the Proxmox API fails too many times' do
|
40
|
+
with_test_proxmox_platform do |instance|
|
41
|
+
mock_proxmox_calls_with [
|
42
|
+
# 1 - The info on existing containers
|
43
|
+
mock_proxmox_to_get_nodes_info,
|
44
|
+
# 2 - The status of the container
|
45
|
+
mock_proxmox_to_status_node(nbr_api_errors: 4, status: nil)
|
46
|
+
]
|
47
|
+
instance.create
|
48
|
+
expect { instance.state }.to raise_error '[ node/test ] - Proxmox API call get nodes/pve_node_name/lxc returns NOK: error code = 500 continuously (tried 4 times)'
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
26
52
|
end
|
27
53
|
|
28
54
|
end
|
@@ -23,6 +23,8 @@ module HybridPlatformsConductorTest
|
|
23
23
|
test_platform path: '#{repository}'
|
24
24
|
proxmox(
|
25
25
|
api_url: 'https://my-proxmox.my-domain.com:8006',
|
26
|
+
api_max_retries: 3,
|
27
|
+
api_wait_between_retries_secs: 0,
|
26
28
|
sync_node: 'node',
|
27
29
|
test_config: {
|
28
30
|
pve_nodes: ['pve_node_name'],
|
@@ -75,12 +77,20 @@ module HybridPlatformsConductorTest
|
|
75
77
|
# * *proxmox_password* (String or nil): Proxmox password used to connect to Proxmox API [default: nil]
|
76
78
|
# * *proxmox_realm* (String or nil): Proxmox realm used to connect to Proxmox API [default: 'pam']
|
77
79
|
# * *nodes_info* (Array<Hash>): Nodes info returned by the Proxmox API [default: []]
|
80
|
+
# * *nbr_api_errors* (Integer): Number of API errors 500 to mock before getting a successful query [default: 0]
|
78
81
|
# * *extra_expects* (Proc or nil): Code called for additional expectations on the proxmox instance, or nil if none [default: nil]
|
79
82
|
# * Parameters::
|
80
83
|
# * *proxmox* (Double): The mocked Proxmox instance
|
81
84
|
# Result::
|
82
85
|
# * Proc: Code called in place of Proxmox.new. Signature is the same as Proxmox.new.
|
83
|
-
def mock_proxmox_to_get_nodes_info(
|
86
|
+
def mock_proxmox_to_get_nodes_info(
|
87
|
+
proxmox_user: nil,
|
88
|
+
proxmox_password: nil,
|
89
|
+
proxmox_realm: 'pam',
|
90
|
+
nodes_info: [],
|
91
|
+
nbr_api_errors: 0,
|
92
|
+
extra_expects: nil
|
93
|
+
)
|
84
94
|
proc do |url, pve_node, user, password, realm, options|
|
85
95
|
expect(url).to eq 'https://my-proxmox.my-domain.com:8006/api2/json/'
|
86
96
|
expect(pve_node).to eq 'my-proxmox'
|
@@ -97,8 +107,10 @@ module HybridPlatformsConductorTest
|
|
97
107
|
# Nothing
|
98
108
|
end
|
99
109
|
# Mock checking existing nodes
|
100
|
-
|
101
|
-
|
110
|
+
idx_try = 0
|
111
|
+
expect(proxmox).to receive(:get).exactly(nbr_api_errors + 1).times.with('nodes') do
|
112
|
+
idx_try += 1
|
113
|
+
idx_try <= nbr_api_errors ? 'NOK: error code = 500' : nodes_info
|
102
114
|
end
|
103
115
|
extra_expects.call(proxmox) unless extra_expects.nil?
|
104
116
|
proxmox
|
@@ -144,6 +156,7 @@ module HybridPlatformsConductorTest
|
|
144
156
|
idx_try += 1
|
145
157
|
idx_try <= nbr_api_errors ? 'NOK: error code = 500' : task_name
|
146
158
|
end
|
159
|
+
expect(proxmox).to receive(:reauthenticate).exactly(nbr_api_errors - (task_status.nil? ? 1 : 0)).times
|
147
160
|
# Mock checking task status
|
148
161
|
unless task_status.nil?
|
149
162
|
# Mock checking task status
|
@@ -243,13 +256,15 @@ module HybridPlatformsConductorTest
|
|
243
256
|
# Parameters::
|
244
257
|
# * *proxmox_user* (String or nil): Proxmox user used to connect to Proxmox API [default: nil]
|
245
258
|
# * *proxmox_password* (String or nil): Proxmox password used to connect to Proxmox API [default: nil]
|
246
|
-
# * *status* (String): Mocked status [default: 'created']
|
259
|
+
# * *status* (String or nil): Mocked status, or nil if it should not be asked [default: 'created']
|
260
|
+
# * *nbr_api_errors* (Integer): Number of API 500 errors to mock before getting a successful query [default: 0]
|
247
261
|
# Result::
|
248
262
|
# * Proc: Code called in place of Proxmox.new. Signature is the same as Proxmox.new.
|
249
263
|
def mock_proxmox_to_status_node(
|
250
264
|
proxmox_user: nil,
|
251
265
|
proxmox_password: nil,
|
252
|
-
|
266
|
+
status: 'created',
|
267
|
+
nbr_api_errors: 0
|
253
268
|
)
|
254
269
|
proc do |url, pve_node, user, password, realm, options|
|
255
270
|
expect(url).to eq 'https://my-proxmox.my-domain.com:8006/api2/json/'
|
@@ -267,17 +282,26 @@ module HybridPlatformsConductorTest
|
|
267
282
|
# Nothing
|
268
283
|
end
|
269
284
|
# Mock getting status of a container
|
270
|
-
|
271
|
-
|
285
|
+
idx_try = 0
|
286
|
+
expect(proxmox).to receive(:get).exactly(nbr_api_errors + (status.nil? ? 0 : 1)).times.with('nodes/pve_node_name/lxc') do
|
287
|
+
idx_try += 1
|
288
|
+
if idx_try <= nbr_api_errors
|
289
|
+
'NOK: error code = 500'
|
290
|
+
else
|
291
|
+
[
|
292
|
+
{
|
293
|
+
'vmid' => '1024'
|
294
|
+
}
|
295
|
+
]
|
296
|
+
end
|
297
|
+
end
|
298
|
+
expect(proxmox).to receive(:reauthenticate).exactly(nbr_api_errors - (status.nil? ? 1 : 0)).times
|
299
|
+
unless status.nil?
|
300
|
+
expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc/1024/status/current') do
|
272
301
|
{
|
273
|
-
'
|
302
|
+
'status' => status
|
274
303
|
}
|
275
|
-
|
276
|
-
end
|
277
|
-
expect(proxmox).to receive(:get).with('nodes/pve_node_name/lxc/1024/status/current') do
|
278
|
-
{
|
279
|
-
'status' => 'created'
|
280
|
-
}
|
304
|
+
end
|
281
305
|
end
|
282
306
|
proxmox
|
283
307
|
end
|
@@ -548,13 +572,17 @@ module HybridPlatformsConductorTest
|
|
548
572
|
]
|
549
573
|
when /^nodes\/([^\/]+)\/lxc$/
|
550
574
|
pve_node_name = $1
|
551
|
-
pve_nodes[pve_node_name][:
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
575
|
+
if pve_nodes[pve_node_name][:error_strings].nil? || pve_nodes[pve_node_name][:error_strings].empty?
|
576
|
+
pve_nodes[pve_node_name][:lxc_containers].map do |vm_id, vm_info|
|
577
|
+
{
|
578
|
+
'vmid' => vm_id.to_s,
|
579
|
+
'maxdisk' => vm_info[:maxdisk],
|
580
|
+
'maxmem' => vm_info[:maxmem],
|
581
|
+
'cpus' => vm_info[:cpus]
|
582
|
+
}
|
583
|
+
end
|
584
|
+
else
|
585
|
+
pve_nodes[pve_node_name][:error_strings].shift
|
558
586
|
end
|
559
587
|
when /^nodes\/([^\/]+)\/lxc\/([^\/]+)\/config$/
|
560
588
|
pve_node_name = $1
|
@@ -615,6 +643,10 @@ module HybridPlatformsConductorTest
|
|
615
643
|
raise "Unknown Proxmox API post call: #{path}. Please adapt the test framework."
|
616
644
|
end
|
617
645
|
end
|
646
|
+
# Mock create_ticket
|
647
|
+
allow(proxmox).to receive(:create_ticket) do
|
648
|
+
@proxmox_actions << [:create_ticket]
|
649
|
+
end
|
618
650
|
proxmox
|
619
651
|
end
|
620
652
|
end,
|
@@ -625,11 +657,36 @@ module HybridPlatformsConductorTest
|
|
625
657
|
# Prepare a repository to test reserve_proxmox_container
|
626
658
|
#
|
627
659
|
# Parameters::
|
660
|
+
# * *leftovers* (Array<String>): List of leftover files among cgroups [default: []]
|
661
|
+
# * *expect_remaining_leftovers* (Array<String>): List of leftover files among cgroups that should remain after run [default: []]
|
628
662
|
# * Proc: Code to be called with repository setup
|
629
|
-
def with_sync_node
|
663
|
+
def with_sync_node(leftovers: [], expect_remaining_leftovers: [])
|
630
664
|
with_repository('sync_node') do |repository|
|
665
|
+
# Mock the cgroup file system of the sync node
|
666
|
+
remaining_leftovers = leftovers.clone
|
667
|
+
allow(Dir).to receive(:glob).and_wrap_original do |original_glob, dir, &block|
|
668
|
+
case dir
|
669
|
+
when '/sys/fs/cgroup/*/lxc/*'
|
670
|
+
block.nil? ? remaining_leftovers : remaining_leftovers.each(&block)
|
671
|
+
when /^\/sys\/fs\/cgroup\/\*\/lxc\/(.+)$/
|
672
|
+
vm_id_str = $1
|
673
|
+
file_pattern = /^\/sys\/fs\/cgroup\/.+\/lxc\/#{Regexp.escape(vm_id_str)}$/
|
674
|
+
matched_files = remaining_leftovers.select { |file| file =~ file_pattern }
|
675
|
+
block.nil? ? matched_files : matched_files.each(&block)
|
676
|
+
else
|
677
|
+
original_glob.call(dir, &block)
|
678
|
+
end
|
679
|
+
end
|
680
|
+
allow(FileUtils).to receive(:rm_rf).and_wrap_original do |original_rm_rf, path|
|
681
|
+
if path.start_with?('/sys/fs/cgroup')
|
682
|
+
remaining_leftovers.delete_if { |file| file.start_with?(path) }
|
683
|
+
else
|
684
|
+
original_rm_rf.call(path)
|
685
|
+
end
|
686
|
+
end
|
631
687
|
@repository = repository
|
632
688
|
yield
|
689
|
+
expect(remaining_leftovers.sort).to eq expect_remaining_leftovers.sort
|
633
690
|
end
|
634
691
|
end
|
635
692
|
|
@@ -642,14 +699,26 @@ module HybridPlatformsConductorTest
|
|
642
699
|
# * *wait_before_retry* (Integer): Specify the number of seconds to wait before retry [default: 0]
|
643
700
|
# * *create* (Hash or nil): Create file content, or nil if none [default: nil]
|
644
701
|
# * *destroy* (Hash or nil): Destroy file content, or nil if none [default: nil]
|
702
|
+
# * *api_max_retries* (Integer): Max number of API retries [default: 3]
|
703
|
+
# * *api_wait_between_retries_secs* (Integer): Number of seconds to wait between API retries [default: 0]
|
645
704
|
# Result::
|
646
705
|
# * Hash: JSON result of the call
|
647
|
-
def call_reserve_proxmox_container_with(
|
706
|
+
def call_reserve_proxmox_container_with(
|
707
|
+
config: {},
|
708
|
+
max_retries: 1,
|
709
|
+
wait_before_retry: 0,
|
710
|
+
create: nil,
|
711
|
+
destroy: nil,
|
712
|
+
api_max_retries: 3,
|
713
|
+
api_wait_between_retries_secs: 0
|
714
|
+
)
|
648
715
|
# Make sure we set default values in the config
|
649
716
|
config = {
|
650
717
|
proxmox_api_url: 'https://my-proxmox.my-domain.com:8006',
|
651
718
|
futex_file: "#{@repository}/proxmox/allocations.futex",
|
652
719
|
logs_dir: "#{Dir.tmpdir}/hpc_test_proxmox_waiter_logs",
|
720
|
+
api_max_retries: api_max_retries,
|
721
|
+
api_wait_between_retries_secs: api_wait_between_retries_secs,
|
653
722
|
pve_nodes: ['pve_node_name'],
|
654
723
|
vm_ips_list: %w[
|
655
724
|
192.168.0.100
|
@@ -716,7 +785,14 @@ module HybridPlatformsConductorTest
|
|
716
785
|
# * *wait_before_retry* (Integer): Specify the number of seconds to wait before retry [default: 0]
|
717
786
|
# Result::
|
718
787
|
# * Hash: JSON result of the call
|
719
|
-
def call_reserve_proxmox_container(
|
788
|
+
def call_reserve_proxmox_container(
|
789
|
+
cpus,
|
790
|
+
ram_mb,
|
791
|
+
disk_gb,
|
792
|
+
config: {},
|
793
|
+
max_retries: 1,
|
794
|
+
wait_before_retry: 0
|
795
|
+
)
|
720
796
|
call_reserve_proxmox_container_with(
|
721
797
|
config: config,
|
722
798
|
max_retries: max_retries,
|
@@ -763,7 +839,13 @@ module HybridPlatformsConductorTest
|
|
763
839
|
# Parameters::
|
764
840
|
# * *expected_proxmox_actions* (Array<Array>): Expected Proxmox actions
|
765
841
|
def expect_proxmox_actions_to_be(expected_proxmox_actions)
|
766
|
-
expect(@proxmox_actions.size).to eq
|
842
|
+
expect(@proxmox_actions.size).to eq(expected_proxmox_actions.size), <<~EOS
|
843
|
+
Expected #{expected_proxmox_actions.size} Proxmox actions, but got #{@proxmox_actions.size} instead:
|
844
|
+
----- Received:
|
845
|
+
#{@proxmox_actions.map(&:inspect).join("\n")}
|
846
|
+
----- Expected:
|
847
|
+
#{expected_proxmox_actions.map(&:inspect).join("\n")}
|
848
|
+
EOS
|
767
849
|
@proxmox_actions.zip(expected_proxmox_actions).each do |proxmox_action, expected_proxmox_action|
|
768
850
|
expect(proxmox_action.size).to eq expected_proxmox_action.size
|
769
851
|
expect(proxmox_action[0..1]).to eq expected_proxmox_action[0..1]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hybrid_platforms_conductor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 32.
|
4
|
+
version: 32.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Muriel Salvan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: range_operators
|
@@ -281,20 +281,20 @@ description: Provides a complete toolset to help DevOps maintain, deploy, monito
|
|
281
281
|
email:
|
282
282
|
- muriel@x-aeon.com
|
283
283
|
executables:
|
284
|
-
- topograph
|
285
|
-
- test
|
286
|
-
- free_veids
|
287
|
-
- free_ips
|
288
|
-
- nodes_to_deploy
|
289
|
-
- last_deploys
|
290
|
-
- check-node
|
291
284
|
- run
|
292
|
-
- report
|
293
285
|
- get_impacted_nodes
|
294
|
-
-
|
286
|
+
- dump_nodes_json
|
287
|
+
- check-node
|
288
|
+
- nodes_to_deploy
|
289
|
+
- free_veids
|
290
|
+
- free_ips
|
291
|
+
- test
|
295
292
|
- deploy
|
293
|
+
- report
|
294
|
+
- topograph
|
296
295
|
- setup
|
297
|
-
-
|
296
|
+
- last_deploys
|
297
|
+
- ssh_config
|
298
298
|
extensions: []
|
299
299
|
extra_rdoc_files: []
|
300
300
|
files:
|