hybrid_platforms_conductor 32.9.1 → 32.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f1beb4de64ac4dcdc1de3e5063980ea4dab1b8ceef204baae0a9f4bf209f1f3
4
- data.tar.gz: 0c5e65553da39646554d737c8a690939497a8c89f602a4eae01a4987fff994b7
3
+ metadata.gz: 7d350aa81076d2c5a6837b7184bf903640fb0be35aae2b22f596b870b56a53b8
4
+ data.tar.gz: d75bebd5e1f6b12b30ea6f62c01c835bc9f89fba9ed3a208b3fd2dd0bc021328
5
5
  SHA512:
6
- metadata.gz: 3129d3b37140c6488b2b6a8b6c1c8cb22ce745181a643925d9bb2fdc77de97a331660c8d5193dde940535d58b545576796d22a29008d181c73bbf7bb384c2251
7
- data.tar.gz: 187c6215c37e89b924693c1819fea12f8cc444858ac575f9445b535ae9866a396b01dbc5baaf85ca4e8163d60c8865c931eca319c4824148fdd06029920b9710
6
+ metadata.gz: 37c23ac804c359f5875cb9f77f3e639b1711dc967ae664554d50b6e8aeefe26f59385a786c8ff31f56f5a3fc9eb55aaf65cfdc2946fa4c7e8b4b4372fa1ac3e0
7
+ data.tar.gz: a558f9c2b88b4a69110a870ba27549315823f2fab6c91c61068f3be1b10c7f2482d18b33883b0762d9cf2ac787fe61616b81d08535b1bc5d352e29e914a0519c
@@ -18,15 +18,16 @@ module HybridPlatformsConductor
18
18
  attr_accessor *%i[logger logger_stderr]
19
19
 
20
20
  def check_response(response)
21
- log_debug "Response from Proxmox API: #{response}"
22
- log_warn "Response from Proxmox API: #{response}" if response.code >= 400 && !log_debug?
21
+ msg = "Response from Proxmox API: #{response} - #{response.net_http_res.message}"
22
+ log_debug msg
23
+ log_warn msg if response.code >= 400 && !log_debug?
23
24
  super
24
25
  end
25
26
 
26
27
  # Re-authenticate the Proxmox instance
27
28
  # This can be useful when the API returns errors due to invalidated tokens
28
29
  def reauthenticate
29
- log_info 'Force re-authentication to Proxmox'
30
+ log_debug 'Force re-authentication to Proxmox'
30
31
  @auth_params = create_ticket
31
32
  end
32
33
 
@@ -605,10 +605,35 @@ class ProxmoxWaiter
605
605
  # Result::
606
606
  # * Array<Integer>: List of available VM IDs
607
607
  def free_vm_ids
608
- Range.new(*@config['vm_ids_range']).to_a -
608
+ vm_ids = Range.new(*@config['vm_ids_range']).to_a -
609
609
  api_get('nodes').map do |pve_node_info|
610
610
  api_get("nodes/#{pve_node_info['node']}/lxc").map { |lxc_info| Integer(lxc_info['vmid']) }
611
611
  end.flatten
612
+ # Make sure the vm_ids that are available don't have any leftovers in the cgroups.
613
+ # This can happen because of some Proxmox bugs, and makes the API return 500 errors.
614
+ # cf. https://forum.proxmox.com/threads/lxc-console-cleanup-error.38293/
615
+ # TODO: Remove this once Proxmox has solved the issue with leftovers of destroyed VMs.
616
+ (vm_ids.map(&:to_s) & vm_ids_in_cgroups).each do |vm_id_str|
617
+ # We have a vm_id that is available but still has some leftovers in cgroups.
618
+ # Clean those to avoid 500 errors in API.
619
+ log "Found VMID #{vm_id_str} with leftovers in cgroups. Cleaning those."
620
+ Dir.glob("/sys/fs/cgroup/*/lxc/#{vm_id_str}") do |cgroup_dir|
621
+ log "Removing #{cgroup_dir}"
622
+ FileUtils.rm_rf cgroup_dir
623
+ end
624
+ end
625
+ vm_ids
626
+ end
627
+
628
+ # Return the list of VM IDs present in cgroups
629
+ #
630
+ # Result::
631
+ # * Array<String>: List of VM IDs as strings (as some are not Integers like '1010-1')
632
+ def vm_ids_in_cgroups
633
+ Dir.glob('/sys/fs/cgroup/*/lxc/*').map do |file|
634
+ basename = File.basename(file)
635
+ basename =~ /^\d.+$/ ? basename : nil
636
+ end.compact.sort.uniq
612
637
  end
613
638
 
614
639
  # Wait for a given Proxmox task completion
@@ -1,5 +1,5 @@
1
1
  module HybridPlatformsConductor
2
2
 
3
- VERSION = '32.9.1'
3
+ VERSION = '32.10.0'
4
4
 
5
5
  end
@@ -60,6 +60,98 @@ describe HybridPlatformsConductor::HpcPlugins::Provisioner::Proxmox do
60
60
  end
61
61
  end
62
62
 
63
+ it 'makes sure to remove cgroup files that are leftovers of removed containers' do
64
+ with_sync_node(leftovers: [
65
+ '/sys/fs/cgroup/memory/lxc/1003'
66
+ ]) do
67
+ mock_proxmox(mocked_pve_nodes: {
68
+ 'pve_node_name' => {
69
+ lxc_containers: {
70
+ 1000 => { ip: '192.168.1.100' },
71
+ 1001 => { ip: '192.168.1.101' }
72
+ }
73
+ }
74
+ })
75
+ expect(call_reserve_proxmox_container(2, 1024, 1, config: { vm_ids_range: [1000, 1100] })).to eq(
76
+ pve_node: 'pve_node_name',
77
+ vm_id: 1002,
78
+ vm_ip: '192.168.0.100'
79
+ )
80
+ end
81
+ end
82
+
83
+ it 'makes sure to remove cgroup files that are leftovers of removed containers even when they are reusing the VM ID' do
84
+ with_sync_node(leftovers: [
85
+ '/sys/fs/cgroup/memory/lxc/1002'
86
+ ]) do
87
+ mock_proxmox(mocked_pve_nodes: {
88
+ 'pve_node_name' => {
89
+ lxc_containers: {
90
+ 1000 => { ip: '192.168.1.100' },
91
+ 1001 => { ip: '192.168.1.101' }
92
+ }
93
+ }
94
+ })
95
+ expect(call_reserve_proxmox_container(2, 1024, 1, config: { vm_ids_range: [1000, 1100] })).to eq(
96
+ pve_node: 'pve_node_name',
97
+ vm_id: 1002,
98
+ vm_ip: '192.168.0.100'
99
+ )
100
+ end
101
+ end
102
+
103
+ it 'makes sure to remove cgroup files that are leftovers of removed containers when several cgroups contain files' do
104
+ with_sync_node(leftovers: [
105
+ '/sys/fs/cgroup/memory/lxc/1003',
106
+ '/sys/fs/cgroup/network/lxc/1003',
107
+ '/sys/fs/cgroup/cpu/lxc/1003'
108
+ ]) do
109
+ mock_proxmox(mocked_pve_nodes: {
110
+ 'pve_node_name' => {
111
+ lxc_containers: {
112
+ 1000 => { ip: '192.168.1.100' },
113
+ 1001 => { ip: '192.168.1.101' }
114
+ }
115
+ }
116
+ })
117
+ expect(call_reserve_proxmox_container(2, 1024, 1, config: { vm_ids_range: [1000, 1100] })).to eq(
118
+ pve_node: 'pve_node_name',
119
+ vm_id: 1002,
120
+ vm_ip: '192.168.0.100'
121
+ )
122
+ end
123
+ end
124
+
125
+ it 'makes sure to remove only cgroup files that are leftovers of removed containers inside our VM ID range' do
126
+ with_sync_node(
127
+ leftovers: [
128
+ '/sys/fs/cgroup/memory/lxc/100',
129
+ '/sys/fs/cgroup/memory/lxc/1001',
130
+ '/sys/fs/cgroup/memory/lxc/1002',
131
+ '/sys/fs/cgroup/memory/lxc/1003'
132
+ ],
133
+ expect_remaining_leftovers: [
134
+ '/sys/fs/cgroup/memory/lxc/100',
135
+ '/sys/fs/cgroup/memory/lxc/1001'
136
+ ]
137
+ ) do
138
+ mock_proxmox(mocked_pve_nodes: {
139
+ 'pve_node_name' => {
140
+ lxc_containers: {
141
+ 100 => { ip: '192.168.1.10' },
142
+ 1000 => { ip: '192.168.1.100' },
143
+ 1001 => { ip: '192.168.1.101' }
144
+ }
145
+ }
146
+ })
147
+ expect(call_reserve_proxmox_container(2, 1024, 1, config: { vm_ids_range: [1000, 1100] })).to eq(
148
+ pve_node: 'pve_node_name',
149
+ vm_id: 1002,
150
+ vm_ip: '192.168.0.100'
151
+ )
152
+ end
153
+ end
154
+
63
155
  end
64
156
 
65
157
  end
@@ -657,11 +657,36 @@ module HybridPlatformsConductorTest
657
657
  # Prepare a repository to test reserve_proxmox_container
658
658
  #
659
659
  # Parameters::
660
+ # * *leftovers* (Array<String>): List of leftover files among cgroups [default: []]
661
+ # * *expect_remaining_leftovers* (Array<String>): List of leftover files among cgroups that should remain after run [default: []]
660
662
  # * Proc: Code to be called with repository setup
661
- def with_sync_node
663
+ def with_sync_node(leftovers: [], expect_remaining_leftovers: [])
662
664
  with_repository('sync_node') do |repository|
665
+ # Mock the cgroup file system of the sync node
666
+ remaining_leftovers = leftovers.clone
667
+ allow(Dir).to receive(:glob).and_wrap_original do |original_glob, dir, &block|
668
+ case dir
669
+ when '/sys/fs/cgroup/*/lxc/*'
670
+ block.nil? ? remaining_leftovers : remaining_leftovers.each(&block)
671
+ when /^\/sys\/fs\/cgroup\/\*\/lxc\/(.+)$/
672
+ vm_id_str = $1
673
+ file_pattern = /^\/sys\/fs\/cgroup\/.+\/lxc\/#{Regexp.escape(vm_id_str)}$/
674
+ matched_files = remaining_leftovers.select { |file| file =~ file_pattern }
675
+ block.nil? ? matched_files : matched_files.each(&block)
676
+ else
677
+ original_glob.call(dir, &block)
678
+ end
679
+ end
680
+ allow(FileUtils).to receive(:rm_rf).and_wrap_original do |original_rm_rf, path|
681
+ if path.start_with?('/sys/fs/cgroup')
682
+ remaining_leftovers.delete_if { |file| file.start_with?(path) }
683
+ else
684
+ original_rm_rf.call(path)
685
+ end
686
+ end
663
687
  @repository = repository
664
688
  yield
689
+ expect(remaining_leftovers.sort).to eq expect_remaining_leftovers.sort
665
690
  end
666
691
  end
667
692
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hybrid_platforms_conductor
3
3
  version: !ruby/object:Gem::Version
4
- version: 32.9.1
4
+ version: 32.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Muriel Salvan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-23 00:00:00.000000000 Z
11
+ date: 2021-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: range_operators
@@ -281,20 +281,20 @@ description: Provides a complete toolset to help DevOps maintain, deploy, monito
281
281
  email:
282
282
  - muriel@x-aeon.com
283
283
  executables:
284
- - topograph
285
- - test
284
+ - setup
286
285
  - free_veids
287
- - free_ips
288
- - nodes_to_deploy
286
+ - dump_nodes_json
287
+ - topograph
289
288
  - last_deploys
290
- - check-node
291
- - run
292
289
  - report
293
290
  - get_impacted_nodes
294
291
  - ssh_config
292
+ - test
293
+ - nodes_to_deploy
294
+ - free_ips
295
+ - check-node
295
296
  - deploy
296
- - setup
297
- - dump_nodes_json
297
+ - run
298
298
  extensions: []
299
299
  extra_rdoc_files: []
300
300
  files: