oci-logging-analytics-kubernetes-discovery 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/oci-loganalytics-kubernetes-discovery +74 -27
- data/lib/config/oci_client_retry_config.rb +11 -8
- data/lib/discover/infrastructure.rb +79 -27
- data/lib/discover/object.rb +19 -4
- data/lib/dto/infra/load_balancer_payload.rb +32 -0
- data/lib/dto/infra/node_pool_payload.rb +28 -0
- data/lib/dto/infra/{node_pool_entity_payload.rb → resource_payload.rb} +6 -6
- data/lib/dto/infra/subnet_payload.rb +34 -0
- data/lib/dto/infra_objects_payload.rb +3 -3
- data/lib/dto/kubernetes_objects_payload.rb +19 -15
- data/lib/dto/state.rb +7 -3
- data/lib/enum/infrastructure_resource_discovery.rb +1 -0
- data/lib/enum/object_client_mapping_enum.rb +1 -1
- data/lib/enum/stack_job_lifecycle_state_enum.rb +14 -0
- data/lib/enum/stack_job_operation_enum.rb +10 -0
- data/lib/infra_resources.rb +142 -41
- data/lib/objects_resources.rb +16 -6
- data/lib/oci_loganalytics_resources_discovery.rb +105 -57
- data/lib/util/kube_client.rb +1 -0
- data/lib/util/kubectl_ops.rb +1 -1
- data/lib/util/log_analytics.rb +2 -2
- data/lib/util/oci_clients.rb +137 -78
- data/lib/util/service_logs.rb +559 -0
- data/lib/util/state_manager.rb +12 -2
- data/lib/util/string_utils.rb +48 -0
- data/lib/version.rb +1 -1
- metadata +9 -7
- data/lib/dto/infra/cluster_entity_payload.rb +0 -22
- data/lib/dto/infra/load_balancers_entity_payload.rb +0 -22
- data/lib/dto/infra/subnet_entity_payload.rb +0 -22
- data/lib/dto/infra/vcn_entity_payload.rb +0 -22
@@ -0,0 +1,559 @@
+## Copyright (c) 2024 Oracle and/or its affiliates.
+## The Universal Permissive License (UPL), Version 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+require_relative '../enum/stack_job_operation_enum'
+require_relative '../enum/stack_job_lifecycle_state_enum'
+require_relative '../enum/infrastructure_resource_discovery'
+
+require_relative 'string_utils'
+require_relative '../discover/infrastructure'
+
+module Util
+  module ServiceLogs
+    extend Util::Logging
+    extend Discover::Infrastructure
+    extend Util::StringUtils
+
+    module_function
+
+    DELETION_RETRY_ATTEMPTS = 5
+
+    # Prefix of resource manager stack (to be created via API)
+    RESOURCE_MANAGER_STACK_PREFIX = 'oci-kubernetes-monitoring-service-logs'.freeze
+
+    def invoke_resource_manager_stack(infra_objects_payload, app_config_hash,
+                                      oci_la_log_group_id, onm_compartment_id, oci_region, oci_domain)
+
+      rms_template_base64_encoded = app_config_hash[:rms_template_base64_encoded]
+      stack_operation_timeout = app_config_hash[:stack_operation_timeout].to_i
+      cluster_details = infra_objects_payload.to_hash[:cluster]
+      subnet_details = infra_objects_payload.to_hash[:subnets]
+      load_balancer_details = infra_objects_payload.to_hash[:loadbalancers]
+
+      existing_stack = nil
+      check_for_existing_service_discovery_stack = false
+
+      # Fetch stack ID from configmap
+      cfgmap_stack_id_entry = Util::StateManager.state.svc_log_stack_id
+
+      if cfgmap_stack_id_entry.nil? || cfgmap_stack_id_entry.empty?
+        # If there is no entry in config map, use fallback method to check if stack exists
+        check_for_existing_service_discovery_stack = true
+      else
+        # If there is entry in config map, double check if the stack exists
+        begin
+          existing_stack = fetch_stack_details(cfgmap_stack_id_entry)
+        rescue StandardError => e
+          logger.error("Unable to fetch stack details to confirm if it exists. Stack OCID: [#{cfgmap_stack_id_entry}], Error: [#{e}]")
+          check_for_existing_service_discovery_stack = true
+        end
+      end
+
+      if check_for_existing_service_discovery_stack
+        # Prepare stack name using cluster OCID
+        logger.debug('Preparing stack name using cluster OCID')
+        stack_unique_identifier = prepare_stack_unique_identifier(cluster_details[:ocid])
+
+        # Check if the stack already exists
+        logger.info("Checking if stack name [#{stack_unique_identifier}] already exists")
+        available_stack_list = fetch_stack_list(onm_compartment_id)
+        available_stack_list.each do |item|
+          if item.display_name == stack_unique_identifier
+            existing_stack = item
+            break
+          end
+        end
+      end
+
+      # Prepare map object (in the form expected by stack)
+      subnet_map = prepare_map_using_resource_list(subnet_details) unless subnet_details.nil?
+      load_balancer_map = prepare_map_using_resource_list(load_balancer_details) unless load_balancer_details.nil?
+      cluster_map = prepare_map_using_resource_list(Array.new(1) { cluster_details }) unless cluster_details.nil?
+
+      # Set variables
+      rms_input_variables = {}
+      oci_tags_json = app_config_hash[:oci_tags_json]
+      oci_tags_str = app_config_hash[:oci_tags_str]
+
+      rms_input_variables['load_balancers'] = load_balancer_map.nil? ? '{}' : load_balancer_map.to_json
+      rms_input_variables['subnets'] = subnet_map.nil? ? '{}' : subnet_map.to_json
+      rms_input_variables['cluster'] = cluster_map.nil? ? '{}' : cluster_map.to_json
+      rms_input_variables['oci_region'] = oci_region
+      rms_input_variables['log_analytics_log_group'] = oci_la_log_group_id
+      rms_input_variables['onm_compartment_id'] = onm_compartment_id
+      rms_input_variables['oci_domain'] = oci_domain unless oci_domain.nil?
+      rms_input_variables['tags'] = oci_tags_str
+
+      if existing_stack.nil?
+        logger.info('Stack does not exists. Creating...')
+        stack_details = create_stack_using_zip_upload(onm_compartment_id,
+                                                      stack_unique_identifier,
+                                                      rms_template_base64_encoded.chomp,
+                                                      rms_input_variables, oci_tags_json)
+        logger.info("Successfully created the stack. Stack ID: [#{stack_details.id}]")
+
+        # Create an entry of stack ID in the configmap
+        Util::StateManager.state.svc_log_stack_id = stack_details.id
+        Util::StateManager.update_state_configmap
+
+        # Run apply job
+        logger.info('Initiating apply job.')
+        job_detail = create_apply_job(stack_details.id, oci_tags_json)
+
+        # Wait for job status check till timeout
+        job_status_check(job_detail.id, stack_operation_timeout)
+      else
+        logger.info("Existing stack found. Stack ID: [#{existing_stack.id}]")
+
+        if check_for_existing_service_discovery_stack
+          # Create an entry of stack ID in the configmap
+          Util::StateManager.state.svc_log_stack_id = existing_stack.id
+          Util::StateManager.update_state_configmap
+        end
+
+        # Fetch stack details to compare the existing list of resources with the new ones
+        logger.info("Fetching variables. Stack ID: [#{existing_stack.id}]")
+
+        stack_details = fetch_stack_details(existing_stack.id)
+        existing_load_balancer_details = JSON.parse(stack_details.variables['load_balancers'])
+        existing_subnets_details = JSON.parse(stack_details.variables['subnets'])
+
+        # Check if an update to resource list is required
+        load_balancer_diffs = calculate_deltas(existing_load_balancer_details, load_balancer_map)
+        subnet_diffs = calculate_deltas(existing_subnets_details, subnet_map)
+
+        load_balancer_diffs[:deletions] = verify_deleted_resources(load_balancer_diffs[:deletions], Enum::InfrastructureResourceDiscovery::LOAD_BALANCER)
+        subnet_diffs[:deletions] = verify_deleted_resources(subnet_diffs[:deletions], Enum::InfrastructureResourceDiscovery::SUBNET)
+
+        # Cleanup retry eligible resource map in config map. The resources which were assumed to be deleted in previous
+        # runs might not have been actually deleted but their retry references might still be there in config map
+        retry_eligible_resource_map = parse_key_value_pairs(Util::StateManager.state.retry_eligible_resources)
+
+        load_balancer_map.each do |key, value|
+          retry_eligible_resource_map.delete(key)
+        end
+
+        subnet_map.each do |key, value|
+          retry_eligible_resource_map.delete(key)
+        end
+
+        retry_eligible_resource_map_str = to_key_value_string(retry_eligible_resource_map)
+        Util::StateManager.state.retry_eligible_resources = retry_eligible_resource_map_str
+        Util::StateManager.update_state_configmap
+
+        skip_apply_job = false
+
+        if subnet_diffs[:deletions].length.positive? || load_balancer_diffs[:deletions].length.positive?
+          # Scenario 1: Resources were deleted. Even if new resources are discovered,
+          # retries for deleted resources should be exhausted before updating the
+          # resource list and proceeding with stack operations.
+          logger.info('Resource deletion detected. Performing a double-check to confirm the deletion status.')
+
+          lb_deletion_status_check = deletion_retries_exhausted?(load_balancer_diffs[:deletions])
+          subnet_deletion_status_check = deletion_retries_exhausted?(subnet_diffs[:deletions])
+
+          if lb_deletion_status_check && subnet_deletion_status_check
+            logger.info('Resource(s) confirmed as deleted. Proceeding to delete associated logs.')
+            rms_input_variables['load_balancers'] = update_resource_list(existing_load_balancer_details, load_balancer_diffs).to_json
+            rms_input_variables['subnets'] = update_resource_list(existing_subnets_details, subnet_diffs).to_json
+
+            # Clear the retry eligible resource list in config map
+            retry_eligible_resource_map = {}
+            retry_eligible_resource_map_str = to_key_value_string(retry_eligible_resource_map)
+            logger.info('Stack updated with deleted resources')
+            logger.debug("Updating 'retry_eligible_resources' in Config Map [#{retry_eligible_resource_map_str}]")
+
+            Util::StateManager.state.retry_eligible_resources = retry_eligible_resource_map_str
+            Util::StateManager.update_state_configmap
+            skip_apply_job = false
+          else
+            logger.info('Resource log deletion skipped due to required wait time; skipping stack apply.')
+            skip_apply_job = true
+          end
+        elsif subnet_diffs[:additions].length.positive? || load_balancer_diffs[:additions].length.positive?
+          # Scenario 2: Only additions occurred. Update resource list and proceed with stack APPLY.
+          logger.info('New resources have been discovered; Updating resource list.')
+          rms_input_variables['load_balancers'] = update_resource_list(existing_load_balancer_details, load_balancer_diffs).to_json
+          rms_input_variables['subnets'] = update_resource_list(existing_subnets_details, subnet_diffs).to_json
+          skip_apply_job = false
+        else
+          # Scenario 3: No new resources discovered.
+          logger.info('No new resources have been discovered.')
+          # Fetch stack job list and validate the job status of the latest job run
+          logger.info('Checking the status of last 3 stack operations.')
+          job_list = list_jobs(existing_stack.id)
+
+          failed_apply_job_limit = 3
+          failed_job_count = 0
+
+          # Will iterate through the job list; we have last 3 stack operation records
+          job_list.each_with_index do |job, index|
+            if index == 0
+              # If the most recent job is success
+              if job.operation == Enum::StackJobOperationEnum::APPLY and job.lifecycle_state == Enum::StackJobLifecycleStateEnum::SUCCEEDED
+                logger.info("Latest Job operation: #{job.operation} is successful. Stack apply operation skipped.")
+                skip_apply_job = true
+                break
+              end
+
+              # If the most recent job is still running
+              if [Enum::StackJobLifecycleStateEnum::IN_PROGRESS, Enum::StackJobLifecycleStateEnum::ACCEPTED].include?(job.lifecycle_state)
+                logger.info('The latest job is in progress or accepted state. Stack apply operation skipped.')
+                skip_apply_job = true
+                break
+              end
+            end
+
+            logger.info("Job #{index}: #{job.operation} - #{job.lifecycle_state} - #{job.time_created}")
+
+            if job.lifecycle_state == Enum::StackJobLifecycleStateEnum::FAILED && job.operation == Enum::StackJobOperationEnum::APPLY
+              failed_job_count += 1
+              logger.info("Failed Apply Job Count: #{failed_job_count}")
+            end
+
+            if failed_job_count >= failed_apply_job_limit
+              logger.error('Multiple sequential FAILED Stack APPLY operations detected. Stack apply operation skipped.')
+              skip_apply_job = true
+              break
+            end
+
+            if index == 2
+              break # We only track latest 3 jobs
+            end
+          end
+        end
+
+        unless skip_apply_job
+          logger.info('Updating stack terraform and input variables.')
+          update_stack(existing_stack.id, rms_input_variables, rms_template_base64_encoded)
+
+          logger.info('Proceeding with stack apply operation.')
+          logger.info("Stack variables updated. Stack ID: [#{existing_stack.id}]")
+          # Run apply job
+          logger.info('Initiating apply job.')
+          job_detail = create_apply_job(existing_stack.id, oci_tags_json)
+          # Wait for job status check till timeout
+          job_status_check(job_detail.id, stack_operation_timeout)
+        end
+      end
+    end
+
+    def prepare_map_using_resource_list(resource_list)
+      resource_map = {}
+      resource_list.each do |item|
+        # OCID is set a key
+        resource_map[item[:ocid]] = { name: item[:name], ocid: item[:ocid], compartment_id: item[:compartmentId] }
+      end
+      resource_map
+    end
+
+    def update_resource_list(existing_resource_list, resource_list_diff)
+      # Loop through additions and add to existing list
+      resource_list_diff[:additions].each do |key, value|
+        existing_resource_list[key] = value
+      end
+
+      # Loop through deletions and remove from existing list
+      resource_list_diff[:deletions].each do |key, value|
+        existing_resource_list.delete(key)
+      end
+
+      return existing_resource_list
+    end
+
+    def calculate_deltas(previous_resource_list, current_resource_list)
+      resource_additions = {}
+      resource_deletions = {}
+
+      # Loop through current list and check if it has new resource as compared to previous list
+      current_resource_list.each do |key, value|
+        if previous_resource_list[key].nil?
+          resource_additions[key] = value
+        end
+      end
+
+      # Loop through previous list and check if it does not have resource in the current list
+      previous_resource_list.each do |key, value|
+        if current_resource_list[key].nil?
+          resource_deletions[key] = value
+        end
+      end
+
+      { additions: resource_additions, deletions: resource_deletions }
+    end
+
+    def verify_deleted_resources(deleted_resource_list, resource_type)
+      false_positive_resources = []
+
+      # Check resource status
+      deleted_resource_list.each do |key, value|
+        resource_details = nil
+
+        case resource_type
+        when Enum::InfrastructureResourceDiscovery::SUBNET
+          resource_details = Discover::Infrastructure.fetch_subnet_details(key)
+        when Enum::InfrastructureResourceDiscovery::LOAD_BALANCER
+          resource_details = Discover::Infrastructure.fetch_load_balancer_details(key)
+        end
+
+        # Resources with 200 are not actually deleted.
+        if resource_details[:status] == 200
+          logger.trace("#{resource_details[:status]} encountered for [#{key}]. Resource actually not deleted.")
+          false_positive_resources.push(key)
+
+          # Remove the resource from config map if it exists
+          retry_eligible_resource_map = parse_key_value_pairs(Util::StateManager.state.retry_eligible_resources)
+          retry_eligible_resource_map.delete(key)
+          retry_eligible_resource_map_str = to_key_value_string(retry_eligible_resource_map)
+          Util::StateManager.state.retry_eligible_resources = retry_eligible_resource_map_str
+          Util::StateManager.update_state_configmap
+
+          next
+        end
+
+        # Wait only for 404 and give it retries. Handle 401 same as 401
+        if resource_details[:status] == 404 || resource_details[:status] == 401
+          logger.trace("#{resource_details[:status]} encountered for [#{key}]. Verified deletion.")
+          next
+        end
+
+        # For 429 and 5xx we don't know the correct state of resource.
+        # Don't count these statuses in retries and skip the STACK apply.
+        if resource_details[:status] == 429 || resource_details[:status] >= 500
+          logger.error("#{resource_details[:status]} encountered for [#{key}]. Skipping Stack APPLY; Unable to verify correct resource state.")
+          raise StandardError, 'Skipping Stack APPLY; Unable to verify correct resource state.'
+        end
+      end
+
+      false_positive_resources.each do |false_positive_resource|
+        deleted_resource_list.delete(false_positive_resource)
+      end
+
+      deleted_resource_list
+    end
+
+    def deletion_retries_exhausted?(resource_list)
+      # Check the number of retries remaining for retry eligible resources. Fetch data from config map
+      return true if resource_list.empty?
+
+      can_delete = true
+      retry_eligible_resource_map = parse_key_value_pairs(Util::StateManager.state.retry_eligible_resources)
+
+      resource_list.each do |key, value|
+        if retry_eligible_resource_map.has_key?(key)
+          retry_remaining = retry_eligible_resource_map[key]
+          retry_remaining -= 1
+          if retry_remaining.positive?
+            logger.info("#{retry_remaining} deletion retry attempts remaining for [#{key}]")
+            can_delete = false
+          else
+            logger.info("[#{key}] deletion retry attempts exhausted. Marked for deletion.")
+          end
+          retry_eligible_resource_map[key] = retry_remaining
+        else
+          logger.info("First occurrence of [#{key}] as retry eligible resource, #{DELETION_RETRY_ATTEMPTS} attempts remaining")
+          retry_eligible_resource_map[key] = DELETION_RETRY_ATTEMPTS
+          can_delete = false
+        end
+      end
+
+      can_delete = retry_eligible_resource_map.values.all? { |value| value <= 0 }
+
+      # Write back to config map
+      retry_eligible_resource_map_str = to_key_value_string(retry_eligible_resource_map)
+      logger.debug("Updating 'retry_eligible_resources' in Config Map [#{retry_eligible_resource_map_str}]")
+
+      Util::StateManager.state.retry_eligible_resources = retry_eligible_resource_map_str
+      Util::StateManager.update_state_configmap
+
+      can_delete
+    end
+
+    def prepare_stack_unique_identifier(cluster_ocid)
+      substring_length = -11
+      # Pick the last characters of cluster OCID indicated by 'substring_length'
+      truncated_cluster_ocid = cluster_ocid[substring_length..-1]
+      format('%<stack_prefix>s-%<cluster_ocid>s', stack_prefix: RESOURCE_MANAGER_STACK_PREFIX,
+                                                  cluster_ocid: truncated_cluster_ocid)
+    end
+
+    def fetch_stack_list(compartment_id)
+      opts = { compartment_id: compartment_id }
+      begin
+        client = Util::OCIClients.get_clients[:rms_client]
+        response = client.list_stacks(opts)
+        response.data
+      rescue StandardError => e
+        logger.error("Error while fetching list of stacks: #{e}")
+        raise StandardError, 'Error while fetching list of stacks.'
+      end
+    end
+
+    def create_stack_using_zip_upload(compartment_id, stack_unique_identifier, rms_template_base64_encoded,
+                                      rms_input_variables, oci_tags)
+      begin
+        client = Util::OCIClients.get_clients[:rms_client]
+        response =
+          client.create_stack(
+            OCI::ResourceManager::Models::CreateStackDetails.new(
+              compartment_id: compartment_id,
+              terraform_version: '1.2.x',
+              display_name: stack_unique_identifier,
+              description: stack_unique_identifier,
+              config_source:
+                OCI::ResourceManager::Models::CreateZipUploadConfigSourceDetails
+                .new(
+                  zip_file_base64_encoded: rms_template_base64_encoded
+                ),
+              variables: rms_input_variables,
+              freeform_tags: oci_tags['freeformTags'],
+              defined_tags: oci_tags['definedTags']
+            )
+          )
+        @stack_creation_response = response.data
+      rescue StandardError => e
+        logger.error("Error while creating stack: #{e}")
+        raise StandardError, 'Error while creating stack.'
+      end
+      @stack_creation_response
+    end
+
+    def list_jobs(stack_id)
+      # Note - :limit option return inconsistent number of results, regardless of the limit size
+      # without :limit, API returns 63 records
+      opts = { stack_id: stack_id, sort_by: 'TIMECREATED', sort_order: 'DESC' }
+      begin
+        client = Util::OCIClients.get_clients[:rms_client]
+        response = client.list_jobs(opts)
+        @list_jobs_response = response.data
+      rescue StandardError => e
+        logger.error("Error while fetching list of jobs: #{e}")
+        raise StandardError, 'Error while fetching list of jobs.'
+      end
+      @list_jobs_response
+    end
+
+    def job_status(job_id)
+      begin
+        client = Util::OCIClients.get_clients[:rms_client]
+        response = client.get_job(job_id)
+        @job_status = response.data
+      rescue StandardError => e
+        logger.error("Error while fetching stack job status: #{e}")
+        raise StandardError, 'Error while fetching stack job status.'
+      end
+      @job_status
+    end
+
+    def job_status_check(job_id, operation_timeout)
+      initial_delay_in_sec = 5
+      interval_between_checks = 10
+
+      logger.info("Checking job status. Job ID: [#{job_id}]")
+      sleep(initial_delay_in_sec)
+
+      status_check_time = Time.now.to_i
+
+      loop do
+        status_details = job_status(job_id)
+
+        lifecycle_state = status_details.lifecycle_state
+        log_msg = "#{status_details.operation} job run #{lifecycle_state.downcase}. Job OCID: #{status_details.id}, Stack OCID: #{status_details.stack_id}"
+
+        if lifecycle_state == Enum::StackJobLifecycleStateEnum::SUCCEEDED
+          logger.info(log_msg)
+          return
+        end
+
+        if not [Enum::StackJobLifecycleStateEnum::ACCEPTED, Enum::StackJobLifecycleStateEnum::IN_PROGRESS].include?(lifecycle_state)
+          logger.error("Job lifecycle state [#{lifecycle_state}] suggests termination")
+          raise StandardError, "Job lifecycle state [#{lifecycle_state}] suggests termination"
+        end
+
+        logger.info("Waiting for job to conclude. State: [#{lifecycle_state}]; Job ID: [#{job_id}]")
+        sleep(interval_between_checks)
+
+        if Time.now.to_i - status_check_time > operation_timeout
+          logger.warn("TIMEOUT: Job did not conclude within stipulated time. Job ID: [#{job_id}]")
+          break
+        end
+      end
+    end
+
+    def create_apply_job(stack_id, oci_tags)
+      operation = Enum::StackJobOperationEnum::APPLY
+      display_name = operation.downcase + '-job-' + Time.now.to_i.to_s
+
+      terraform_advanced_options = OCI::ResourceManager::Models::TerraformAdvancedOptions.new(
+        detailed_log_level: 'INFO'
+      )
+
+      job_operation_details = OCI::ResourceManager::Models::CreateApplyJobOperationDetails.new(
+        operation: operation,
+        execution_plan_strategy: 'AUTO_APPROVED',
+        is_provider_upgrade_required: true,
+        terraform_advanced_options: terraform_advanced_options
+      )
+
+      begin
+        client = Util::OCIClients.get_clients[:rms_client]
+        response = client.create_job(
+          OCI::ResourceManager::Models::CreateJobDetails.new(
+            stack_id: stack_id,
+            display_name: display_name,
+            operation: operation,
+            job_operation_details: job_operation_details,
+            freeform_tags: oci_tags['freeformTags'],
+            defined_tags: oci_tags['definedTags']
+          )
+        )
+        @job_creation_response = response.data
+      rescue StandardError => e
+        logger.error("Error while creating job: #{e}")
+        raise StandardError, 'Error while creating job.'
+      end
+      @job_creation_response
+    end
+
+    def update_stack(stack_ocid, rms_input_variables, rms_template_base64_encoded)
+      begin
+        client = Util::OCIClients.get_clients[:rms_client]
+        response = client.update_stack(stack_ocid,
+                                       OCI::ResourceManager::Models::UpdateStackDetails.new(
+                                         variables: rms_input_variables,
+                                         config_source: OCI::ResourceManager::Models::UpdateZipUploadConfigSourceDetails.new(zip_file_base64_encoded: rms_template_base64_encoded)
+                                       )
+                                      )
+        @stack_update_response = response.data
+      rescue StandardError => e
+        logger.error("Error while updating stack: #{e}")
+        raise StandardError, 'Error while updating stack.'
+      end
+      @stack_update_response
+    end
+
+    def fetch_stack_details(stack_ocid)
+      begin
+        client = Util::OCIClients.get_clients[:rms_client]
+        response = client.get_stack(stack_ocid)
+        @stack_details_response = response.data
+      rescue StandardError => e
+        logger.error("Error while retrieving stack details: #{e}")
+        raise StandardError, 'Error while retrieving stack details.'
+      end
+      @stack_details_response
+    end
+
+    def fetch_log_analytics_entity_details(oci_la_namespace, oci_la_cluster_entity_id)
+      begin
+        client = Util::OCIClients.get_clients[:la_client]
+        response = client.get_log_analytics_entity(oci_la_namespace, oci_la_cluster_entity_id)
+        @log_analytics_entity_details = response.data
+      rescue StandardError => e
+        logger.error("Error while fetching log analytics entity details: #{e}")
+        raise StandardError, 'Error while fetching log analytics entity details.'
+      end
+      @log_analytics_entity_details
+    end
+  end
+end
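The core of the new module is the deletion bookkeeping: calculate_deltas diffs the OCID map stored in the stack's variables against the freshly discovered resources, verify_deleted_resources probes each missing OCID to filter out false positives, and deletion_retries_exhausted? counts down DELETION_RETRY_ATTEMPTS discovery runs per missing resource before the stack is re-applied without it. A minimal standalone sketch of that countdown follows (plain Ruby, no OCI or Kubernetes calls; the OCIDs are placeholders and the helpers are local re-implementations, not the gem's methods):

# Standalone sketch of the ServiceLogs delta / retry-countdown flow (illustrative only).
DELETION_RETRY_ATTEMPTS = 5

# Mirrors calculate_deltas: compare the map kept in the stack variables with the
# map built from the current discovery run.
def calculate_deltas(previous, current)
  {
    additions: current.reject { |ocid, _| previous.key?(ocid) },
    deletions: previous.reject { |ocid, _| current.key?(ocid) }
  }
end

# Mirrors the countdown in deletion_retries_exhausted?: a resource must stay missing
# for DELETION_RETRY_ATTEMPTS consecutive runs before its logs are removed.
def retries_exhausted?(deletions, retry_state)
  deletions.each_key do |ocid|
    retry_state[ocid] = retry_state.fetch(ocid, DELETION_RETRY_ATTEMPTS + 1) - 1
  end
  retry_state.values.all? { |remaining| remaining <= 0 }
end

previous = { 'ocid1.loadbalancer.oc1..aaa' => { 'name' => 'lb-1' } } # placeholder OCID
current  = {} # the load balancer is no longer discovered
retry_state = {}

6.times do |run|
  diffs = calculate_deltas(previous, current)
  puts "run #{run + 1}: exhausted=#{retries_exhausted?(diffs[:deletions], retry_state)}"
end
# exhausted is false for the first five runs and true on the sixth

In the gem itself the countdown is persisted between runs through StateManager's retry_eligible_resources string rather than an in-memory hash, so the behaviour survives pod restarts.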
data/lib/util/state_manager.rb
CHANGED
@@ -28,6 +28,8 @@ module Util
         # therefore, events tracker configmap does not exist in cluster
         logger.info("configmap/#{@events_tracker_configmap} not found.")
         @state.last_timestamp = 0 # 1 January 1970 00:00
+        @state.retry_eligible_resources = ''
+        @state.svc_log_stack_id = ''
         begin
           Util::KubectlOps.create_configmap(@events_tracker_configmap, @namespace, @state.to_hash)
         rescue KubeException => e
@@ -37,14 +39,22 @@ module Util
         logger.info("Created new Configmap - #{@events_tracker_configmap}.")
       else
         last_timestamp = state_config_map[:data][:last_timestamp]
-        logger.info("
+        logger.info("Timestamp fetched from configmap - #{last_timestamp} - #{Time.at(last_timestamp.to_i).getutc}")
         @state.last_timestamp = last_timestamp.to_i
+
+        retry_eligible_resource = state_config_map[:data][:retry_eligible_resources]
+        logger.debug("Non 2xx resource list fetched from configmap [#{retry_eligible_resource}]")
+        @state.retry_eligible_resources = retry_eligible_resource
+
+        svc_log_stack_id = state_config_map[:data][:svc_log_stack_id]
+        logger.debug("Service log stack OCID fetched from configmap [#{svc_log_stack_id}]")
+        @state.svc_log_stack_id = svc_log_stack_id
       end
       @is_intialized = true
     end

     def init_check
-      raise StandardError, 'Method call before
+      raise StandardError, 'Method call before initializing Module - StateManager' unless @is_intialized
     end

     def state
data/lib/util/string_utils.rb
CHANGED
@@ -12,5 +12,53 @@ module Util
       option_selections.prepend('{')
       option_selections.concat('}')
     end
+
+    def parse_key_value_pairs(input_string)
+      result = {}
+
+      return result if input_string.nil?
+
+      pairs = input_string.split(';')
+      pairs.each do |pair|
+        key, value = pair.split(':')
+        result[key.strip] = value.strip.to_i
+      end
+      result
+    end
+
+    def to_key_value_string(hash)
+      hash.map { |key, value| "#{key}:#{value}" }.join(';')
+    end
+
+    def parse_tags_to_hash(input_string)
+      formatted_string = input_string.gsub('=', ':')
+      JSON.parse(formatted_string)
+    rescue StandardError => e
+      logger.error('Failed to parse tags. Not a valid JSON')
+      raise e
+    end
+
+    def json_key_value_pair?(json_string)
+      # Check if the parsed data is a hash
+      return false unless json_string.is_a?(Hash)
+
+      # Validate that all keys are strings
+      json_string.keys.all? { |key| key.is_a?(String) }
+      json_string.values.all? { |value| value.is_a?(String) }
+    end
+
+    def parse_defined_tags(defined_tags)
+      defined_tags.each_with_object({}) do |(key, value), result|
+        # Split the key into main key and subkey
+        main_key, sub_key = key.split('.', 2)
+
+        raise StandardError, 'Key of defined tag not formatted correctly' if main_key.nil? || sub_key.nil?
+        raise StandardError, 'Value of defined tag found to be empty' if value.nil?
+
+        # Assign the value to the nested structure
+        result[main_key] ||= {}
+        result[main_key][sub_key] = value
+      end
+    end
   end
 end
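The new helpers give ServiceLogs a flat "ocid:count" encoding for the retry map (pairs joined with ';', which is unambiguous because OCIDs contain neither ':' nor ';') and a way to expand "Namespace.Key" defined-tag names into the nested hash the OCI SDK expects. A small self-contained round trip of both conversions, re-implemented here for illustration with made-up values:

# Round-trip of the retry_eligible_resources encoding and the defined-tag expansion.
def to_key_value_string(hash)
  hash.map { |key, value| "#{key}:#{value}" }.join(';')
end

def parse_key_value_pairs(input_string)
  return {} if input_string.nil? || input_string.empty?

  input_string.split(';').each_with_object({}) do |pair, result|
    key, value = pair.split(':')
    result[key.strip] = value.strip.to_i
  end
end

retry_map = { 'ocid1.subnet.oc1..aaa' => 5, 'ocid1.loadbalancer.oc1..bbb' => 2 }
encoded = to_key_value_string(retry_map)
puts encoded                                        # ocid1.subnet.oc1..aaa:5;ocid1.loadbalancer.oc1..bbb:2
puts(parse_key_value_pairs(encoded) == retry_map)   # true

# parse_defined_tags turns { "Namespace.Key" => value } into { "Namespace" => { "Key" => value } }
defined_tags = { 'Operations.CostCenter' => '42' }  # placeholder tag namespace and value
nested = defined_tags.each_with_object({}) do |(key, value), result|
  namespace, tag_key = key.split('.', 2)
  (result[namespace] ||= {})[tag_key] = value
end
puts nested.inspect                                 # {"Operations"=>{"CostCenter"=>"42"}}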
data/lib/version.rb
CHANGED