oci-logging-analytics-kubernetes-discovery 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/oci-loganalytics-kubernetes-discovery +74 -27
- data/lib/config/oci_client_retry_config.rb +11 -8
- data/lib/discover/infrastructure.rb +79 -27
- data/lib/discover/object.rb +19 -4
- data/lib/dto/infra/load_balancer_payload.rb +32 -0
- data/lib/dto/infra/node_pool_payload.rb +28 -0
- data/lib/dto/infra/{node_pool_entity_payload.rb → resource_payload.rb} +6 -6
- data/lib/dto/infra/subnet_payload.rb +34 -0
- data/lib/dto/infra_objects_payload.rb +3 -3
- data/lib/dto/kubernetes_objects_payload.rb +19 -15
- data/lib/dto/state.rb +7 -3
- data/lib/enum/infrastructure_resource_discovery.rb +1 -0
- data/lib/enum/object_client_mapping_enum.rb +1 -1
- data/lib/enum/stack_job_lifecycle_state_enum.rb +14 -0
- data/lib/enum/stack_job_operation_enum.rb +10 -0
- data/lib/infra_resources.rb +142 -41
- data/lib/objects_resources.rb +16 -6
- data/lib/oci_loganalytics_resources_discovery.rb +105 -57
- data/lib/util/kube_client.rb +1 -0
- data/lib/util/kubectl_ops.rb +1 -1
- data/lib/util/log_analytics.rb +2 -2
- data/lib/util/oci_clients.rb +137 -78
- data/lib/util/service_logs.rb +559 -0
- data/lib/util/state_manager.rb +12 -2
- data/lib/util/string_utils.rb +48 -0
- data/lib/version.rb +1 -1
- metadata +9 -7
- data/lib/dto/infra/cluster_entity_payload.rb +0 -22
- data/lib/dto/infra/load_balancers_entity_payload.rb +0 -22
- data/lib/dto/infra/subnet_entity_payload.rb +0 -22
- data/lib/dto/infra/vcn_entity_payload.rb +0 -22
data/lib/dto/kubernetes_objects_payload.rb
CHANGED
@@ -8,24 +8,27 @@ module Dto
     include Enum::KubernetesObjectsEnum
     extend Util::Logging

-    attr_accessor :cluster, :nodes, :pods, :services, :
-                  :
+    attr_accessor :cluster, :nodes, :pods, :services, :endpointslices, :deployments,
+                  :replicasets, :daemonsets, :cronjobs, :jobs, :statefulsets, :events, :namespaces,
+                  :infra

-    def initialize(cluster, nodes, pods, services,
-
+    def initialize(cluster, nodes, pods, services, endpointslices, deployments,
+                   replicasets, daemonsets, cronjobs, jobs, statefulsets, events, namespaces,
+                   infra)
       @cluster = cluster
       @nodes = nodes
       @pods = pods
       @services = services
-      @
+      @endpointslices = endpointslices
       @deployments = deployments
-      @
-      @
-      @
+      @replicasets = replicasets
+      @daemonsets = daemonsets
+      @cronjobs = cronjobs
       @jobs = jobs
-      @
+      @statefulsets = statefulsets
       @events = events
       @namespaces = namespaces
+      @infra = infra
     end

     def get_hash(obj, obj_name)
@@ -43,15 +46,16 @@ module Dto
         nodes: get_hash(@nodes, NODES.to_s),
         pods: get_hash(@pods, PODS.to_s),
         services: get_hash(@services, SERVICES.to_s),
-        endpointslices: get_hash(@
+        endpointslices: get_hash(@endpointslices, ENDPOINT_SLICES.to_s),
         deployments: get_hash(@deployments, DEPLOYMENTS.to_s),
-        replicasets: get_hash(@
-        daemonsets: get_hash(@
-        cronjobs: get_hash(@
+        replicasets: get_hash(@replicasets, REPLICA_SETS.to_s),
+        daemonsets: get_hash(@daemonsets, DAEMON_SETS.to_s),
+        cronjobs: get_hash(@cronjobs, CRON_JOBS.to_s),
         jobs: get_hash(@jobs, JOBS.to_s),
-        statefulsets: get_hash(@
+        statefulsets: get_hash(@statefulsets, STATEFUL_SETS.to_s),
         events: get_hash(@events, EVENTS.to_s),
-        namespaces: get_hash(@namespaces, NAMESPACES.to_s)
+        namespaces: get_hash(@namespaces, NAMESPACES.to_s),
+        infra: @infra.nil? ? nil : @infra.to_hash
       }.compact
     end
   end
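For orientation, a minimal sketch of the widened constructor, assuming the DTO file has been required. All argument values below are placeholders; only the arity (14 arguments, infra last) and the nil handling of :infra come from the diff above.

    # Placeholder stand-ins for the processed cluster .. namespaces objects.
    objs = Array.new(13) { {} }
    payload = Dto::KubernetesObjectsPayLoad.new(*objs, nil)   # infra may be nil
    # to_hash emits infra: @infra.to_hash when infra is present;
    # a nil infra entry is dropped by the trailing .compact.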
data/lib/dto/state.rb
CHANGED
@@ -3,16 +3,20 @@

 module Dto
   class State
-    attr_accessor :last_timestamp
+    attr_accessor :last_timestamp, :retry_eligible_resources, :svc_log_stack_id

-    def initialize(last_timestamp = nil)
+    def initialize(last_timestamp = nil, retry_eligible_resources = nil, svc_log_stack_id = nil)
       @last_timestamp = last_timestamp unless last_timestamp.nil?
+      @retry_eligible_resources = retry_eligible_resources unless retry_eligible_resources.nil?
+      @svc_log_stack_id = svc_log_stack_id unless svc_log_stack_id.nil?
     end

     # make sure all values are strings
     def to_hash
       {
-        last_timestamp: @last_timestamp.to_s
+        last_timestamp: @last_timestamp.to_s,
+        retry_eligible_resources: @retry_eligible_resources.to_s,
+        svc_log_stack_id: @svc_log_stack_id.to_s
       }.compact
     end
   end
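A small usage sketch of the extended state DTO; the values are hypothetical, and only the attribute names and the to_s/compact behaviour come from the diff:

    state = Dto::State.new(1717171717, ['ocid1.subnet.oc1..example'], 'ocid1.ormstack.oc1..placeholder')
    state.to_hash
    # => { last_timestamp: "1717171717",
    #      retry_eligible_resources: "[\"ocid1.subnet.oc1..example\"]",
    #      svc_log_stack_id: "ocid1.ormstack.oc1..placeholder" }

Every value is stringified with to_s, so non-string inputs such as arrays or integers land in the state config map as their string representations.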
data/lib/enum/stack_job_lifecycle_state_enum.rb
ADDED
@@ -0,0 +1,14 @@
+## Copyright (c) 2024 Oracle and/or its affiliates.
+## The Universal Permissive License (UPL), Version 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+module Enum
+  module StackJobLifecycleStateEnum
+    # Documentation: https://docs.oracle.com/en-us/iaas/Content/ResourceManager/Tasks/jobs.htm
+    ACCEPTED = 'ACCEPTED'.freeze
+    IN_PROGRESS = 'IN_PROGRESS'.freeze
+    FAILED = 'FAILED'.freeze
+    SUCCEEDED = 'SUCCEEDED'.freeze
+    CANCELING = 'CANCELING'.freeze
+    CANCELED = 'CANCELED'.freeze
+  end
+end
data/lib/enum/stack_job_operation_enum.rb
ADDED
@@ -0,0 +1,10 @@
+## Copyright (c) 2024 Oracle and/or its affiliates.
+## The Universal Permissive License (UPL), Version 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+module Enum
+  module StackJobOperationEnum
+    APPLY = 'APPLY'.freeze
+    PLAN = 'PLAN'.freeze
+    DESTROY = 'DESTROY'.freeze
+  end
+end
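These two new files only define frozen string constants; one plausible (hypothetical) use is gating a Resource Manager stack-job polling loop on terminal lifecycle states, for example:

    # Sketch only: job_state is a placeholder value, not an SDK call.
    terminal_states = [
      Enum::StackJobLifecycleStateEnum::SUCCEEDED,
      Enum::StackJobLifecycleStateEnum::FAILED,
      Enum::StackJobLifecycleStateEnum::CANCELED
    ].freeze
    job_state = Enum::StackJobLifecycleStateEnum::IN_PROGRESS
    keep_polling = !terminal_states.include?(job_state)   # => true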
data/lib/infra_resources.rb
CHANGED
@@ -1,6 +1,8 @@
 ## Copyright (c) 2024 Oracle and/or its affiliates.
 ## The Universal Permissive License (UPL), Version 1.0 as shown at https://oss.oracle.com/licenses/upl/

+require 'set'
+
 # Infra
 require_relative './discover/infrastructure'

@@ -8,11 +10,10 @@ require_relative './discover/infrastructure'
 require_relative './dto/infra_objects_payload'

 # DTOs
-require_relative './dto/infra/
-require_relative './dto/infra/
-require_relative './dto/infra/
-require_relative './dto/infra/
-require_relative './dto/infra/node_pool_entity_payload'
+require_relative './dto/infra/resource_payload'
+require_relative './dto/infra/subnet_payload'
+require_relative './dto/infra/node_pool_payload'
+require_relative './dto/infra/load_balancer_payload'

 # Util
 require_relative './util/logging'
@@ -23,66 +24,166 @@ module InfraResources

   module_function

-
+  IP_Address = Struct.new(:ip, :isPublic)
+  Nodes = Struct.new(:id, :name, :subnetId, :privateIp, :publicIp, :lifecycleState)
+
+  def get_infra_resources(app_config, cluster_config, cluster_id, lb_ip_array)
+
     # Fetch cluster details using cluster OCID provided as argument during program run
     logger.debug('Fetching cluster details.')
-    ce_response = Discover::Infrastructure.fetch_cluster_details(
-
+    ce_response = Discover::Infrastructure.fetch_cluster_details(cluster_id)
+    cluster_details = Dto::Infra::ResourcePayload.new(ce_response.name, ce_response.id, ce_response.compartment_id)

     # Fetch VCN associated with cluster
     logger.debug('Fetching virtual cloud network details.')
-    vcn_response = Discover::Infrastructure.fetch_vcn_response(
-
+    vcn_response = Discover::Infrastructure.fetch_vcn_response(ce_response.vcn_id)
+    vcn_details = Dto::Infra::ResourcePayload.new(vcn_response.display_name, vcn_response.id, vcn_response.compartment_id)

-    #
-    logger.debug('Fetching
-    subnet_response = Discover::Infrastructure.fetch_subnet_list_response(auth_object, ce_response.compartment_id, vcn_response.id)
+    # Use resource search query to fetch load balancers associated with cluster OCID
+    logger.debug('Fetching load balancers associated with cluster.')

-
-    subnet_entity_payload = []
-    subnet_response.each do |subnet|
-      next if subnet.nil?
+    load_balancer_details = []

-
-
+    lb_ip_array.each do |ip|
+      structured_query = "query loadbalancer resources matching '#{ip}'"

-
-
-
+      resource_query_search_response = Discover::Infrastructure.search_with_structured_query(structured_query)
+
+      # TODO: We only expect one LB per IP address, remove looping over result ?

-
+      # Load Balancer Lifecycle State: CREATING, ACTIVE, DELETING
+      resource_query_search_response.items.each do |item|
+        lb_response = Discover::Infrastructure.fetch_load_balancer_details(item.identifier)
+        next if lb_response[:data].nil?
+        lb_ip_addresses = []

-
+        lb_response[:data].ip_addresses.each do |item|
+          lb_ip_addresses.push(IP_Address.new(item.ip_address, item.is_public))
+        end

-
-
-
-
-
+        subnet_ids = lb_response[:data].subnet_ids.nil? ? [] : lb_response[:data].subnet_ids
+
+        load_balancer_details.push(Dto::Infra::LoadBalancerPayload.new(lb_response[:data].display_name,
+                                                                       lb_response[:data].id,
+                                                                       lb_response[:data].compartment_id,
+                                                                       lb_response[:data].is_private,
+                                                                       lb_ip_addresses,
+                                                                       lb_response[:data].lifecycle_state,
+                                                                       subnet_ids))
+
+        logger.debug("LB detected : #{lb_response}")
       end
     end

+    logger.info("Discovered load balancer count: #{load_balancer_details.length}")
+
     # Iterate through all the compartments (starting from root tenancy level) and fetch node pools
     # associated with the cluster OCID
-
-
-
-
+    node_pool_details = []
+    node_pool_subnet_ids = []
+    worker_node_subnet_ids = []
+
+    logger.debug('Fetching node pools associated with cluster.')
+
+    node_pool_compartment_id = nil
+    scoped_at_tenancy = false
+    if app_config[:probe_all_compartments]
+      # Tenancy OCID is compulsory input if probe_all_compartments flag is set
+      logger.info('Node pool discovery scope set at tenancy level.')
+
+      node_pool_compartment_id = app_config[:tenancy_ocid]
+      scoped_at_tenancy = true
+    else
+      logger.info('Node pool discovery scoped to cluster compartment.')
+      node_pool_compartment_id = ce_response.compartment_id
+    end
+
+    begin
+      compartment_list_response = Discover::Infrastructure.fetch_compartment_list(node_pool_compartment_id, scoped_at_tenancy)
+    rescue StandardError => e
+      logger.error("Error while fetching compartment list. Error: [#{e}]")
+      raise StandardError, 'Error while fetching compartment list.'
+    end
+
+    compartment_list = []
+    compartment_list_response[:data]&.each do |compartment|
+      compartment_list.push(compartment.id)
+    end
+
+    if scoped_at_tenancy
+      compartment_list.push(app_config[:tenancy_ocid])
+    else
+      compartment_list.push(ce_response.compartment_id)
+    end
+
+    compartment_list.each do |compartment|
+
+      node_pool_list_response = Discover::Infrastructure.fetch_node_pool_lists(compartment, ce_response.id)
+
+      node_pool_list_response.each do |node_pool|
+
+        subnet_ids = node_pool.subnet_ids.nil? ? [] : node_pool.subnet_ids
+
+        subnet_ids.each do |node_pool_subnet|
+          node_pool_subnet_ids.push(node_pool_subnet)
+        end
+
+        node_pool_response = Discover::Infrastructure.fetch_node_pool(node_pool.id)
+
+        nodes = []
+        node_pool_response&.nodes&.each do |node_instance|
+          nodes.push(Nodes.new(node_instance.id, node_instance.name, node_instance&.subnet_id,
+                               node_instance&.private_ip, node_instance&.public_ip, node_instance.lifecycle_state))
+        end
+
+        node_pool_details.push(Dto::Infra::NodePoolPayload.new(node_pool.name, node_pool.id,
+                                                               node_pool.compartment_id, subnet_ids, nodes))
+
+        # Fetch worker node subnet details from placement configurations
+        placement_configs_details = node_pool.node_config_details.placement_configs
+        placement_configs_details.each do |placement_config|
+          next if placement_config.nil?
+
+          worker_node_subnet_ids.push(placement_config.subnet_id)
+        end
+      end
+    end
+
+    # Fetch details of subnets associated with cluster
+    # Subnet Lifecycle State: AVAILABLE, PROVISIONING, TERMINATED, TERMINATING, UPDATING
+    logger.debug('Fetching subnet details associated with cluster.')
+
+    subnet_resource_details = []
+    cluster_subnet_ids = []
+
+    kubernetes_api_endpoint_subnet_id = ce_response.endpoint_config.subnet_id
+    service_loadbalancer_subnet_ids = ce_response.options.service_lb_subnet_ids
+
+    cluster_subnet_ids.push(kubernetes_api_endpoint_subnet_id)
+    cluster_subnet_ids += service_loadbalancer_subnet_ids
+    cluster_subnet_ids += node_pool_subnet_ids
+    cluster_subnet_ids += worker_node_subnet_ids
+
+    # Remove duplicates. The | operator is used to compute the union of two arrays
+    # and ensures there are no duplicates. Adding the union of an array with [] does
+    # not change the original array but removes the duplicates
+    cluster_subnet_ids = cluster_subnet_ids | []

-
+    cluster_subnet_ids.each do |subnet_id|

-
-      next if node_pool.nil?
+      subnet_details = Discover::Infrastructure.fetch_subnet_details(subnet_id)

-
+      if !subnet_details[:data].nil? && subnet_details[:status] == 200
+        subnet = subnet_details[:data]
+        subnet_resource_details.push(Dto::Infra::SubnetPayload.new(subnet.display_name, subnet.id, subnet.compartment_id,
+                                                                   subnet.prohibit_internet_ingress, subnet.prohibit_public_ip_on_vnic,
+                                                                   subnet.cidr_block, subnet.lifecycle_state))
       end
     end

     Dto::InfraObjectsPayLoad.new(
-
-
+      cluster_details, vcn_details, subnet_resource_details,
+      load_balancer_details, node_pool_details
     )
   rescue StandardError => e
     logger.error("Error in getting infrastructure resources: #{e}")
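The subnet de-duplication above relies on Ruby's array union operator, as the new comments note. A standalone illustration with placeholder OCIDs:

    ids = ['ocid1.subnet.a', 'ocid1.subnet.b', 'ocid1.subnet.a']
    ids | []   # => ["ocid1.subnet.a", "ocid1.subnet.b"]  (duplicates removed, order kept)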
data/lib/objects_resources.rb
CHANGED
@@ -56,7 +56,7 @@ module ObjectsResources

     begin
       payload_executors = Enum::KubernetesObjectsEnum.constants(false).map do |object_type|
-        if Enum::
+        if Enum::ObjectClientMappingEnum.const_defined? object_type
           if enable_threading == true
             Concurrent::Future.execute({ executor: thread_pool }) do
               get_objects_resources(object_type)
@@ -80,11 +80,21 @@ module ObjectsResources
     end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
     logger.debug("Time elapsed in collecting objects: #{(end_time - start_time).round(2)} seconds.")

-    kubernetes_objects_processed =
-      cluster
-
-
-
+    kubernetes_objects_processed = {
+      cluster: cluster,
+      nodes: @nodes[:processed],
+      pods: @pods[:processed],
+      services: @services[:processed],
+      endpoint_slices: @endpoint_slices[:processed],
+      deployments: @deployments[:processed],
+      replica_sets: @replica_sets[:processed],
+      daemon_sets: @daemon_sets[:processed],
+      cron_jobs: @cron_jobs[:processed],
+      jobs: @jobs[:processed],
+      stateful_sets: @stateful_sets[:processed],
+      events: @events[:processed],
+      namespaces: @namespaces[:processed]
+    }

     kubernetes_objects_raw_responses = {
       nodes: @nodes[:raw],
data/lib/oci_loganalytics_resources_discovery.rb
CHANGED
@@ -16,6 +16,7 @@ require_relative './util/kube_client'
 require_relative './util/oci_clients'
 require_relative './util/log_analytics'
 require_relative './util/state_manager'
+require_relative './util/service_logs'

 # DTO
 require_relative './dto/payload/log_events'
@@ -39,7 +40,7 @@ module OciLogAnalyticsResourcesDiscovery
   module_function

   attr_accessor :cluster_config_hash, :auth_config_hash, :kube_config_hash, :app_config_hash,
-                :auth_object, :kube_clients, :oci_clients, :snapshot_id
+                :auth_object, :kube_clients, :oci_clients, :oci_region, :oci_domain, :snapshot_id

   @@oci_la_log_source_name = 'Kubernetes Objects Discovery Payload Logs'
   @@oci_la_log_path = 'UNDEFINED'
@@ -49,59 +50,78 @@ module OciLogAnalyticsResourcesDiscovery
     @auth_config_hash = auth_config_hash
     @kube_config_hash = kube_config_hash
     @app_config_hash = app_config_hash
+  end

-
-
-    options = { mode: app_config_hash[:mode] }
-    Util::OCIClients.initialize(@auth_config_hash, options)
+  def initiate_discovery
+    Util::OCIClients.initialize(@auth_config_hash)

     @auth_object = Util::OCIClients.get_auth_config_object
     @oci_clients = Util::OCIClients.get_clients
+    @oci_region = Util::OCIClients.get_region
+    @oci_domain = Util::OCIClients.get_domain

-
-    @snapshot_id = Time.now.to_i
-    get_kube_clients
-    Util::StateManager.init(@cluster_config_hash[:kubernetes_resourcename_prefix], @cluster_config_hash[:kubernetes_cluster_namespace])
-    Util::KubectlOps.set_chunk_limit @app_config_hash[:chunk_limit]
-    logger.debug("Chunk limit set to - #{@app_config_hash[:chunk_limit]}")
-    end
-  end
+    @kube_clients = Util::KubeClient.create_clients(@kube_config_hash)

-
-    logger.info('Initiating Kubernetes infrastructure discovery.')
-    begin
-      infra_objects_payload = get_infra_resources_payload
-    rescue StandardError => e
-      logger.error("Error occurred while fetching infrastructure resources. Error: #{e}")
-      raise StandardError, 'Error occurred while fetching infrastructure resources.'
-    end
+    Util::StateManager.init(@cluster_config_hash[:kubernetes_resourcename_prefix], @cluster_config_hash[:kubernetes_cluster_namespace])

-
-
+    # Collect infrastructure discovery data
+    infra_objects_payload = nil
+    infra_discovery_failed = false

-
-
-
-
-
-
-      logger.warn('No infrastructure objects discovered.')
-    end
-  end
-  end
+    logger.info('Initiating Kubernetes Objects discovery.')
+
+    # Collect platform discovery data
+    @snapshot_id = Time.now.to_i
+    Util::KubectlOps.set_chunk_limit @app_config_hash[:chunk_limit]
+    logger.debug("Chunk limit set to - #{@app_config_hash[:chunk_limit]}")

-  def initiate_object_discovery
     kubernetes_objects_payload = get_objects_resources_payload
+
     if @app_config_hash[:collect_warning_events_only]
       logger.info('Only Warning event logs will be collected as the following flag is set: --collect_warning_events_only')
     end
+
     kubernetes_objects_payload = filter_event_logs(kubernetes_objects_payload,
                                                    @app_config_hash[:collect_warning_events_only])

     logger.info('Kubernetes objects discovery payload created.')
-    logger.trace("Object Discovery Payload: \n#{Yajl.dump(kubernetes_objects_payload[:processed_objects].to_hash)}")
     logger.trace("Object Log Collection Payload: \n#{Yajl.dump(kubernetes_objects_payload[:raw_response].to_hash)}")

+    if @cluster_config_hash[:enable_infra_discovery]
+      logger.info('Initiating kubernetes infrastructure discovery.')
+      begin
+        lb_ip_array = parse_lb_ips(kubernetes_objects_payload)
+        logger.info("Discovered Load Balancer IPs: #{lb_ip_array}")
+        infra_objects_payload = InfraResources.get_infra_resources(
+          @app_config_hash,
+          @cluster_config_hash,
+          @cluster_config_hash[:kubernetes_cluster_id],
+          lb_ip_array
+        )
+        logger.trace("Infrastructure discovery payload: \n#{Yajl.dump(infra_objects_payload.to_hash)}")
+        logger.info('Infrastructure discovery payload created.')
+      rescue StandardError => e
+        # Not raising an error as platform discovery still needs to continue.
+        infra_discovery_failed = true
+        logger.error("Error occurred while fetching infrastructure resources. Error: #{e}")
+        logger.info('Proceeding with object discovery.')
+      end
+    else
+      logger.info('--kubernetes_cluster_id not provided. Infrastructure discovery skipped.') # TODO: should be Warn ?
+    end
+
+    # Combine discovery and infrastructure data
+    processed_objects = kubernetes_objects_payload[:processed_objects]
+    combined_data = Dto::KubernetesObjectsPayLoad.new(
+      processed_objects[:cluster], processed_objects[:nodes], processed_objects[:pods], processed_objects[:services], processed_objects[:endpoint_slices],
+      processed_objects[:deployments], processed_objects[:replica_sets], processed_objects[:daemon_sets], processed_objects[:cron_jobs],
+      processed_objects[:jobs], processed_objects[:stateful_sets], processed_objects[:events], processed_objects[:namespaces], infra_objects_payload
+    )
+
+    logger.info('Combined discovery payload created.')
+    logger.trace("Combined discovery payload: \n#{Yajl.dump(combined_data.to_hash)}")
+
+    # Send collected data
     if @app_config_hash[:skip_upload]
       logger.warn('--skip_upload Flag is set: Skipping payload upload to OCI logging analytics.')
     else
@@ -119,7 +139,8 @@ module OciLogAnalyticsResourcesDiscovery
       upload_data_via_discovery_api(zip_object_log_payload, object_logs_opts) unless zip_object_log_payload.nil?
       logger.info('Successfully uploaded object logs to OCI')

-      # Update the Events tracker config map only after successful objects logs upload
+      # DEV NOTE: Update the Events tracker config map only after successful objects logs upload
+      # Carefully check how last_timestamp update is managed if you wish to move the below code
       Util::StateManager.update_state_configmap
     rescue StandardError
       logger.error('Error occurred while uploading object logs to OCI')
@@ -129,22 +150,53 @@ module OciLogAnalyticsResourcesDiscovery
     unless @cluster_config_hash[:oci_la_cluster_entity_id].nil?
       begin
         # Upload object discovery payload with relevant header parameters
-        logger.info('Uploading
+        logger.info('Uploading Discovery Payload to OCI')
         object_discovery_opts = {
           payload_type: 'JSON',
           discovery_data_type: 'K8S_OBJECTS',
           opc_meta_properties: "cluster-entity-id:#{@cluster_config_hash[:oci_la_cluster_entity_id]};sub-type:discovery_payload;cluster-id:#{@cluster_config_hash[:kubernetes_cluster_id]};cluster-name:#{@cluster_config_hash[:kubernetes_cluster_name]}"
         }
-        json_object_discovery_payload = Yajl.dump(
+        json_object_discovery_payload = Yajl.dump(combined_data.to_hash)
         upload_data_via_discovery_api(json_object_discovery_payload, object_discovery_opts)
-        logger.info('Successfully uploaded
+        logger.info('Successfully uploaded discovery payload to OCI')
       rescue StandardError
-        logger.error('Error occurred while uploading
+        logger.error('Error occurred while uploading discovery payload to OCI')
         upload_failure = true
       end
     end

-    raise StandardError, 'Error
+    raise StandardError, 'Error uploading payloads to OCI' if upload_failure
+    end
+
+    # Execute Resource Manager Stack
+    if !infra_discovery_failed && @app_config_hash[:enable_service_log]
+      logger.info('Initiating service logs collection.')
+      log_analytics_entity_details = nil
+      begin
+        log_analytics_entity_details = Util::ServiceLogs.fetch_log_analytics_entity_details(
+          @cluster_config_hash[:oci_la_namespace],
+          @cluster_config_hash[:oci_la_cluster_entity_id]
+        )
+      rescue StandardError => e
+        logger.error('Error fetching log analytics entity details.')
+      end
+      unless log_analytics_entity_details.nil?
+        begin
+          logger.info('Invoking resource manager stack for service log creation')
+          Util::ServiceLogs.invoke_resource_manager_stack(infra_objects_payload,
+                                                          @app_config_hash,
+                                                          @cluster_config_hash[:oci_la_log_group_id],
+                                                          log_analytics_entity_details.compartment_id,
+                                                          @oci_region,
+                                                          @oci_domain)
+        rescue StandardError => e
+          logger.error('Error invoking/updating service logs collection')
+          raise e
+        end
+      end
+    else
+      log_msg = infra_discovery_failed ? 'No infrastructure objects discovered' : 'Flag --enable_service_log not provided'
+      logger.info("#{log_msg}. Stack operations skipped")
     end
   end

@@ -155,22 +207,9 @@ module OciLogAnalyticsResourcesDiscovery
     logger.error("Error occurred in creating kubeclients - #{e}")
     raise e
   end
-    @kube_clients =
-    logger.debug('
-
-
-  def get_infra_resources_payload
-    logger.debug('Discovering Infrastructure Resources')
-    infra_resources_payload = nil
-
-    unless @auth_object.nil?
-      begin
-        infra_resources_payload = InfraResources.get_infra_resources(@auth_object, @cluster_config_hash[:oci_la_cluster_entity_id])
-      rescue StandardError => e
-        logger.error("Error in getting infrastructure resources: #{e}")
-      end
-    end
-    infra_resources_payload
+    @kube_clients =
+    logger.debug('Kube client created successfully.')
+    Util::KubeClient.get_clients
   end

   def get_objects_resources_payload
@@ -184,6 +223,15 @@ module OciLogAnalyticsResourcesDiscovery
     )
   end

+  def parse_lb_ips(kubernetes_objects_payload)
+    ip_array = []
+    lbs = kubernetes_objects_payload[:processed_objects][:services].select { |service| service[:serviceType] == "LoadBalancer" }
+    lbs.each do |lb|
+      ip_array.append lb.loadBalancerIP unless lb.loadBalancerIP.nil?
+    end
+    ip_array
+  end
+
   def filter_event_logs(kubernetes_objects_payload, collect_warning_events_only)
     cluster_events_processed_time = Util::StateManager.state.last_timestamp
     new_processed_time = cluster_events_processed_time
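The new parse_lb_ips helper filters the processed services down to type LoadBalancer and collects their external IPs. A standalone approximation of that behaviour follows; OpenStruct stands in for the processed service objects, which appear to answer both service[:serviceType] and service.loadBalancerIP, and the IPs are placeholders.

    require 'ostruct'
    services = [
      OpenStruct.new(serviceType: 'LoadBalancer', loadBalancerIP: '203.0.113.10'),
      OpenStruct.new(serviceType: 'ClusterIP',    loadBalancerIP: nil)
    ]
    lb_ips = services.select { |s| s[:serviceType] == 'LoadBalancer' }
                     .map(&:loadBalancerIP).compact
    # => ["203.0.113.10"]

These IPs are what feed the structured search query ("query loadbalancer resources matching '<ip>'") in infra_resources.rb.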
data/lib/util/kube_client.rb
CHANGED
data/lib/util/kubectl_ops.rb
CHANGED
@@ -48,7 +48,7 @@ module Util
       case method
       when :get_objects
         if client.nil?
-          client = Util::KubeClient.get_clients[Enum::
+          client = Util::KubeClient.get_clients[Enum::ObjectClientMappingEnum.const_get(object_type.upcase.to_s)]
         end
         logger.debug("Fetching '#{object_type}' details from all namespaces")
         method_verb = 'GET'