fluent-plugin-vadimberezniker-gcp 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,399 @@
1
+ # Copyright 2020 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ require 'uri'
16
+
17
+ module Common
18
# Service names, monitored resource types and the label / metadata attribute
# names associated with each supported service, plus two lookup tables
# derived from them.
module ServiceConstants
  APPENGINE_CONSTANTS = {
    service: 'appengine.googleapis.com',
    resource_type: 'gae_app',
    metadata_attributes: %w[gae_backend_name gae_backend_version]
  }.freeze
  COMPUTE_CONSTANTS = {
    service: 'compute.googleapis.com',
    resource_type: 'gce_instance'
  }.freeze
  GKE_CONSTANTS = {
    service: 'container.googleapis.com',
    resource_type: 'gke_container',
    extra_resource_labels: %w[namespace_id pod_id container_name],
    extra_common_labels: %w[namespace_name pod_name],
    metadata_attributes: %w[cluster-name cluster-location],
    stream_severity_map: {
      'stdout' => 'INFO',
      'stderr' => 'ERROR'
    }
  }.freeze
  K8S_CONTAINER_CONSTANTS = {
    resource_type: 'k8s_container'
  }.freeze
  K8S_POD_CONSTANTS = {
    resource_type: 'k8s_pod'
  }.freeze
  K8S_NODE_CONSTANTS = {
    resource_type: 'k8s_node'
  }.freeze
  DATAFLOW_CONSTANTS = {
    service: 'dataflow.googleapis.com',
    resource_type: 'dataflow_step',
    extra_resource_labels: %w[region job_name job_id step_id]
  }.freeze
  DATAPROC_CONSTANTS = {
    service: 'cluster.dataproc.googleapis.com',
    resource_type: 'cloud_dataproc_cluster',
    metadata_attributes: %w[dataproc-cluster-uuid dataproc-cluster-name]
  }.freeze
  EC2_CONSTANTS = {
    service: 'ec2.amazonaws.com',
    resource_type: 'aws_ec2_instance'
  }.freeze
  ML_CONSTANTS = {
    service: 'ml.googleapis.com',
    resource_type: 'ml_job',
    extra_resource_labels: %w[job_id task_name]
  }.freeze

  # Maps a subservice name to its resource type. Looking up a name that is
  # not listed yields the GCE instance resource type (the hash default).
  SUBSERVICE_MAP =
    [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAFLOW_CONSTANTS,
     DATAPROC_CONSTANTS, ML_CONSTANTS]
    .each_with_object(Hash.new(COMPUTE_CONSTANTS[:resource_type])) do |consts, map|
      map[consts[:service]] = consts[:resource_type]
    end
    .freeze

  # Maps a resource type to the set of instance metadata attributes that
  # are expected to be present for that subservice.
  SUBSERVICE_METADATA_ATTRIBUTES =
    [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAPROC_CONSTANTS]
    .each_with_object({}) do |consts, map|
      map[consts[:resource_type]] = consts[:metadata_attributes].to_set
    end
    .freeze
end
84
+
85
+ # Name of the Google Cloud Logging write scope requested for the application default credentials.
86
+ LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'.freeze
87
+
88
+ # Address of the metadata service; used for both the GCE and the EC2 lookups.
89
+ METADATA_SERVICE_ADDR = '169.254.169.254'.freeze
90
+
91
+ # "enum" of Platform values, as returned by Utils#detect_platform.
92
+ module Platform
93
+ OTHER = 0 # Other/unknown platform
94
+ GCE = 1 # Google Compute Engine
95
+ EC2 = 2 # Amazon EC2
96
+ end
97
+
98
+ # Utilities for managing the resource used when writing to the
99
+ # Google API.
100
+ class Utils
101
+ include Common::ServiceConstants
102
+
103
# Stores the logger that the other methods of this class use to report
# progress (via #info) and failures (via #error).
+ def initialize(log)
104
+ @log = log
105
+ end
106
+
107
# Identifies the hosting platform by sending one HTTP request to the metadata
# service and inspecting the response headers.
#
# use_metadata_service - when falsy, no request is made at all.
#
# Returns Platform::GCE when the 'metadata-flavor' header equals 'Google',
# Platform::EC2 when the 'server' header equals 'EC2ws', and Platform::OTHER
# in every other case: probing disabled, unrecognized headers, or a
# StandardError raised by the request (which is logged, not re-raised).
def detect_platform(use_metadata_service)
  if use_metadata_service
    begin
      URI.open("http://#{METADATA_SERVICE_ADDR}", proxy: false) do |response|
        headers = response.meta
        if headers['metadata-flavor'] == 'Google'
          @log.info 'Detected GCE platform'
          return Platform::GCE
        elsif headers['server'] == 'EC2ws'
          @log.info 'Detected EC2 platform'
          return Platform::EC2
        end
      end
    rescue StandardError => e
      @log.error 'Failed to access metadata service: ', error: e
    end
    @log.info 'Unable to determine platform'
  else
    @log.info 'use_metadata_service is false; not detecting platform'
  end

  Platform::OTHER
end
133
+
134
# Reads one entry from the GCE metadata server and returns the response body.
#
# platform      - must be Platform::GCE; anything else raises a RuntimeError.
# metadata_path - path appended to /computeMetadata/v1/.
#
# Errors raised by the HTTP request itself are not handled here and
# propagate to the caller.
#
# See https://cloud.google.com/compute/docs/metadata
def fetch_gce_metadata(platform, metadata_path)
  unless platform == Platform::GCE
    raise "Called fetch_gce_metadata with platform=#{platform}"
  end

  endpoint =
    "http://#{METADATA_SERVICE_ADDR}/computeMetadata/v1/#{metadata_path}"
  URI.open(endpoint, 'Metadata-Flavor' => 'Google', proxy: false) do |response|
    response.read
  end
end
142
+
143
# Returns the parsed EC2 instance identity document.
#
# The document is fetched with a single request and cached in @ec2_metadata,
# so later calls reuse the parsed result instead of contacting the metadata
# server again.
#
# platform - must be Platform::EC2; anything else raises a RuntimeError.
def ec2_metadata(platform)
  unless platform == Platform::EC2
    raise "Called ec2_metadata with platform=#{platform}"
  end

  # See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
  @ec2_metadata ||= URI.open(
    "http://#{METADATA_SERVICE_ADDR}/latest/dynamic/instance-identity/document",
    proxy: false
  ) { |response| JSON.parse(response.read) }
end
159
+
160
# Verifies that the metadata values needed to build a resource are present.
#
# project_id is always required; zone and vm_id are required unless the
# platform is Platform::OTHER. A value counts as missing when it is nil or
# false.
#
# Returns nil when nothing is missing. Raises Fluent::ConfigError naming
# every missing value (space separated) otherwise.
def check_required_metadata_variables(platform, project_id, zone, vm_id)
  required = { 'project_id' => project_id }
  unless platform == Platform::OTHER
    required['zone'] = zone
    required['vm_id'] = vm_id
  end

  missing = required.reject { |_name, value| value }.keys
  return if missing.empty?

  raise Fluent::ConfigError,
        "Unable to obtain metadata parameters: #{missing.join(' ')}"
end
173
+
174
# Resolves the project id, trying each source in this order:
# 1. The value passed in (i.e. explicitly set in the config).
# 2. The project id reported by CredentialsInfo.project_id.
# 3. On Platform::GCE only, the 'project/project-id' metadata entry.
#
# Returns a falsy value when no source provides one. Errors raised by the
# credentials or metadata lookups are not handled here.
def get_project_id(platform, project_id)
  resolved = project_id || CredentialsInfo.project_id
  return resolved if resolved || platform != Platform::GCE

  fetch_gce_metadata(platform, 'project/project-id')
end
183
+
184
# Resolves the VM id:
# 1. Return the value if it is explicitly set in the config already.
# 2. If not, try to retrieve it from the platform's metadata server
#    ('instance/id' on GCE, the 'instanceId' field of the identity document
#    on EC2).
#
# Returns nil when the id cannot be determined. A lookup failure is logged
# and also yields nil; the rescue branch ends with an explicit nil so that
# the return value of the logger call is never mistaken for a VM id.
def get_vm_id(platform, vm_id)
  vm_id ||= fetch_gce_metadata(platform, 'instance/id') if
    platform == Platform::GCE
  vm_id ||= ec2_metadata(platform)['instanceId'] if
    platform == Platform::EC2
  vm_id
rescue StandardError => e
  @log.error 'Failed to obtain vm_id: ', error: e
  nil
end
195
+
196
# Resolves the VM name:
# 1. Return the value if it is explicitly set in the config already.
# 2. If not, use the local hostname (Socket.gethostname).
#
# Returns nil when the hostname lookup fails. The failure is logged, and the
# rescue branch ends with an explicit nil so that the return value of the
# logger call is never mistaken for a VM name.
def get_vm_name(vm_name)
  vm_name || Socket.gethostname
rescue StandardError => e
  @log.error 'Failed to obtain vm name: ', error: e
  nil
end
204
+
205
# Resolves the location (zone) of the VM:
# 1. Return the value if it is explicitly set in the config already. No
#    metadata request is made in that case, so a metadata outage cannot
#    discard a configured zone.
# 2. On GCE, use the last path segment of the 'instance/zone' metadata entry.
# 3. On EC2, use "aws:" followed by the 'availabilityZone' field of the
#    identity document when use_aws_availability_zone is truthy, or by the
#    'region' field otherwise, provided the field is present.
#
# Returns nil when the location cannot be determined. A lookup failure is
# logged and also yields nil; the rescue branch ends with an explicit nil so
# that the return value of the logger call is never mistaken for a zone.
def get_location(platform, zone, use_aws_availability_zone)
  return zone if zone

  case platform
  when Platform::GCE
    # Response format: "projects/<number>/zones/<zone>"
    fetch_gce_metadata(platform, 'instance/zone').rpartition('/')[2]
  when Platform::EC2
    aws_location_key =
      use_aws_availability_zone ? 'availabilityZone' : 'region'
    metadata = ec2_metadata(platform)
    metadata.key?(aws_location_key) ? "aws:#{metadata[aws_location_key]}" : zone
  else
    zone
  end
rescue StandardError => e
  @log.error 'Failed to obtain location: ', error: e
  nil
end
225
+
226
# Wraps a resource type and its labels in a
# Google::Apis::LoggingV2::MonitoredResource.
#
# type   - the monitored resource type.
# labels - any object responding to #to_h; the converted hash is passed as
#          the resource labels.
def create_monitored_resource(type, labels)
  resource_class = Google::Apis::LoggingV2::MonitoredResource
  resource_class.new(type: type, labels: labels.to_h)
end
232
+
233
# Builds the agent-level monitored resource the legacy way: first work out
# the resource type, then the labels for that type, and finally wrap both in
# a monitored resource object.
#
# Note: This is just a failover plan if we fail to get metadata from
# Metadata Agent. Thus it should be equivalent to what Metadata Agent
# returns.
def determine_agent_level_monitored_resource_via_legacy(
  platform, subservice_name, detect_subservice, vm_id, zone
)
  resource_type = determine_agent_level_monitored_resource_type(
    platform, subservice_name, detect_subservice
  )
  resource_labels = determine_agent_level_monitored_resource_labels(
    platform, resource_type, vm_id, zone
  )
  create_monitored_resource(resource_type, resource_labels)
end
251
+
252
# Picks the agent-level monitored resource type for the platform.
#
# * Platform::OTHER - the GCE instance type (unknown platforms default to it).
# * Platform::EC2   - the EC2 instance type.
# * Platform::GCE   - when subservice_name is set, its SUBSERVICE_MAP entry;
#   otherwise, when detect_subservice is truthy, the first resource type in
#   SUBSERVICE_METADATA_ATTRIBUTES whose expected attributes are all present
#   among the instance attributes; otherwise (or when detection fails, which
#   is logged) the GCE instance type.
#
# Returns nil for any other platform value.
def determine_agent_level_monitored_resource_type(
  platform, subservice_name, detect_subservice
)
  case platform
  when Platform::OTHER
    COMPUTE_CONSTANTS[:resource_type]
  when Platform::EC2
    EC2_CONSTANTS[:resource_type]
  when Platform::GCE
    if subservice_name
      SUBSERVICE_MAP[subservice_name]
    else
      detected = nil
      if detect_subservice
        begin
          present = fetch_gce_metadata(platform,
                                       'instance/attributes/').split.to_set
          detected, = SUBSERVICE_METADATA_ATTRIBUTES.find do |_type, expected|
            present.superset?(expected)
          end
        rescue StandardError => e
          @log.error 'Failed to detect subservice: ', error: e
        end
      end
      detected || COMPUTE_CONSTANTS[:resource_type]
    end
  end
end
285
+
286
# Builds the label hash for an agent-level monitored resource of the given
# type. Each resource type has its own labels that need to be filled in:
#
# * GAE app          - module_id and version_id, from instance attributes.
# * GCE instance     - instance_id and zone.
# * GKE container    - instance_id, zone and cluster_name (instance attribute).
# * Dataproc cluster - cluster_uuid, cluster_name and region, from instance
#                      attributes.
# * EC2 instance     - instance_id, region (taken from zone) and, when the
#                      identity document has an accountId, aws_account.
#
# Any other type yields an empty hash. The GCE, GKE and EC2 types require
# both vm_id and zone. Every StandardError raised while building the labels
# results in an empty hash; it is logged only on the GCE and EC2 platforms.
def determine_agent_level_monitored_resource_labels(platform, type, vm_id, zone)
  # Reads one instance attribute from the GCE metadata server.
  instance_attribute = lambda do |name|
    fetch_gce_metadata(platform, "instance/attributes/#{name}")
  end
  # Guard shared by the resource types that identify a single VM.
  require_vm_identity = lambda do
    raise "Cannot construct a #{type} resource without vm_id and zone" \
      unless vm_id && zone
  end

  case type
  when APPENGINE_CONSTANTS[:resource_type]
    {
      'module_id' => instance_attribute.call('gae_backend_name'),
      'version_id' => instance_attribute.call('gae_backend_version')
    }
  when COMPUTE_CONSTANTS[:resource_type]
    require_vm_identity.call
    { 'instance_id' => vm_id, 'zone' => zone }
  when GKE_CONSTANTS[:resource_type]
    require_vm_identity.call
    {
      'instance_id' => vm_id,
      'zone' => zone,
      'cluster_name' => instance_attribute.call('cluster-name')
    }
  when DATAPROC_CONSTANTS[:resource_type]
    {
      'cluster_uuid' => instance_attribute.call('dataproc-cluster-uuid'),
      'cluster_name' => instance_attribute.call('dataproc-cluster-name'),
      'region' => instance_attribute.call('dataproc-region')
    }
  when EC2_CONSTANTS[:resource_type]
    require_vm_identity.call
    identity = ec2_metadata(platform)
    labels = { 'instance_id' => vm_id, 'region' => zone }
    labels['aws_account'] = identity['accountId'] if identity.key?('accountId')
    labels
  else
    {}
  end
rescue StandardError => e
  if [Platform::GCE, Platform::EC2].include?(platform)
    @log.error "Failed to set monitored resource labels for #{type}: ",
               error: e
  end
  {}
end
361
+
362
# TODO: This functionality should eventually be available in another
# library, but implement it ourselves for now.
#
# Helpers for deriving a project id from the application default
# credentials.
module CredentialsInfo
  # Patterns that capture a project id, tried in order. Both are anchored
  # with \A so that only the beginning of the string can match; ^ would also
  # match at the start of any later line of a multi-line string.
  PROJECT_ID_PATTERNS = [
    /\A.*@(?<project_id>.+)\.iam\.gserviceaccount\.com/,
    /\A(?<project_id>\d+)-/
  ].freeze

  # Determine the project ID from the credentials, if possible.
  # Returns the project ID (as a string) on success, or nil on failure.
  #
  # The credentials' own project_id is preferred when they expose one;
  # otherwise the issuer and then the client_id are parsed for a project id.
  def self.project_id
    creds = Google::Auth.get_application_default(LOGGING_SCOPE)
    return creds.project_id if creds.respond_to?(:project_id) && creds.project_id

    %i[issuer client_id].each do |attribute|
      value = creds.public_send(attribute)
      next unless value

      id = extract_project_id(value)
      return id if id
    end
    nil
  end

  # Extracts the project id (either name or number) from str and returns
  # it (as a string) on success, or nil on failure.
  #
  # Recognizes IAM format (account@project-name.iam.gserviceaccount.com)
  # as well as the legacy format with a project number at the front of the
  # string, terminated by a dash (-) which is not part of the ID, i.e.:
  # <PROJECT_ID>-<OTHER_PARTS>.apps.googleusercontent.com
  def self.extract_project_id(str)
    PROJECT_ID_PATTERNS.each do |pattern|
      match_data = pattern.match(str)
      return match_data['project_id'] if match_data
    end
    nil
  end
end
398
+ end
399
+ end
@@ -0,0 +1,86 @@
1
+ # Copyright 2018 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Fluent
16
# Fluentd filter plugin for adding insertIds to guarantee log entry order
# and uniqueness.
#
# A random initial insertId is generated when the plugin starts; every
# record that lacks one then receives the successor of the previously
# issued insertId.
#
# Sample log entries enriched by this plugin:
# {
#   "timestamp": "2017-08-22 13:35:28",
#   "message": "1",
#   "logging.googleapis.com/insertId": "aye7eakuf23h41aef0"
# }
# {
#   "timestamp": "2017-08-22 13:35:28",
#   "message": "2",
#   "logging.googleapis.com/insertId": "aye7eakuf23h41aef1"
# }
# {
#   "timestamp": "2017-08-22 13:35:28",
#   "message": "3",
#   "logging.googleapis.com/insertId": "aye7eakuf23h41aef2"
# }
class AddInsertIdsFilter < Filter
  Fluent::Plugin.register_filter('add_insert_ids', self)

  # Constants for configuration.
  module ConfigConstants
    # The default field name of insertIds in the log entry.
    DEFAULT_INSERT_ID_KEY = 'logging.googleapis.com/insertId'.freeze
    # The character size of the insertIds. This matches the setup in the
    # Stackdriver Logging backend.
    INSERT_ID_SIZE = 17
    # The characters that are allowed in the insertIds. This matches the
    # allowed collection by the Stackdriver Logging Backend.
    ALLOWED_CHARS = [*0..9, *'a'..'z'].freeze
  end

  include self::ConfigConstants

  desc 'The field name for insertIds in the log record.'
  config_param :insert_id_key, :string, default: DEFAULT_INSERT_ID_KEY

  # Starts the plugin and seeds the insertId sequence with a random value.
  def start
    super
    @log = $log # rubocop:disable Style/GlobalVars

    @log.info "Started the add_insert_ids plugin with #{@insert_id_key} as the insert ID key."
    @insert_id = generate_initial_insert_id
    @log.info "Initialized the insert ID key to #{@insert_id}."
  end

  # rubocop:disable Lint/UnusedMethodArgument

  # Adds an insertId to the record when the record is a Hash whose insert ID
  # field is missing or empty (after #to_s). Records that are not hashes and
  # records that already carry an insertId are returned unchanged.
  def filter(tag, time, record)
    return record unless record.is_a?(Hash)

    current = record[@insert_id_key]
    record[@insert_id_key] = increment_insert_id if current.to_s.empty?
    record
  end
  # rubocop:enable Lint/UnusedMethodArgument

  private

  # Builds the initial insertId: INSERT_ID_SIZE characters, each sampled from
  # ALLOWED_CHARS.
  def generate_initial_insert_id
    INSERT_ID_SIZE.times.map { ALLOWED_CHARS.sample }.join
  end

  # Advances the stored insertId to its successor and returns the new value.
  def increment_insert_id
    @insert_id = @insert_id.succ
  end
end
86
+ end