fluent-plugin-vadimberezniker-gcp 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2209 @@
1
+ # Copyright 2014 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ require 'cgi'
15
+ require 'erb'
16
+ require 'grpc'
17
+ require 'json'
18
+ require 'open-uri'
19
+ require 'socket'
20
+ require 'time'
21
+ require 'yaml'
22
+ require 'google/apis'
23
+ require 'google/cloud/errors'
24
+ require 'google/apis/logging_v2'
25
+ require 'google/cloud/logging/v2'
26
+ require 'google/logging/v2/logging_pb'
27
+ require 'google/logging/v2/logging_services_pb'
28
+ require 'google/logging/v2/log_entry_pb'
29
+ require 'googleauth'
30
+
31
+ require_relative 'common'
32
+ require_relative 'monitoring'
33
+ require_relative 'statusz'
34
+
35
# Make the generated protobuf Map class usable interchangeably with a plain
# Ruby Hash by the rest of this plugin (which calls key? / to_hash on both).
module Google
  module Protobuf
    # Alias the has_key? method to have the same interface as a regular map.
    class Map
      alias key? has_key?
      alias to_hash to_h
    end
  end
end
44
+
45
# Prevent googleauth from shelling out to gcloud to discover a project id.
# This plugin determines the project id itself (explicitly or via the
# metadata service), so gcloud's answer would only be a conflicting override.
module Google
  module Auth
    # Disable gcloud lookup in googleauth to avoid picking up its project id.
    module CredentialsLoader
      # Set $VERBOSE to nil to mute the "already initialized constant" warnings.
      warn_level = $VERBOSE
      begin
        $VERBOSE = nil
        # These constants are used to invoke gcloud on Linux and Windows,
        # respectively. Ideally, we would have overridden
        # CredentialsLoader.load_gcloud_project_id, but we cannot catch it
        # before it's invoked via "require 'googleauth'". So we override the
        # constants instead. The replacement values are harmless no-op shell
        # commands, so the lookup quietly yields nothing.
        GCLOUD_POSIX_COMMAND = '/bin/true'.freeze
        GCLOUD_WINDOWS_COMMAND = 'cd .'.freeze
        GCLOUD_CONFIG_COMMAND = ''.freeze
      ensure
        # Always restore the caller's warning level, even if assignment raised.
        $VERBOSE = warn_level
      end
    end
  end
end
67
+
68
+ # FluentLogger exposes the Fluent logger to the gRPC library.
69
+ module FluentLogger
70
+ def logger
71
+ $log # rubocop:disable Style/GlobalVars
72
+ end
73
+ end
74
+
75
# Define a gRPC module-level logger method before grpc/logconfig.rb loads.
# NOTE(review): this relies on grpc gem internals — presumably logconfig.rb
# only installs its own default logger when GRPC.logger is not already
# defined; confirm against the installed grpc version.
module GRPC
  extend FluentLogger
end
79
+
80
# Disable the nurse/strptime gem used by FluentD's TimeParser class in
# lib/fluent/time.rb. We found this gem to be slower than the builtin Ruby
# parser in recent versions of Ruby. Fortunately FluentD will fall back to the
# builtin parser.
require 'strptime'
# Dummy Strptime class.
# Strptime.new returns nil here, which makes callers treat the gem as
# unusable and fall back to the builtin parser.
class Strptime
  def self.new(_)
    # empty
  end
end
91
+
92
+ module Fluent
93
+ # fluentd output plugin for the Stackdriver Logging API
94
+ class GoogleCloudOutput < BufferedOutput
95
# Constants for configuration.
module ConfigConstants
  # Default values for JSON payload keys to set the "httpRequest",
  # "operation", "sourceLocation", "trace" fields in the LogEntry.
  # These defaults match the "special fields" documented for the
  # Google Cloud Logging agent; each can be overridden via config_param.
  DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'.freeze
  DEFAULT_INSERT_ID_KEY = 'logging.googleapis.com/insertId'.freeze
  DEFAULT_LABELS_KEY = 'logging.googleapis.com/labels'.freeze
  DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'.freeze
  DEFAULT_SOURCE_LOCATION_KEY =
    'logging.googleapis.com/sourceLocation'.freeze
  DEFAULT_SPAN_ID_KEY = 'logging.googleapis.com/spanId'.freeze
  DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'.freeze
  DEFAULT_TRACE_SAMPLED_KEY = 'logging.googleapis.com/trace_sampled'.freeze
end
109
+
110
# Internal constants.
module InternalConstants
  CREDENTIALS_PATH_ENV_VAR = 'GOOGLE_APPLICATION_CREDENTIALS'.freeze
  DEFAULT_LOGGING_API_URL = 'https://logging.googleapis.com'.freeze

  # The label name of local_resource_id in the json payload. When a record
  # has this field in the payload, we will use the value to retrieve
  # monitored resource from Stackdriver Metadata agent.
  LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'.freeze

  # The regexp matches stackdriver trace id format: 32-byte hex string.
  # The format is documented in
  # https://cloud.google.com/trace/docs/reference/v2/rpc/google.devtools.cloudtrace.v1#trace
  # NOTE(review): ^/$ anchor per-line, not whole-string (\A/\z); a value
  # containing an embedded newline around a 32-hex line would still match —
  # confirm whether that is acceptable here.
  STACKDRIVER_TRACE_ID_REGEXP = Regexp.new('^\h{32}$').freeze

  # Map from each field name under LogEntry to corresponding variables
  # required to perform field value extraction from the log record.
  # Each value is a 4-tuple:
  #   [config ivar naming the payload key, subfield specs,
  #    gRPC class name, REST class name].
  LOG_ENTRY_FIELDS_MAP = {
    'http_request' => [
      # The config to specify label name for field extraction from record.
      '@http_request_key',
      # Map from subfields' names to their types.
      [
        # subfield key in the payload, destination key, cast lambda (opt)
        %w[cacheFillBytes cache_fill_bytes parse_int],
        %w[cacheHit cache_hit parse_bool],
        %w[cacheLookup cache_lookup parse_bool],
        %w[cacheValidatedWithOriginServer
           cache_validated_with_origin_server parse_bool],
        %w[latency latency parse_latency],
        %w[protocol protocol parse_string],
        %w[referer referer parse_string],
        %w[remoteIp remote_ip parse_string],
        %w[responseSize response_size parse_int],
        %w[requestMethod request_method parse_string],
        %w[requestSize request_size parse_int],
        %w[requestUrl request_url parse_string],
        %w[serverIp server_ip parse_string],
        %w[status status parse_int],
        %w[userAgent user_agent parse_string]
      ],
      # The grpc version class name.
      'Google::Cloud::Logging::Type::HttpRequest',
      # The non-grpc version class name.
      'Google::Apis::LoggingV2::HttpRequest'
    ],
    'operation' => [
      '@operation_key',
      [
        %w[id id parse_string],
        %w[producer producer parse_string],
        %w[first first parse_bool],
        %w[last last parse_bool]
      ],
      'Google::Cloud::Logging::V2::LogEntryOperation',
      'Google::Apis::LoggingV2::LogEntryOperation'
    ],
    'source_location' => [
      '@source_location_key',
      [
        %w[file file parse_string],
        %w[function function parse_string],
        %w[line line parse_int]
      ],
      'Google::Cloud::Logging::V2::LogEntrySourceLocation',
      'Google::Apis::LoggingV2::LogEntrySourceLocation'
    ]
  }.freeze

  # The name of the WriteLogEntriesPartialErrors field in the error details.
  PARTIAL_ERROR_FIELD =
    'type.googleapis.com/google.logging.v2.WriteLogEntriesPartialErrors' \
    .freeze
end
184
+
185
include Common::ServiceConstants
include self::ConfigConstants
include self::InternalConstants

Fluent::Plugin.register_output('google_cloud', self)

# Plugin helpers: :server powers the statusz HTTP endpoint, :timer powers
# the periodic metric export and uptime updates.
helpers :server, :timer

PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'.freeze

# Follows semver.org format.
PLUGIN_VERSION = begin
  # Extract plugin version from file path.
  # NOTE(review): the pattern matches the upstream gem name
  # ("fluent-plugin-google-cloud"), while this fork appears to be published
  # as "fluent-plugin-vadimberezniker-gcp"; the path match therefore likely
  # never hits and the Gem::SpecFetcher fallback below is used — confirm.
  match_data = __FILE__.match(
    %r{fluent-plugin-google-cloud-(?<version>[^/]*)/}
  )
  if match_data
    match_data['version']
  else
    # Extract plugin version by finding the spec this file was loaded from.
    dependency = Gem::Dependency.new('fluent-plugin-google-cloud')
    all_specs, = Gem::SpecFetcher.fetcher.spec_for_dependency(dependency)
    matching_version, = all_specs.grep(
      proc { |spec,| __FILE__.include?(spec.full_gem_path) }
    ) do |spec,|
      spec.version.to_s
    end
    # If no matching version was found, return a valid but obviously wrong
    # value.
    matching_version || '0.0.0-unknown'
  end
end.freeze
217
+
218
# Disable this warning to conform to fluentd config_param conventions.
# rubocop:disable Style/HashSyntax

# Specify project/instance metadata.
#
# project_id, zone, and vm_id are required to have valid values, which
# can be obtained from the metadata service or set explicitly.
# Otherwise, the plugin will fail to initialize.
#
# Note that while 'project id' properly refers to the alphanumeric name
# of the project, the logging service will also accept the project number,
# so either one is acceptable in this context.
#
# Whether to attempt to obtain metadata from the local metadata service.
# It is safe to specify 'true' even on platforms with no metadata service.
config_param :use_metadata_service, :bool, :default => true
# A compatibility option to enable the legacy behavior of setting the AWS
# location to the availability zone rather than the region.
config_param :use_aws_availability_zone, :bool, :default => true
# These parameters override any values obtained from the metadata service.
config_param :project_id, :string, :default => nil
config_param :zone, :string, :default => nil
config_param :vm_id, :string, :default => nil
config_param :vm_name, :string, :default => nil
# Kubernetes-specific parameters, only used to override these values in
# the fallback path when the metadata agent is temporarily unavailable.
# They have to match the configuration of the metadata agent.
config_param :k8s_cluster_name, :string, :default => nil
config_param :k8s_cluster_location, :string, :default => nil

# Map keys from a JSON payload to corresponding LogEntry fields.
config_param :http_request_key, :string, :default =>
  DEFAULT_HTTP_REQUEST_KEY
config_param :insert_id_key, :string, :default => DEFAULT_INSERT_ID_KEY
config_param :labels_key, :string, :default => DEFAULT_LABELS_KEY
config_param :operation_key, :string, :default => DEFAULT_OPERATION_KEY
config_param :source_location_key, :string, :default =>
  DEFAULT_SOURCE_LOCATION_KEY
config_param :span_id_key, :string, :default => DEFAULT_SPAN_ID_KEY
config_param :trace_key, :string, :default => DEFAULT_TRACE_KEY
config_param :trace_sampled_key, :string, :default =>
  DEFAULT_TRACE_SAMPLED_KEY

# Whether to try to detect if the record is a text log entry with JSON
# content that needs to be parsed.
config_param :detect_json, :bool, :default => false
# TODO(igorpeshansky): Add a parameter for the text field in the payload.

# Whether to try to detect if the VM is owned by a "subservice" such as App
# Engine of Kubernetes, rather than just associating the logs with the
# compute service of the platform. This currently only has any effect when
# running on GCE.
#
# The initial motivation for this is to separate out Kubernetes node
# component (Kubelet, etc.) logs from container logs.
config_param :detect_subservice, :bool, :default => true
# The subservice_name overrides the subservice detection, if provided.
config_param :subservice_name, :string, :default => nil

# Whether to reject log entries with invalid tags. If this option is set to
# false, tags will be made valid by converting any non-string tag to a
# string, and sanitizing any non-utf8 or other invalid characters.
config_param :require_valid_tags, :bool, :default => false

# The regular expression to use on Kubernetes logs to extract some basic
# information about the log source. The regexp must contain capture groups
# for pod_name, namespace_name, and container_name.
config_param :kubernetes_tag_regexp, :string, :default =>
  '\.(?<pod_name>[^_]+)_(?<namespace_name>[^_]+)_(?<container_name>.+)$'

# label_map (specified as a JSON object) is an unordered set of fluent
# field names whose values are sent as labels rather than as part of the
# struct payload.
#
# Each entry in the map is a {"field_name": "label_name"} pair. When
# the "field_name" (as parsed by the input plugin) is encountered, a label
# with the corresponding "label_name" is added to the log entry. The
# value of the field is used as the value of the label.
#
# The map gives the user additional flexibility in specifying label
# names, including the ability to use characters which would not be
# legal as part of fluent field names.
#
# Example:
#   label_map {
#     "field_name_1": "sent_label_name_1",
#     "field_name_2": "some.prefix/sent_label_name_2"
#   }
config_param :label_map, :hash, :default => nil

# labels (specified as a JSON object) is a set of custom labels
# provided at configuration time. It allows users to inject extra
# environmental information into every message or to customize
# labels otherwise detected automatically.
#
# Each entry in the map is a {"label_name": "label_value"} pair.
#
# Example:
#   labels {
#     "label_name_1": "label_value_1",
#     "label_name_2": "label_value_2"
#   }
config_param :labels, :hash, :default => nil

# Whether to use gRPC instead of REST/JSON to communicate to the
# Stackdriver Logging API.
config_param :use_grpc, :bool, :default => false

# Whether to enable gRPC compression when communicating with the Stackdriver
# Logging API. Only used if 'use_grpc' is set to true.
config_param :grpc_compression_algorithm, :enum,
             list: %i[none gzip],
             :default => nil

# Whether valid entries should be written even if some other entries fail
# due to INVALID_ARGUMENT or PERMISSION_DENIED errors when communicating to
# the Stackdriver Logging API. This flag is no longer used, and is kept for
# backwards compatibility, partial_success is enabled for all requests.
# TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
config_param :partial_success, :bool,
             :default => true,
             :skip_accessor => true,
             :deprecated => 'This feature is permanently enabled'

# Whether to allow non-UTF-8 characters in user logs. If set to true, any
# non-UTF-8 character would be replaced by the string specified by
# 'non_utf8_replacement_string'. If set to false, any non-UTF-8 character
# would trigger the plugin to error out.
config_param :coerce_to_utf8, :bool, :default => true

# If 'coerce_to_utf8' is set to true, any non-UTF-8 character would be
# replaced by the string specified here.
config_param :non_utf8_replacement_string, :string, :default => ' '

# DEPRECATED: The following parameters, if present in the config
# indicate that the plugin configuration must be updated.
# #configure raises Fluent::ConfigError when any of them is set.
config_param :auth_method, :string, :default => nil
config_param :private_key_email, :string, :default => nil
config_param :private_key_path, :string, :default => nil
config_param :private_key_passphrase, :string,
             :default => nil,
             :secret => true

# The URL of Stackdriver Logging API. Right now this only works with the
# gRPC path (use_grpc = true). An unsecured channel is used if the URL
# scheme is 'http' instead of 'https'. One common use case of this config is
# to provide a mocked / stubbed Logging API, e.g., http://localhost:52000.
config_param :logging_api_url, :string, :default => DEFAULT_LOGGING_API_URL

# Whether to collect metrics about the plugin usage. The mechanism for
# collecting and exposing metrics is controlled by the monitoring_type
# parameter.
config_param :enable_monitoring, :bool, :default => false

# What system to use when collecting metrics. Possible values are:
#   - 'prometheus', in this case default registry in the Prometheus
#     client library is used, without actually exposing the endpoint
#     to serve metrics in the Prometheus format.
#   - 'opencensus', in this case the OpenCensus implementation is
#     used to send metrics directly to Google Cloud Monitoring.
#   - any other value will result in the absence of metrics.
config_param :monitoring_type, :string,
             :default => Monitoring::PrometheusMonitoringRegistry.name

# The monitored resource to use for OpenCensus metrics. Only valid
# when monitoring_type is set to 'opencensus'. This value is a hash in
# the form:
# {"type":"gce_instance","labels":{"instance_id":"aaa","zone":"bbb"} (JSON)
# or type:gce_instance,labels.instance_id:aaa,labels.zone:bbb (Hash)
config_param :metrics_resource, :hash,
             :symbolize_keys => true, :default => nil

# Whether to call metadata agent to retrieve monitored resource. This flag
# is kept for backwards compatibility, and is no longer used.
# TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
config_param :enable_metadata_agent, :bool,
             :default => false,
             :skip_accessor => true,
             :deprecated => 'This feature is permanently disabled'

# The URL of the Metadata Agent. This flag is kept for backwards
# compatibility, and is no longer used.
# TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
config_param :metadata_agent_url, :string,
             :default => nil,
             :skip_accessor => true,
             :deprecated => 'This feature is permanently disabled'

# Whether to split log entries with different log tags into different
# requests when talking to Stackdriver Logging API.
config_param :split_logs_by_tag, :bool, :default => false

# Whether to attempt adjusting invalid log entry timestamps.
config_param :adjust_invalid_timestamps, :bool, :default => true

# Whether to autoformat value of "logging.googleapis.com/trace" to
# comply with Stackdriver Trace format
# "projects/[PROJECT-ID]/traces/[TRACE-ID]" when setting
# LogEntry.trace.
config_param :autoformat_stackdriver_trace, :bool, :default => true

# Port for web server that exposes a /statusz endpoint with
# diagnostic information in HTML format. If the value is 0,
# the server is not created.
config_param :statusz_port, :integer, :default => 0

# Override for the Google Cloud Monitoring service hostname, or
# `nil` to leave as the default.
config_param :gcm_service_address, :string, :default => nil

# rubocop:enable Style/HashSyntax

# TODO: Add a log_name config option rather than just using the tag?

# Expose attr_readers to make testing of metadata more direct than only
# testing it indirectly through metadata sent with logs.
attr_reader :resource, :common_labels, :monitoring_resource
435
+
436
# Sets up plugin state that must exist before #configure runs.
def initialize
  super
  # use the global logger
  @log = $log # rubocop:disable Style/GlobalVars

  # Counter metrics; only populated in #configure when monitoring is enabled.
  @failed_requests_count = nil
  @successful_requests_count = nil
  @dropped_entries_count = nil
  @ingested_entries_count = nil
  @retried_entries_count = nil

  # The "success" status code for metric labels; set in #configure
  # (gRPC OK or HTTP 200 depending on the transport).
  @ok_code = nil
  @uptime_update_time = Time.now.to_i
end
450
+
451
# Validates the configuration, resolves instance metadata (project, zone,
# VM id), determines the monitored resource, optionally sets up monitoring
# metrics, and selects the gRPC or REST write path. Raises
# Fluent::ConfigError on invalid or obsolete configuration.
def configure(conf)
  super

  # TODO(qingling128): Remove this warning after the support is added. Also
  # remove the comment in the description of this configuration.
  unless @logging_api_url == DEFAULT_LOGGING_API_URL || @use_grpc
    @log.warn 'Detected customized logging_api_url while use_grpc is not' \
              ' enabled. Customized logging_api_url for the non-gRPC path' \
              ' is not supported. The logging_api_url option will be' \
              ' ignored.'
  end

  # Alert on old authentication configuration.
  unless @auth_method.nil? && @private_key_email.nil? &&
         @private_key_path.nil? && @private_key_passphrase.nil?
    extra = []
    extra << 'auth_method' unless @auth_method.nil?
    extra << 'private_key_email' unless @private_key_email.nil?
    extra << 'private_key_path' unless @private_key_path.nil?
    extra << 'private_key_passphrase' unless @private_key_passphrase.nil?

    raise Fluent::ConfigError,
          "#{PLUGIN_NAME} no longer supports auth_method.\n" \
          "Please remove configuration parameters: #{extra.join(' ')}"
  end

  set_regexp_patterns

  @utils = Common::Utils.new(@log)

  @platform = @utils.detect_platform(@use_metadata_service)

  # Treat an empty setting of the credentials file path environment variable
  # as unset. This way the googleauth lib could fetch the credentials
  # following the fallback path.
  ENV.delete(CREDENTIALS_PATH_ENV_VAR) if
    ENV[CREDENTIALS_PATH_ENV_VAR] == ''

  # Set required variables: @project_id, @vm_id, @vm_name and @zone.
  @project_id = @utils.get_project_id(@platform, @project_id)
  @vm_id = @utils.get_vm_id(@platform, @vm_id)
  @vm_name = @utils.get_vm_name(@vm_name)
  @zone = @utils.get_location(@platform, @zone, @use_aws_availability_zone)

  # All metadata parameters must now be set.
  @utils.check_required_metadata_variables(
    @platform, @project_id, @zone, @vm_id
  )

  # Retrieve monitored resource.
  # Fail over to retrieve monitored resource via the legacy path if we fail
  # to get it from Metadata Agent.
  @resource ||= @utils.determine_agent_level_monitored_resource_via_legacy(
    @platform, @subservice_name, @detect_subservice, @vm_id, @zone
  )

  # Validate the metrics_resource hash. Two shapes are accepted: the JSON
  # form {type:, labels: {...}} and the flat form {type:, "labels.k" => v},
  # which is normalized into the former below.
  if @metrics_resource
    unless @metrics_resource[:type].is_a?(String)
      raise Fluent::ConfigError,
            'metrics_resource.type must be a string:' \
            " #{@metrics_resource}."
    end
    if @metrics_resource.key?(:labels)
      unless @metrics_resource[:labels].is_a?(Hash)
        raise Fluent::ConfigError,
              'metrics_resource.labels must be a hash:' \
              " #{@metrics_resource}."
      end
      extra_keys = @metrics_resource.reject do |k, _|
        %i[type labels].include?(k)
      end
      unless extra_keys.empty?
        raise Fluent::ConfigError,
              "metrics_resource has unrecognized keys: #{extra_keys.keys}."
      end
    else
      extra_keys = @metrics_resource.reject do |k, _|
        k == :type || k.to_s.start_with?('labels.')
      end
      unless extra_keys.empty?
        raise Fluent::ConfigError,
              "metrics_resource has unrecognized keys: #{extra_keys.keys}."
      end
      # Transform the Hash form of the metrics_resource config if necessary.
      resource_type = @metrics_resource[:type]
      resource_labels = @metrics_resource.each_with_object({}) \
        do |(k, v), h|
          h[k.to_s.sub('labels.', '')] = v if k.to_s.start_with? 'labels.'
        end
      @metrics_resource = { type: resource_type, labels: resource_labels }
    end
  end

  # If monitoring is enabled, register metrics in the default registry
  # and store metric objects for future use.
  if @enable_monitoring
    unless Monitoring::MonitoringRegistryFactory.supports_monitoring_type(
      @monitoring_type
    )
      @log.warn "monitoring_type '#{@monitoring_type}' is unknown; "\
                'there will be no metrics'
    end
    @monitoring_resource = if @metrics_resource
                             @utils.create_monitored_resource(
                               @metrics_resource[:type],
                               @metrics_resource[:labels]
                             )
                           else
                             @resource
                           end
    @registry = Monitoring::MonitoringRegistryFactory
                .create(@monitoring_type, @project_id,
                        @monitoring_resource, @gcm_service_address)
    # Export metrics every 60 seconds.
    timer_execute(:export_metrics, 60) { @registry.export }
    # Uptime should be a gauge, but the metric definition is a counter and
    # we can't change it.
    @uptime_metric = @registry.counter(
      :uptime, [:version], 'Uptime of Logging agent',
      'agent.googleapis.com/agent', 'CUMULATIVE'
    )
    update_uptime
    timer_execute(:update_uptime, 1) { update_uptime }
    @successful_requests_count = @registry.counter(
      :stackdriver_successful_requests_count,
      %i[grpc code],
      'A number of successful requests to the Stackdriver Logging API',
      'agent.googleapis.com/agent', 'CUMULATIVE'
    )
    @failed_requests_count = @registry.counter(
      :stackdriver_failed_requests_count,
      %i[grpc code],
      'A number of failed requests to the Stackdriver Logging '\
      'API, broken down by the error code',
      'agent.googleapis.com/agent', 'CUMULATIVE'
    )
    @ingested_entries_count = @registry.counter(
      :stackdriver_ingested_entries_count,
      %i[grpc code],
      'A number of log entries ingested by Stackdriver Logging',
      'agent.googleapis.com/agent', 'CUMULATIVE'
    )
    @dropped_entries_count = @registry.counter(
      :stackdriver_dropped_entries_count,
      %i[grpc code],
      'A number of log entries dropped by the Stackdriver output plugin',
      'agent.googleapis.com/agent', 'CUMULATIVE'
    )
    @retried_entries_count = @registry.counter(
      :stackdriver_retried_entries_count,
      %i[grpc code],
      'The number of log entries that failed to be ingested by '\
      'the Stackdriver output plugin due to a transient error '\
      'and were retried',
      'agent.googleapis.com/agent', 'CUMULATIVE'
    )
    @ok_code = @use_grpc ? GRPC::Core::StatusCodes::OK : 200
  end

  # Set regexp that we should match tags against later on. Using a list
  # instead of a map to ensure order.
  @tag_regexp_list = []
  if @resource.type == GKE_CONSTANTS[:resource_type]
    @tag_regexp_list << [
      GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
    ]
  end

  # Determine the common labels that should be added to all log entries
  # processed by this logging agent.
  @common_labels = determine_agent_level_common_labels(@resource)

  # The resource and labels are now set up; ensure they can't be modified
  # without first duping them.
  @resource.freeze
  @resource.labels.freeze
  @common_labels.freeze

  # Bind the transport-specific entry construction / write strategies once,
  # so the hot path in #write does not branch on @use_grpc.
  if @use_grpc
    @construct_log_entry = method(:construct_log_entry_in_grpc_format)
    @write_request = method(:write_request_via_grpc)
  else
    @construct_log_entry = method(:construct_log_entry_in_rest_format)
    @write_request = method(:write_request_via_rest)
  end

  return unless [Common::Platform::GCE,
                 Common::Platform::EC2].include?(@platform)

  # Log an informational message containing the Logs viewer URL
  @log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
            "viewer?project=#{@project_id}&resource=#{@resource.type}/",
            "instance_id/#{@vm_id}"
end
643
+
644
# Initializes the Logging API client and, when statusz_port is non-zero,
# starts a localhost-only HTTP server serving diagnostics at /statusz.
def start
  super
  init_api_client
  @successful_call = false
  @timenanos_warning = false

  return unless @statusz_port.positive?

  @log.info "Starting statusz server on port #{@statusz_port}"
  server_create(:out_google_cloud_statusz,
                @statusz_port,
                bind: '127.0.0.1') do |data, conn|
    # data holds the raw request; the second token of the request line is
    # the path (e.g. "GET /statusz HTTP/1.1" -> "/statusz").
    if data.split(' ')[1] == '/statusz'
      write_html_response(data, conn, 200, Statusz.response(self))
    else
      write_html_response(data, conn, 404, "Not found\n")
    end
  end
end
663
+
664
def shutdown
  super
  # Export metrics on shutdown. This is a best-effort attempt, and it might
  # fail, for instance if there was a recent write to the same time series.
  # @registry is only assigned when monitoring is enabled, hence the &. guard.
  @registry&.export
end
670
+
671
# Buffered-output entry point: converts a Fluentd chunk into LogEntry
# objects and sends them to the Logging API, either as one request per
# (tag, local_resource_id) group (split_logs_by_tag) or as one combined
# request.
def write(chunk)
  grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)

  requests_to_send = []
  grouped_entries.each do |(tag, local_resource_id), arr|
    entries = []
    group_level_resource, group_level_common_labels =
      determine_group_level_monitored_resource_and_labels(
        tag, local_resource_id
      )

    arr.each do |time, record|
      entry_level_resource, entry_level_common_labels =
        determine_entry_level_monitored_resource_and_labels(
          group_level_resource, group_level_common_labels, record
        )

      is_json = false
      if @detect_json
        # Save the following fields if available, then clear them out to
        # allow for determining whether we should parse the log or message
        # field.
        # This list should be in sync with
        # https://cloud.google.com/logging/docs/agent/configuration#special-fields.
        preserved_keys = [
          'time',
          'timeNanos',
          'timestamp',
          'timestampNanos',
          'timestampSeconds',
          'severity',
          @http_request_key,
          @insert_id_key,
          @labels_key,
          @operation_key,
          @source_location_key,
          @span_id_key,
          @trace_key,
          @trace_sampled_key
        ]

        # If the log is json, we want to export it as a structured log
        # unless there is additional metadata that would be lost.
        record_json = nil
        if (record.keys - preserved_keys).length == 1
          %w[log message msg].each do |field|
            record_json = parse_json_or_nil(record[field]) if
              record.key?(field)
          end
        end
        unless record_json.nil?
          # Propagate these if necessary. Note that we don't want to
          # override these keys in the JSON we've just parsed.
          preserved_keys.each do |key|
            record_json[key] ||= record[key] if
              record.key?(key) && !record_json.key?(key)
          end

          record = record_json
          is_json = true
        end
      end

      ts_secs, ts_nanos, timestamp = compute_timestamp(record, time)
      ts_secs, ts_nanos = adjust_timestamp_if_invalid(timestamp, Time.now) \
        if @adjust_invalid_timestamps && timestamp

      severity = compute_severity(
        entry_level_resource.type, record, entry_level_common_labels
      )

      dynamic_labels_from_payload = parse_labels(record)

      if dynamic_labels_from_payload
        entry_level_common_labels.merge!(
          dynamic_labels_from_payload
        )
      end

      # @construct_log_entry is bound in #configure to either the gRPC or
      # REST entry builder.
      entry = @construct_log_entry.call(entry_level_common_labels,
                                        entry_level_resource,
                                        severity,
                                        ts_secs,
                                        ts_nanos)

      # Fields below are deleted from the record so they do not also appear
      # in the payload.
      insert_id = record.delete(@insert_id_key)
      entry.insert_id = insert_id if insert_id
      span_id = record.delete(@span_id_key)
      entry.span_id = span_id if span_id
      trace = record.delete(@trace_key)
      entry.trace = compute_trace(trace) if trace
      trace_sampled = record.delete(@trace_sampled_key)
      entry.trace_sampled = parse_bool(trace_sampled) unless
        trace_sampled.nil?

      set_log_entry_fields(record, entry)
      set_payload(entry_level_resource.type, record, entry, is_json)

      entries.push(entry)
    end
    # Don't send an empty request if we rejected all the entries.
    next if entries.empty?

    log_name = "projects/#{@project_id}/logs/#{log_name(
      tag, group_level_resource
    )}"

    requests_to_send << {
      entries: entries,
      log_name: log_name,
      resource: group_level_resource,
      labels: group_level_common_labels
    }
  end

  if @split_logs_by_tag
    requests_to_send.each do |request|
      @write_request.call(**request)
    end
  else
    # Combine all requests into one. The request level "log_name" will be
    # ported to the entry level. The request level "resource" and "labels"
    # are ignored as they should have been folded into the entry level
    # "resource" and "labels" already anyway.
    combined_entries = []
    requests_to_send.each do |request|
      request[:entries].each do |entry|
        # Modify entries in-place as they are not needed later on.
        entry.log_name = request[:log_name]
      end
      combined_entries.concat(request[:entries])
    end
    @write_request.call(entries: combined_entries) unless
      combined_entries.empty?
  end
end
806
+
807
    # Fluentd plugin API hook: returning true declares that this output
    # plugin is safe to run under multiple Fluentd workers.
    def multi_workers_ready?
      true
    end
810
+
811
+ def self.version_string
812
+ @version_string ||= "google-fluentd/#{PLUGIN_VERSION}"
813
+ end
814
+
815
+ def update_uptime
816
+ now = Time.now.to_i
817
+ @uptime_metric.increment(
818
+ by: now - @uptime_update_time,
819
+ labels: { version: Fluent::GoogleCloudOutput.version_string }
820
+ )
821
+ @uptime_update_time = now
822
+ end
823
+
824
+ private
825
+
826
+ def write_html_response(data, conn, code, response)
827
+ @log.info "#{conn.remote_host} - - " \
828
+ "#{Time.now.strftime('%d/%b/%Y:%H:%M:%S %z')} " \
829
+ "\"#{data.lines.first.strip}\" #{code} #{response.bytesize}"
830
+ conn.write "HTTP/1.1 #{code}\r\n"
831
+ conn.write "Content-Type: text/html\r\n"
832
+ conn.write "Content-Length: #{response.bytesize}\r\n"
833
+ conn.write "\r\n"
834
+ conn.write response
835
+ end
836
+
837
+ def compute_trace(trace)
838
+ return trace unless @autoformat_stackdriver_trace &&
839
+ STACKDRIVER_TRACE_ID_REGEXP.match(trace)
840
+
841
+ "projects/#{@project_id}/traces/#{trace}"
842
+ end
843
+
844
+ def construct_log_entry_in_grpc_format(labels,
845
+ resource,
846
+ severity,
847
+ ts_secs,
848
+ ts_nanos)
849
+ entry = Google::Cloud::Logging::V2::LogEntry.new(
850
+ labels: labels,
851
+ resource: Google::Api::MonitoredResource.new(
852
+ type: resource.type,
853
+ labels: resource.labels.to_h
854
+ ),
855
+ severity: grpc_severity(severity)
856
+ )
857
+ # If "seconds" is null or not an integer, we will omit the timestamp
858
+ # field and defer the decision on how to handle it to the downstream
859
+ # Logging API. If "nanos" is null or not an integer, it will be set
860
+ # to 0.
861
+ if ts_secs.is_a?(Integer)
862
+ ts_nanos = 0 unless ts_nanos.is_a?(Integer)
863
+ entry.timestamp = Google::Protobuf::Timestamp.new(
864
+ seconds: ts_secs,
865
+ nanos: ts_nanos
866
+ )
867
+ end
868
+ entry
869
+ end
870
+
871
+ def construct_log_entry_in_rest_format(labels,
872
+ resource,
873
+ severity,
874
+ ts_secs,
875
+ ts_nanos)
876
+ # Remove the labels if we didn't populate them with anything.
877
+ resource.labels = nil if resource.labels.empty?
878
+ Google::Apis::LoggingV2::LogEntry.new(
879
+ labels: labels,
880
+ resource: resource,
881
+ severity: severity,
882
+ timestamp: {
883
+ seconds: ts_secs,
884
+ nanos: ts_nanos
885
+ }
886
+ )
887
+ end
888
+
889
    # Sends one WriteLogEntries request over gRPC with partial_success
    # enabled, updating the success/retry/drop metrics according to the
    # outcome. Server-side errors are re-raised so Fluentd retries the
    # chunk; client-side errors drop the affected entries.
    #
    # entries  - array of Google::Cloud::Logging::V2::LogEntry.
    # log_name - request-level log name (may be overridden per entry).
    # resource - request-level monitored resource, or nil to omit it.
    # labels   - request-level common labels.
    def write_request_via_grpc(entries:,
                               log_name: '',
                               resource: nil,
                               labels: {})
      client = api_client
      entries_count = entries.length
      client.write_log_entries(
        entries: entries,
        log_name: log_name,
        # Leave resource nil if it's nil.
        resource: if resource
                    Google::Api::MonitoredResource.new(
                      type: resource.type,
                      labels: resource.labels.to_h
                    )
                  end,
        # Label keys/values must be valid UTF-8 for the API.
        labels: labels.map do |k, v|
          [k.encode('utf-8'), convert_to_utf8(v)]
        end.to_h,
        partial_success: true
      )
      increment_successful_requests_count
      increment_ingested_entries_count(entries_count)

      # Let the user explicitly know when the first call succeeded, to
      # aid with verification and troubleshooting.
      unless @successful_call
        @successful_call = true
        @log.info 'Successfully sent gRPC to Stackdriver Logging API.'
      end
    rescue Google::Cloud::Error => e
      # GRPC::BadStatus is wrapped in error.cause.
      error = e.cause

      # See the mapping between HTTP status and gRPC status code at:
      # https://github.com/grpc/grpc/blob/master/src/core/lib/transport/status_conversion.cc
      case error
      # Server error, so retry via re-raising the error.
      when \
          # HTTP status 500 (Internal Server Error).
          GRPC::Internal,
          # HTTP status 501 (Not Implemented).
          GRPC::Unimplemented,
          # HTTP status 503 (Service Unavailable).
          GRPC::Unavailable,
          # HTTP status 504 (Gateway Timeout).
          GRPC::DeadlineExceeded
        increment_retried_entries_count(entries_count, error.code)
        @log.debug "Retrying #{entries_count} log message(s) later.",
                   error: error.to_s, error_code: error.code.to_s
        raise error

      # Most client errors indicate a problem with the request itself and
      # should not be retried.
      when \
          # HTTP status 401 (Unauthorized).
          # These are usually solved via a `gcloud auth` call, or by modifying
          # the permissions on the Google Cloud project.
          GRPC::Unauthenticated,
          # HTTP status 404 (Not Found).
          GRPC::NotFound,
          # HTTP status 409 (Conflict).
          GRPC::Aborted,
          # HTTP status 412 (Precondition Failed).
          GRPC::FailedPrecondition,
          # HTTP status 429 (Too Many Requests).
          GRPC::ResourceExhausted,
          # HTTP status 499 (Client Closed Request).
          GRPC::Cancelled,
          # the remaining http codes in both 4xx and 5xx category.
          # It's debatable whether to retry or drop these log entries.
          # This decision is made to avoid retrying forever due to
          # client errors.
          GRPC::Unknown
        increment_failed_requests_count(error.code)
        increment_dropped_entries_count(entries_count, error.code)
        @log.warn "Dropping #{entries_count} log message(s)",
                  error: error.to_s, error_code: error.code.to_s

      # As partial_success is enabled, valid entries should have been
      # written even if some other entries fail due to InvalidArgument or
      # PermissionDenied errors. Only invalid entries will be dropped.
      when \
          # HTTP status 400 (Bad Request).
          GRPC::InvalidArgument,
          # HTTP status 403 (Forbidden).
          GRPC::PermissionDenied
        error_details_map = construct_error_details_map_grpc(e)
        if error_details_map.empty?
          increment_failed_requests_count(error.code)
          increment_dropped_entries_count(entries_count, error.code)
          @log.warn "Dropping #{entries_count} log message(s)",
                    error: error.to_s, error_code: error.code.to_s
        else
          error_details_map.each do |(error_code, error_message), indexes|
            partial_errors_count = indexes.length
            increment_dropped_entries_count(partial_errors_count,
                                            error_code)
            entries_count -= partial_errors_count
            @log.warn "Dropping #{partial_errors_count} log message(s)",
                      error: error_message, error_code: error_code.to_s
          end
          # Consider partially successful requests successful.
          increment_successful_requests_count
          increment_ingested_entries_count(entries_count)
        end

      else
        # Assume it's a problem with the request itself and don't retry.
        error_code = if error.respond_to?(:code)
                       error.code
                     else
                       GRPC::Core::StatusCodes::UNKNOWN
                     end
        increment_failed_requests_count(error_code)
        increment_dropped_entries_count(entries_count, error_code)
        @log.error "Unknown response code #{error_code} from the server," \
                   " dropping #{entries_count} log message(s)",
                   error: error.to_s, error_code: error_code.to_s
      end

    # Got an unexpected error (not Google::Cloud::Error) from the
    # google-cloud-logging lib.
    rescue StandardError => e
      increment_failed_requests_count(GRPC::Core::StatusCodes::UNKNOWN)
      increment_dropped_entries_count(entries_count,
                                      GRPC::Core::StatusCodes::UNKNOWN)
      @log.error "Unexpected error type #{e.class.name} from the client" \
                 " library, dropping #{entries_count} log message(s)",
                 error: e.to_s
    end
1020
+
1021
    # Sends one WriteLogEntries request over the REST API with
    # partial_success enabled, updating the success/retry/drop metrics
    # according to the outcome. 5xx responses are re-raised so Fluentd
    # retries the chunk; 4xx responses drop the affected entries.
    #
    # entries  - array of Google::Apis::LoggingV2::LogEntry.
    # log_name - request-level log name (may be overridden per entry).
    # resource - request-level monitored resource, or nil to omit it.
    # labels   - request-level common labels.
    def write_request_via_rest(entries:,
                               log_name: '',
                               resource: nil,
                               labels: {})
      client = api_client
      entries_count = entries.length
      client.write_entry_log_entries(
        Google::Apis::LoggingV2::WriteLogEntriesRequest.new(
          entries: entries,
          log_name: log_name,
          resource: resource,
          labels: labels,
          partial_success: true
        ),
        options: { api_format_version: '2' }
      )
      increment_successful_requests_count
      increment_ingested_entries_count(entries_count)

      # Let the user explicitly know when the first call succeeded, to aid
      # with verification and troubleshooting.
      unless @successful_call
        @successful_call = true
        @log.info 'Successfully sent to Stackdriver Logging API.'
      end
    rescue Google::Apis::ServerError => e
      # 5xx server errors. Retry via re-raising the error.
      increment_retried_entries_count(entries_count, e.status_code)
      @log.debug "Retrying #{entries_count} log message(s) later.",
                 error: e.to_s, error_code: e.status_code.to_s
      raise e
    rescue Google::Apis::AuthorizationError => e
      # 401 authorization error.
      # These are usually solved via a `gcloud auth` call, or by modifying
      # the permissions on the Google Cloud project.
      increment_failed_requests_count(e.status_code)
      increment_dropped_entries_count(entries_count, e.status_code)
      @log.warn "Dropping #{entries_count} log message(s)",
                error: e.to_s, error_code: e.status_code.to_s
    rescue Google::Apis::ClientError => e
      # 4xx client errors. Most client errors indicate a problem with the
      # request itself and should not be retried.
      error_details_map = construct_error_details_map(e)
      if error_details_map.empty?
        increment_failed_requests_count(e.status_code)
        increment_dropped_entries_count(entries_count, e.status_code)
        @log.warn "Dropping #{entries_count} log message(s)",
                  error: e.to_s, error_code: e.status_code.to_s
      else
        # Per-entry errors: drop only the failing entries and count the
        # rest as ingested (partial_success was enabled).
        error_details_map.each do |(error_code, error_message), indexes|
          partial_errors_count = indexes.length
          increment_dropped_entries_count(partial_errors_count, error_code)
          entries_count -= partial_errors_count
          @log.warn "Dropping #{partial_errors_count} log message(s)",
                    error: error_message,
                    error_code: "google.rpc.Code[#{error_code}]"
        end
        # Consider partially successful requests successful.
        increment_successful_requests_count
        increment_ingested_entries_count(entries_count)
      end
    end
1083
+
1084
+ def parse_json_or_nil(input)
1085
+ return nil unless input.is_a?(String)
1086
+
1087
+ input.each_codepoint do |c|
1088
+ if c == 123
1089
+ # left curly bracket (U+007B)
1090
+ begin
1091
+ return JSON.parse(input)
1092
+ rescue JSON::ParserError
1093
+ return nil
1094
+ end
1095
+ else
1096
+ # Break (and return nil) unless the current character is whitespace,
1097
+ # in which case we continue to look for a left curly bracket.
1098
+ # Whitespace as per the JSON spec are: tabulation (U+0009),
1099
+ # line feed (U+000A), carriage return (U+000D), and space (U+0020).
1100
+ break unless [9, 10, 13, 32].include?(c)
1101
+ end
1102
+ end
1103
+ nil
1104
+ end
1105
+
1106
+ # Set regexp patterns to parse tags and logs.
1107
+ def set_regexp_patterns
1108
+ @compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp) if
1109
+ @kubernetes_tag_regexp
1110
+
1111
+ @compiled_http_latency_regexp =
1112
+ /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
1113
+ end
1114
+
1115
+ # Determine the common labels that should be added to all log entries
1116
+ # processed by this logging agent.
1117
+ def determine_agent_level_common_labels(resource)
1118
+ labels = {}
1119
+ # User can specify labels via config. We want to capture those as well.
1120
+ labels.merge!(@labels) if @labels
1121
+
1122
+ case resource.type
1123
+ # GAE, Cloud Dataflow, Cloud Dataproc and Cloud ML.
1124
+ when APPENGINE_CONSTANTS[:resource_type],
1125
+ DATAFLOW_CONSTANTS[:resource_type],
1126
+ DATAPROC_CONSTANTS[:resource_type],
1127
+ ML_CONSTANTS[:resource_type]
1128
+ labels.merge!(
1129
+ "#{COMPUTE_CONSTANTS[:service]}/resource_id" => @vm_id,
1130
+ "#{COMPUTE_CONSTANTS[:service]}/resource_name" => @vm_name,
1131
+ "#{COMPUTE_CONSTANTS[:service]}/zone" => @zone
1132
+ )
1133
+
1134
+ # GCE instance and GKE container.
1135
+ when COMPUTE_CONSTANTS[:resource_type],
1136
+ GKE_CONSTANTS[:resource_type]
1137
+ labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
1138
+
1139
+ # EC2.
1140
+ when EC2_CONSTANTS[:resource_type]
1141
+ labels["#{EC2_CONSTANTS[:service]}/resource_name"] = @vm_name
1142
+ end
1143
+ labels
1144
+ end
1145
+
1146
+ # Group the log entries by tag and local_resource_id pairs. Also filter out
1147
+ # invalid non-Hash entries.
1148
+ def group_log_entries_by_tag_and_local_resource_id(chunk)
1149
+ groups = {}
1150
+ chunk.msgpack_each do |tag, time, record|
1151
+ unless record.is_a?(Hash)
1152
+ @log.warn 'Dropping log entries with malformed record: ' \
1153
+ "'#{record.inspect}' from tag '#{tag}' at '#{time}'. " \
1154
+ 'A log record should be in JSON format.'
1155
+ next
1156
+ end
1157
+ sanitized_tag = sanitize_tag(tag)
1158
+ if sanitized_tag.nil?
1159
+ @log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
1160
+ ' A tag should be a string with utf8 characters.'
1161
+ next
1162
+ end
1163
+ local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
1164
+ # A nil local_resource_id means "fall back to legacy".
1165
+ hash_key = [sanitized_tag, local_resource_id].freeze
1166
+ groups[hash_key] ||= []
1167
+ groups[hash_key].push([time, record])
1168
+ end
1169
+ groups
1170
+ end
1171
+
1172
+ # Determine the group level monitored resource and common labels shared by a
1173
+ # collection of entries.
1174
    def determine_group_level_monitored_resource_and_labels(tag,
                                                            local_resource_id)
      # Start from the agent-level resource and labels; dup so per-group
      # mutation below cannot leak back into the shared state.
      resource = @resource.dup
      resource.labels = @resource.labels.dup
      common_labels = @common_labels.dup

      # Change the resource type and set matched_regexp_group if the tag matches
      # certain regexp.
      matched_regexp_group = nil # @tag_regexp_list can be an empty list.
      @tag_regexp_list.each do |derived_type, tag_regexp|
        matched_regexp_group = tag_regexp.match(tag)
        if matched_regexp_group
          resource.type = derived_type
          break
        end
      end

      # Determine the monitored resource based on the local_resource_id.
      # Different monitored resource types have unique ids in different format.
      # We will query Metadata Agent for the monitored resource. Return the
      # legacy monitored resource (either the instance resource or the resource
      # inferred from the tag) if failed to get a monitored resource from
      # Metadata Agent with this key.
      #
      # Examples:
      # // GKE Pod.
      # "k8s_pod.<namespace_name>.<pod_name>"
      # // GKE container.
      # "k8s_container.<namespace_name>.<pod_name>.<container_name>"
      if local_resource_id
        converted_resource = monitored_resource_from_local_resource_id(
          local_resource_id
        )
        resource = converted_resource if converted_resource
      end

      # Once the resource type is settled down, determine the labels.
      case resource.type
      # GKE container.
      when GKE_CONSTANTS[:resource_type]
        if matched_regexp_group
          # We only expect one occurrence of each key in the match group.
          resource_labels_candidates =
            matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
          common_labels_candidates = resource_labels_candidates.dup
          resource.labels.merge!(
            delete_and_extract_labels(
              resource_labels_candidates,
              # The kubernetes_tag_regexp is poorly named. 'namespace_name' is
              # in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
              # TODO(qingling128): Figure out how to put this map into
              # constants like GKE_CONSTANTS[:extra_resource_labels].
              'container_name' => 'container_name',
              'namespace_name' => 'namespace_id',
              'pod_name' => 'pod_id'
            )
          )

          common_labels.merge!(
            delete_and_extract_labels(
              common_labels_candidates,
              GKE_CONSTANTS[:extra_common_labels]
                .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h
            )
          )
        end

      # TODO(qingling128): Temporary fallback for metadata agent restarts.
      # K8s resources.
      when K8S_CONTAINER_CONSTANTS[:resource_type],
           K8S_POD_CONSTANTS[:resource_type],
           K8S_NODE_CONSTANTS[:resource_type]
        common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")

      end

      # Cloud Dataflow and Cloud ML.
      # These labels can be set via the 'labels' option.
      # Report them as monitored resource labels instead of common labels.
      # e.g. "dataflow.googleapis.com/job_id" => "job_id"
      [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
        next unless resource.type == service_constants[:resource_type]

        resource.labels.merge!(
          delete_and_extract_labels(
            common_labels, service_constants[:extra_resource_labels]
              .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h
          )
        )
      end

      # Freeze the group-level results: entry-level processing dups them
      # before applying per-entry adjustments.
      resource.freeze
      resource.labels.freeze
      common_labels.freeze

      [resource, common_labels]
    end
1271
+
1272
+ # Extract entry level monitored resource and common labels that should be
1273
+ # applied to individual entries.
1274
    def determine_entry_level_monitored_resource_and_labels(
      group_level_resource, group_level_common_labels, record
    )
      # Dup the (frozen) group-level results so per-entry adjustments
      # don't affect other entries in the same group.
      resource = group_level_resource.dup
      resource.labels = group_level_resource.labels.dup
      common_labels = group_level_common_labels.dup

      case resource.type
      # GKE container.
      when GKE_CONSTANTS[:resource_type]
        # Move the stdout/stderr annotation from the record into a label.
        common_labels.merge!(
          delete_and_extract_labels(
            record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"
          )
        )

        # If the record has been annotated by the kubernetes_metadata_filter
        # plugin, then use that metadata. Otherwise, rely on commonLabels
        # populated from the group's tag.
        if record.key?('kubernetes')
          resource.labels.merge!(
            delete_and_extract_labels(
              record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
                .map { |l| [l, l] }.to_h
            )
          )
          common_labels.merge!(
            delete_and_extract_labels(
              record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
                .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h
            )
          )
          # Prepend label/ to all user-defined labels' keys.
          if record['kubernetes'].key?('labels')
            common_labels.merge!(
              delete_and_extract_labels(
                record['kubernetes']['labels'], record['kubernetes']['labels']
                  .map { |key, _| [key, "label/#{key}"] }.to_h
              )
            )
          end
          # We've explicitly consumed all the fields we care about -- don't
          # litter the log entries with the remaining fields that the kubernetes
          # metadata filter plugin includes (or an empty 'kubernetes' field).
          record.delete('kubernetes')
          record.delete('docker')
        end
      end

      # If the name of a field in the record is present in the @label_map
      # configured by users, report its value as a label and do not send that
      # field as part of the payload.
      common_labels.merge!(delete_and_extract_labels(record, @label_map))

      # Cloud Dataflow and Cloud ML.
      # These labels can be set via the 'labels' or 'label_map' options.
      # Report them as monitored resource labels instead of common labels.
      # e.g. "dataflow.googleapis.com/job_id" => "job_id"
      [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
        next unless resource.type == service_constants[:resource_type]

        resource.labels.merge!(
          delete_and_extract_labels(
            common_labels, service_constants[:extra_resource_labels]
              .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h
          )
        )
      end

      [resource, common_labels]
    end
1346
+
1347
+ def time_or_nil(ts_secs, ts_nanos)
1348
+ Time.at((Integer ts_secs), (Integer ts_nanos) / 1_000.0)
1349
+ rescue ArgumentError, TypeError
1350
+ nil
1351
+ end
1352
+
1353
    # Extracts [ts_secs, ts_nanos, timestamp] for a record, consuming
    # (deleting) whichever supported timestamp field the record carries.
    # Encodings are checked in priority order:
    #   1. 'timestamp' => { 'seconds' => s, 'nanos' => n }
    #   2. 'timestampSeconds' + 'timestampNanos'
    #   3. 'timeNanos' (deprecated)
    #   4. 'time' (ISO8601 string, as emitted by k8s)
    # Falls back to the Fluentd event `time` when none apply. The returned
    # `timestamp` Time may be nil if the record values were unparseable.
    def compute_timestamp(record, time)
      if record.key?('timestamp') &&
         record['timestamp'].is_a?(Hash) &&
         record['timestamp'].key?('seconds') &&
         record['timestamp'].key?('nanos')
        ts_secs = record['timestamp']['seconds']
        ts_nanos = record['timestamp']['nanos']
        record.delete('timestamp')
        timestamp = time_or_nil(ts_secs, ts_nanos)
      elsif record.key?('timestampSeconds') &&
            record.key?('timestampNanos')
        ts_secs = record.delete('timestampSeconds')
        ts_nanos = record.delete('timestampNanos')
        timestamp = time_or_nil(ts_secs, ts_nanos)
      elsif record.key?('timeNanos')
        # This is deprecated since the precision is insufficient.
        # Use timestampSeconds/timestampNanos instead
        nanos = record.delete('timeNanos')
        ts_secs = (nanos / 1_000_000_000).to_i
        ts_nanos = nanos % 1_000_000_000
        unless @timenanos_warning
          # Warn the user this is deprecated, but only once to avoid spam.
          @timenanos_warning = true
          @log.warn 'timeNanos is deprecated - please use ' \
                    'timestampSeconds and timestampNanos instead.'
        end
        timestamp = time_or_nil(ts_secs, ts_nanos)
      elsif record.key?('time')
        # k8s ISO8601 timestamp
        begin
          timestamp = Time.iso8601(record.delete('time'))
        rescue StandardError
          timestamp = Time.at(time)
        end
        ts_secs = timestamp.tv_sec
        ts_nanos = timestamp.tv_nsec
      else
        timestamp = Time.at(time)
        ts_secs = timestamp.tv_sec
        ts_nanos = timestamp.tv_nsec
      end
      # Coerce components to Integer when possible; otherwise pass the raw
      # values through so downstream formatting can decide how to handle
      # them (the gRPC path omits non-integer timestamps entirely).
      ts_secs = begin
        Integer ts_secs
      rescue ArgumentError, TypeError
        ts_secs
      end
      ts_nanos = begin
        Integer ts_nanos
      rescue ArgumentError, TypeError
        ts_nanos
      end

      [ts_secs, ts_nanos, timestamp]
    end
1407
+
1408
+ # Adjust timestamps from the future.
1409
+ # The base case is:
1410
+ # 0. The parsed timestamp is less than one day into the future.
1411
+ # This is allowed by the API, and should be left unchanged.
1412
+ #
1413
+ # Beyond that, there are two cases:
1414
+ # 1. The parsed timestamp is later in the current year:
1415
+ # This can happen when system log lines from previous years are missing
1416
+ # the year, so the date parser assumes the current year.
1417
+ # We treat these lines as coming from last year. This could label
1418
+ # 2-year-old logs incorrectly, but this probably isn't super important.
1419
+ #
1420
+ # 2. The parsed timestamp is past the end of the current year:
1421
+ # Since the year is different from the current year, this isn't the
1422
+ # missing year in system logs. It is unlikely that users explicitly
1423
+ # write logs at a future date. This could result from an unsynchronized
1424
+ # clock on a VM, or some random value being parsed as the timestamp.
1425
+ # We reset the timestamp on those lines to the default value and let the
1426
+ # downstream API handle it.
1427
+ def adjust_timestamp_if_invalid(timestamp, current_time)
1428
+ ts_secs = timestamp.tv_sec
1429
+ ts_nanos = timestamp.tv_nsec
1430
+
1431
+ next_year = Time.mktime(current_time.year + 1)
1432
+ one_day_later = current_time.to_datetime.next_day.to_time
1433
+ if timestamp < one_day_later # Case 0.
1434
+ # Leave the timestamp as-is.
1435
+ elsif timestamp >= next_year # Case 2.
1436
+ ts_secs = 0
1437
+ ts_nanos = 0
1438
+ else # Case 1.
1439
+ adjusted_timestamp = timestamp.to_datetime.prev_year.to_time
1440
+ ts_secs = adjusted_timestamp.tv_sec
1441
+ # The value of ts_nanos should not change when subtracting a year.
1442
+ end
1443
+
1444
+ [ts_secs, ts_nanos]
1445
+ end
1446
+
1447
+ def compute_severity(resource_type, record, entry_level_common_labels)
1448
+ if record.key?('severity')
1449
+ return parse_severity(record.delete('severity'))
1450
+ elsif resource_type == GKE_CONSTANTS[:resource_type]
1451
+ stream = entry_level_common_labels["#{GKE_CONSTANTS[:service]}/stream"]
1452
+ return GKE_CONSTANTS[:stream_severity_map].fetch(stream, 'DEFAULT')
1453
+ end
1454
+
1455
+ 'DEFAULT'
1456
+ end
1457
+
1458
+ def set_log_entry_fields(record, entry)
1459
+ # TODO(qingling128) On the next major after 0.7.4, make all logEntry
1460
+ # subfields behave the same way: if the field is not in the correct
1461
+ # format, log an error in the Fluentd log and remove this field from
1462
+ # payload. This is the preferred behavior per PM decision.
1463
+ LOG_ENTRY_FIELDS_MAP.each do |field_name, config|
1464
+ payload_key, subfields, grpc_class, non_grpc_class = config
1465
+ begin
1466
+ payload_key = instance_variable_get(payload_key)
1467
+ fields = record[payload_key]
1468
+ record.delete(payload_key) if fields.nil?
1469
+ next unless fields.is_a?(Hash)
1470
+
1471
+ extracted_subfields = subfields.each_with_object({}) \
1472
+ do |(original_key, destination_key, cast_fn), extracted_fields|
1473
+ value = fields.delete(original_key)
1474
+ next if value.nil?
1475
+
1476
+ begin
1477
+ casted_value = send(cast_fn, value)
1478
+ rescue TypeError
1479
+ @log.error "Failed to #{cast_fn} for #{field_name}." \
1480
+ "#{original_key} with value #{value.inspect}.", err
1481
+ next
1482
+ end
1483
+ next if casted_value.nil?
1484
+
1485
+ extracted_fields[destination_key] = casted_value
1486
+ end
1487
+
1488
+ next unless extracted_subfields
1489
+
1490
+ output = if @use_grpc
1491
+ Object.const_get(grpc_class).new
1492
+ else
1493
+ Object.const_get(non_grpc_class).new
1494
+ end
1495
+ extracted_subfields.each do |key, value|
1496
+ output.send("#{key}=", value)
1497
+ end
1498
+
1499
+ record.delete(payload_key) if fields.empty?
1500
+
1501
+ entry.send("#{field_name}=", output)
1502
+ rescue StandardError => e
1503
+ @log.error "Failed to set log entry field for #{field_name}.", e
1504
+ end
1505
+ end
1506
+ end
1507
+
1508
+ # Parse labels. Return nil if not set.
1509
+ def parse_labels(record)
1510
+ payload_labels = record.delete(@labels_key)
1511
+ return nil unless payload_labels
1512
+
1513
+ unless payload_labels.is_a?(Hash)
1514
+ @log.error "Invalid value of '#{@labels_key}' in the payload: " \
1515
+ "#{payload_labels}. Labels need to be a JSON object."
1516
+ return nil
1517
+ end
1518
+
1519
+ non_string_keys = payload_labels.each_with_object([]) do |(k, v), a|
1520
+ a << k unless k.is_a?(String) && v.is_a?(String)
1521
+ end
1522
+ unless non_string_keys.empty?
1523
+ @log.error "Invalid value of '#{@labels_key}' in the payload: " \
1524
+ "#{payload_labels}. Labels need string values for all " \
1525
+ "keys; keys #{non_string_keys} don't."
1526
+ return nil
1527
+ end
1528
+ payload_labels
1529
+ rescue StandardError => e
1530
+ @log.error "Failed to extract '#{@labels_key}' from payload.", e
1531
+ nil
1532
+ end
1533
+
1534
    # Values permitted by the API for 'severity' (which is an enum).
    # Listed in increasing order of severity, matching the numeric enum
    # values 0..800 used by the API.
    VALID_SEVERITIES = Set.new(
      %w[DEFAULT DEBUG INFO NOTICE WARNING ERROR CRITICAL ALERT EMERGENCY]
    ).freeze

    # Translates other severity strings to one of the valid values above.
    # Covers common logging frameworks so their level names map onto the
    # API's enum without user configuration.
    SEVERITY_TRANSLATIONS = {
      # log4j levels (both current and obsolete).
      'WARN' => 'WARNING',
      'FATAL' => 'CRITICAL',
      'TRACE' => 'DEBUG',
      'TRACE_INT' => 'DEBUG',
      'FINE' => 'DEBUG',
      'FINER' => 'DEBUG',
      'FINEST' => 'DEBUG',
      # java.util.logging levels (only missing ones from above listed).
      'SEVERE' => 'ERROR',
      'CONFIG' => 'DEBUG',
      # nginx levels (only missing ones from above listed).
      'CRIT' => 'CRITICAL',
      'EMERG' => 'EMERGENCY',
      # single-letter levels. Note E->ERROR and D->DEBUG.
      'D' => 'DEBUG',
      'I' => 'INFO',
      'N' => 'NOTICE',
      'W' => 'WARNING',
      'E' => 'ERROR',
      'C' => 'CRITICAL',
      'A' => 'ALERT',
      # other misc. translations.
      'INFORMATION' => 'INFO',
      'ERR' => 'ERROR',
      'F' => 'CRITICAL'
    }.freeze
1568
+
1569
+ def parse_severity(severity_str)
1570
+ # The API is case insensitive, but uppercase to make things simpler.
1571
+ severity = severity_str.to_s.upcase.strip
1572
+
1573
+ # If the severity is already valid, just return it.
1574
+ return severity if VALID_SEVERITIES.include?(severity)
1575
+
1576
+ # If the severity is an integer (string) return it as an integer,
1577
+ # truncated to the closest valid value (multiples of 100 between 0-800).
1578
+ if /\A\d+\z/ =~ severity
1579
+ begin
1580
+ numeric_severity = (severity.to_i / 100) * 100
1581
+ case
1582
+ when numeric_severity.negative?
1583
+ return 0
1584
+ when numeric_severity > 800
1585
+ return 800
1586
+ else
1587
+ return numeric_severity
1588
+ end
1589
+ rescue StandardError
1590
+ return 'DEFAULT'
1591
+ end
1592
+ end
1593
+
1594
+ # Try to translate the severity.
1595
+ return SEVERITY_TRANSLATIONS[severity] if SEVERITY_TRANSLATIONS.key?(severity)
1596
+
1597
+ # If all else fails, use 'DEFAULT'.
1598
+ 'DEFAULT'
1599
+ end
1600
+
1601
    # Maps both the textual severity names and their numeric API values
    # (multiples of 100 in 0..800, as produced by parse_severity) onto the
    # gRPC LogSeverity enum constants.
    GRPC_SEVERITY_MAPPING = {
      'DEFAULT' => Google::Cloud::Logging::Type::LogSeverity::DEFAULT,
      'DEBUG' => Google::Cloud::Logging::Type::LogSeverity::DEBUG,
      'INFO' => Google::Cloud::Logging::Type::LogSeverity::INFO,
      'NOTICE' => Google::Cloud::Logging::Type::LogSeverity::NOTICE,
      'WARNING' => Google::Cloud::Logging::Type::LogSeverity::WARNING,
      'ERROR' => Google::Cloud::Logging::Type::LogSeverity::ERROR,
      'CRITICAL' => Google::Cloud::Logging::Type::LogSeverity::CRITICAL,
      'ALERT' => Google::Cloud::Logging::Type::LogSeverity::ALERT,
      'EMERGENCY' => Google::Cloud::Logging::Type::LogSeverity::EMERGENCY,
      0 => Google::Cloud::Logging::Type::LogSeverity::DEFAULT,
      100 => Google::Cloud::Logging::Type::LogSeverity::DEBUG,
      200 => Google::Cloud::Logging::Type::LogSeverity::INFO,
      300 => Google::Cloud::Logging::Type::LogSeverity::NOTICE,
      400 => Google::Cloud::Logging::Type::LogSeverity::WARNING,
      500 => Google::Cloud::Logging::Type::LogSeverity::ERROR,
      600 => Google::Cloud::Logging::Type::LogSeverity::CRITICAL,
      700 => Google::Cloud::Logging::Type::LogSeverity::ALERT,
      800 => Google::Cloud::Logging::Type::LogSeverity::EMERGENCY
    }.freeze
1621
+
1622
+ def grpc_severity(severity)
1623
+ # TODO: find out why this doesn't work.
1624
+ # if severity.is_a? String
1625
+ # return Google::Cloud::Logging::Type::LogSeverity.resolve(severity)
1626
+ # end
1627
+ return GRPC_SEVERITY_MAPPING[severity] if GRPC_SEVERITY_MAPPING.key?(severity)
1628
+
1629
+ severity
1630
+ end
1631
+
1632
    # Cast function for string-typed LogEntry subfields: coerces any value
    # to its String representation.
    def parse_string(value)
      value.to_s
    end
1635
+
1636
    # Cast function for integer-typed LogEntry subfields. Uses #to_i, so
    # values without a leading integer (e.g. 'abc', nil) become 0 rather
    # than raising.
    def parse_int(value)
      value.to_i
    end
1639
+
1640
+ def parse_bool(value)
1641
+ [true, 'true', 1].include?(value)
1642
+ end
1643
+
1644
# Parse a latency string of the form "<int>[.<decimal>] s" (optional
# surrounding whitespace), e.g. "1.42 s", into a duration.
#
# Returns a Google::Protobuf::Duration when gRPC is in use, otherwise a
# {seconds:, nanos:} hash with zero-valued components removed, or nil
# when the input does not match so the caller can skip setting latency.
# Assumes @compiled_http_latency_regexp (built at configure time) has
# named captures 'seconds' and 'decimal' — TODO confirm against configure.
def parse_latency(latency)
  parsed = @compiled_http_latency_regexp.match(latency)
  return nil if parsed.nil?

  # Integer and fractional parts are handled separately so the
  # fractional part can be expressed as nanoseconds.
  whole_seconds = parsed['seconds'].to_i
  nanoseconds = (parsed['decimal'].to_f * 1_000_000_000).round

  if @use_grpc
    Google::Protobuf::Duration.new(
      seconds: whole_seconds,
      nanos: nanoseconds
    )
  else
    duration = { seconds: whole_seconds, nanos: nanoseconds }
    duration.delete_if { |_, component| component.zero? }
  end
end
1670
+
1671
# Serialize one Fluentd event (tag, timestamp, record) into a msgpack
# byte string using the factory's packer.
def format(tag, time, record)
  packer = Fluent::MessagePackFactory.engine_factory.packer
  packer.write([tag, time, record]).to_s
end
1678
+
1679
# Return a valid form of the given tag, or nil when it must be rejected.
#
# With @require_valid_tags set, only non-empty UTF-8 String tags are
# accepted; anything else yields nil. Otherwise the tag is stringified,
# coerced to UTF-8, and an empty result is replaced by '_'.
def sanitize_tag(tag)
  if @require_valid_tags
    # Keep this exact short-circuit order: convert_to_utf8 is only safe
    # to call once we know the tag is a String.
    return nil unless tag.is_a?(String) && tag != '' &&
                      convert_to_utf8(tag) == tag
  end

  sanitized = convert_to_utf8(tag.to_s)
  sanitized == '' ? '_' : sanitized
end
1693
+
1694
# For each original_label => new_label pair in label_map, remove
# original_label from `hash` (mutating it) and, when a truthy value was
# present, record it under new_label (stringified, UTF-8 coerced) in the
# returned map. Returns {} when either argument is not a Hash.
def delete_and_extract_labels(hash, label_map)
  # nil fails is_a?(Hash), so no separate nil checks are needed.
  return {} unless label_map.is_a?(Hash) && hash.is_a?(Hash)

  extracted = {}
  label_map.each do |original_label, new_label|
    value = hash.delete(original_label)
    # Falsy values (nil / false) are dropped, matching Hash#delete's
    # nil-on-missing behavior.
    extracted[new_label] = convert_to_utf8(value.to_s) if value
  end
  extracted
end
1707
+
1708
# Convert a Ruby value into a Google::Protobuf::Value. Hashes and Arrays
# recurse via struct_from_ruby / list_from_ruby; already-built protobuf
# Struct / ListValue instances are used as-is. Raises
# Google::Protobuf::Error for any unsupported type.
def value_from_ruby(value)
  proto_value = Google::Protobuf::Value.new
  # NOTE: branch order matters — protobuf Struct/ListValue must be
  # matched before the generic Hash/Array cases.
  case value
  when NilClass
    proto_value.null_value = 0
  when Numeric
    proto_value.number_value = value
  when String
    proto_value.string_value = convert_to_utf8(value)
  when TrueClass
    proto_value.bool_value = true
  when FalseClass
    proto_value.bool_value = false
  when Google::Protobuf::Struct
    proto_value.struct_value = value
  when Hash
    proto_value.struct_value = struct_from_ruby(value)
  when Google::Protobuf::ListValue
    proto_value.list_value = value
  when Array
    proto_value.list_value = list_from_ruby(value)
  else
    @log.error "Unknown type: #{value.class}"
    raise Google::Protobuf::Error, "Unknown type: #{value.class}"
  end
  proto_value
end
1735
+
1736
# Convert a Ruby Array into a Google::Protobuf::ListValue, converting
# each element via value_from_ruby.
def list_from_ruby(arr)
  list = Google::Protobuf::ListValue.new
  arr.each { |element| list.values << value_from_ruby(element) }
  list
end
1743
+
1744
# Convert a Ruby Hash into a Google::Protobuf::Struct, converting each
# value via value_from_ruby and coercing keys to UTF-8 strings.
def struct_from_ruby(hash)
  struct = Google::Protobuf::Struct.new
  hash.each do |key, val|
    # ||= keeps the first value when two distinct keys normalize to the
    # same UTF-8 string.
    struct.fields[convert_to_utf8(key.to_s)] ||= value_from_ruby(val)
  end
  struct
end
1751
+
1752
# TODO(qingling128): Fix the inconsistent behavior of 'message', 'log' and
# 'msg' in the next major version 1.0.0.
#
# Populate exactly one of entry.text_payload / entry.json_payload from
# the record. Text is used for unstructured GKE container logs (the
# 'log' key) and for records whose only key is 'message'; everything
# else — including records already known to be valid JSON — becomes a
# JSON payload. Under gRPC, payloads are converted to protobuf / UTF-8.
def set_payload(resource_type, record, entry, is_json)
  text_payload = nil
  json_payload = nil
  if is_json
    json_payload = record
  elsif GKE_CONSTANTS[:resource_type] == resource_type && record.key?('log')
    text_payload = record['log']
  elsif record.size == 1 && record.key?('message')
    text_payload = record['message']
  else
    json_payload = record
  end

  if json_payload
    entry.json_payload =
      @use_grpc ? struct_from_ruby(json_payload) : json_payload
  elsif text_payload
    text_payload = text_payload.to_s
    entry.text_payload =
      @use_grpc ? convert_to_utf8(text_payload) : text_payload
  end
end
1787
+
1788
# Derive the URL-encoded log name for an entry from its Fluentd tag and
# monitored resource: App Engine tags get a service prefix to avoid
# namespace collisions, and GKE logs use the container name (when
# present and sanitizable) instead of the tag.
def log_name(tag, resource)
  case resource.type
  when APPENGINE_CONSTANTS[:resource_type]
    tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
  when GKE_CONSTANTS[:resource_type]
    if resource.labels&.key?('container_name')
      candidate = sanitize_tag(resource.labels['container_name'])
      tag = candidate unless candidate.nil?
    end
  end
  ERB::Util.url_encode(tag)
end
1802
+
1803
# Build the Logging API client and store it in @client.
#
# gRPC path: validates @logging_api_url, configures optional response
# compression, sets up TLS + application-default-credential call
# credentials for https URLs (an insecure channel otherwise), and creates
# a LoggingService::Client over an explicitly constructed channel.
# REST path: configures the global ClientOptions and attaches
# application-default authorization scoped to Common::LOGGING_SCOPE.
def init_api_client
  # Set up the logger for the auto-generated Google Cloud APIs.
  Google::Apis.logger = @log
  if @use_grpc
    uri = URI.parse(@logging_api_url)
    host = uri.host
    unless host
      raise Fluent::ConfigError,
            'The logging_api_url option specifies an invalid URL:' \
            " #{@logging_api_url}."
    end
    # Optional gRPC channel compression (e.g. :gzip), configured by the
    # grpc_compression_algorithm plugin option.
    if @grpc_compression_algorithm
      compression_options =
        GRPC::Core::CompressionOptions.new(
          default_algorithm: @grpc_compression_algorithm
        )
      compression_channel_args = compression_options.to_channel_arg_hash
    else
      compression_channel_args = {}
    end
    if uri.scheme == 'https'
      # TLS channel credentials composed with per-call OAuth credentials
      # from the application default credentials.
      ssl_creds = GRPC::Core::ChannelCredentials.new
      authentication = Google::Auth.get_application_default
      creds = GRPC::Core::CallCredentials.new(authentication.updater_proc)
      creds = ssl_creds.compose(creds)
    else
      # Non-https URL: plaintext channel (e.g. a local test endpoint).
      creds = :this_channel_is_insecure
    end
    port = ":#{uri.port}" if uri.port
    user_agent = \
      "#{PLUGIN_NAME}/#{PLUGIN_VERSION} grpc-ruby/#{GRPC::VERSION} " \
      "#{Google::Apis::OS_VERSION}"
    channel_args = { 'grpc.primary_user_agent' => user_agent }
                   .merge!(compression_channel_args)
    # Hand the client a pre-built channel so the user agent and
    # compression settings above take effect.
    @client = Google::Cloud::Logging::V2::LoggingService::Client.new do |config|
      config.credentials = GRPC::Core::Channel.new(
        "#{host}#{port}", channel_args, creds
      )
    end
  else
    # TODO: Use a non-default ClientOptions object.
    Google::Apis::ClientOptions.default.application_name = PLUGIN_NAME
    Google::Apis::ClientOptions.default.application_version = PLUGIN_VERSION
    @client = Google::Apis::LoggingV2::LoggingService.new
    @client.authorization = Google::Auth.get_application_default(
      Common::LOGGING_SCOPE
    )
  end
end
1852
+
1853
# Return @client, refreshing the REST client's OAuth token first when it
# has expired. The gRPC channel manages its own token renewal
# (https://grpc.io/docs/guides/auth.html#authentication-api), so the
# client is returned untouched in that mode.
def api_client
  return @client if @use_grpc

  if @client.authorization.expired?
    begin
      @client.authorization.fetch_access_token!
    rescue MultiJson::ParseError
      # Workaround an issue in the API client; just re-raise a more
      # descriptive error for the user (which will still cause a retry).
      raise Google::APIClient::ClientError,
            'Unable to fetch access token (no scopes configured?)'
    end
  end
  @client
end
1868
+
1869
# Encode as UTF-8. If 'coerce_to_utf8' is set to true in the config, any
# non-UTF-8 character is replaced by 'non_utf8_replacement_string'.
# Otherwise a non-UTF-8 character makes the plugin log an explanatory
# error and re-raise the EncodingError.
def convert_to_utf8(input)
  unless @coerce_to_utf8
    begin
      return input.encode('utf-8')
    rescue EncodingError
      @log.error 'Encountered encoding issues potentially due to non ' \
                 'UTF-8 characters. To allow non-UTF-8 characters and ' \
                 'replace them with spaces, please set "coerce_to_utf8" ' \
                 'to true.'
      raise
    end
  end

  input.encode(
    'utf-8',
    invalid: :replace,
    undef: :replace,
    replace: @non_utf8_replacement_string
  )
end
1893
+
1894
+ # Extract a map of error details from a potentially partially successful
1895
+ # REST request.
1896
+ #
1897
+ # The keys in this map are [error_code, error_message] pairs, and the values
1898
+ # are a list of stringified indexes of log entries that failed due to this
1899
+ # error.
1900
+ #
1901
+ # A sample error.body looks like:
1902
+ # {
1903
+ # "error": {
1904
+ # "code": 403,
1905
+ # "message": "User not authorized.",
1906
+ # "status": "PERMISSION_DENIED",
1907
+ # "details": [
1908
+ # {
1909
+ # "@type": "type.googleapis.com/google.logging.v2.WriteLogEntriesPar
1910
+ # tialErrors",
1911
+ # "logEntryErrors": {
1912
+ # "0": {
1913
+ # "code": 7,
1914
+ # "message": "User not authorized."
1915
+ # },
1916
+ # "1": {
1917
+ # "code": 3,
1918
+ # "message": "Log name contains illegal character :"
1919
+ # },
1920
+ # "3": {
1921
+ # "code": 3,
1922
+ # "message": "Log name contains illegal character :"
1923
+ # }
1924
+ # }
1925
+ # },
1926
+ # {
1927
+ # "@type": "type.googleapis.com/google.rpc.DebugInfo",
1928
+ # "detail": ...
1929
+ # }
1930
+ # ]
1931
+ # }
1932
+ # }
1933
+ #
1934
+ # The root level "code", "message", and "status" simply match the root
1935
+ # cause of the first failed log entry. For example, if we switched the order
1936
+ # of the log entries, then we would get:
1937
+ # {
1938
+ # "error" : {
1939
+ # "code" : 400,
1940
+ # "message" : "Log name contains illegal character :",
1941
+ # "status" : "INVALID_ARGUMENT",
1942
+ # "details": ...
1943
+ # }
1944
+ # }
1945
+ # We will ignore it anyway and look at the details instead which includes
1946
+ # info for all failed log entries.
1947
+ #
1948
+ # In this example, the logEntryErrors that we care are:
1949
+ # {
1950
+ # "0": {
1951
+ # "code": 7,
1952
+ # "message": "User not authorized."
1953
+ # },
1954
+ # "1": {
1955
+ # "code": 3,
1956
+ # "message": "Log name contains illegal character :"
1957
+ # },
1958
+ # "3": {
1959
+ # "code": 3,
1960
+ # "message": "Log name contains illegal character :"
1961
+ # }
1962
+ # }
1963
+ #
1964
+ # The ultimate map that is constructed is:
1965
+ # {
1966
+ # [7, 'User not authorized.']: ['0'],
1967
+ # [3, 'Log name contains illegal character :']: ['1', '3']
1968
+ # }
1969
def construct_error_details_map(error)
  # Auto-vivifying map: each new [code, message] key starts out mapped
  # to an empty list of entry indexes.
  error_details_map = Hash.new { |h, k| h[k] = [] }

  # Dig error.details out of the JSON response body. ensure_hash /
  # ensure_array raise JSON::ParserError on any shape mismatch, which
  # the rescue below converts into an empty result.
  error_details = ensure_array(
    ensure_hash(ensure_hash(JSON.parse(error.body))['error'])['details']
  )
  # Locate the WriteLogEntriesPartialErrors detail. The lambda is
  # Enumerable#detect's `ifnone` fallback: it raises when no detail
  # entry has the expected @type.
  partial_errors = error_details.detect(
    -> { raise JSON::ParserError, "No type #{PARTIAL_ERROR_FIELD}." }
  ) do |error_detail|
    ensure_hash(error_detail)['@type'] == PARTIAL_ERROR_FIELD
  end
  log_entry_errors = ensure_hash(
    ensure_hash(partial_errors)['logEntryErrors']
  )
  # Group the failed entry indexes (string keys in REST responses) by
  # their [code, message] pair.
  log_entry_errors.each do |index, log_entry_error|
    error_hash = ensure_hash(log_entry_error)
    unless error_hash['code'] && error_hash['message']
      raise JSON::ParserError,
            "Entry #{index} is missing 'code' or 'message'."
    end
    error_key = [error_hash['code'], error_hash['message']].freeze
    # TODO(qingling128): Convert indexes to integers.
    error_details_map[error_key] << index
  end
  error_details_map
rescue JSON::ParserError => e
  # Malformed or unexpected error payloads are logged and treated as
  # "no per-entry details available".
  @log.warn 'Failed to extract log entry errors from the error details:' \
            " #{error.body}.", error: e
  {}
end
1999
+
2000
+ # Extract a map of error details from a potentially partially successful
2001
+ # gRPC request.
2002
+ #
2003
+ # The keys in this map are [error_code, error_message] pairs, and the values
2004
+ # are a list of indexes of log entries that failed due to this error.
2005
+ #
2006
+ # A sample error looks like:
2007
+ # <Google::Cloud::PermissionDeniedError:
2008
+ # message: 'User not authorized.',
2009
+ # details: [
2010
+ # <Google::Cloud::Logging::V2::WriteLogEntriesPartialErrors:
2011
+ # log_entry_errors: {
2012
+ # 0 => <Google::Rpc::Status:
2013
+ # code: 7,
2014
+ # message: "User not authorized.",
2015
+ # details: []>,
2016
+ # 1 => <Google::Rpc::Status:
2017
+ # code: 3,
2018
+ # message: "Log name contains illegal character :",
2019
+ # details: []>,
2020
+ # 3 => <Google::Rpc::Status:
2021
+ # code: 3,
2022
+ # message: "Log name contains illegal character :",
2023
+ # details: []>
2024
+ # }
2025
+ # >,
2026
+ # <Google::Rpc::DebugInfo:
2027
+ # stack_entries: [],
2028
+ # detail: "..."
2029
+ # >
2030
+ # ]
2031
+ # cause: <GRPC::PermissionDenied: 7:User not authorized.>
2032
+ # }
2033
+ #
2034
+ # The ultimate map that is constructed is:
2035
+ # {
2036
+ # [7, 'User not authorized.']: [0],
2037
+ # [3, 'Log name contains illegal character :']: [1, 3]
2038
+ # }
2039
# Extract a map of {[code, message] => [entry indexes]} from a partially
# successful gRPC write request (see the example in the comment above).
# Returns {} when the error carries no parseable partial-error details.
def construct_error_details_map_grpc(gax_error)
  # This fires on every handled partial failure, so keep it at debug
  # severity; it was previously a leftover @log.error diagnostic.
  @log.debug "construct_error_details_map_grpc: #{gax_error}"
  error_details_map = Hash.new { |h, k| h[k] = [] }
  error_details = ensure_array(gax_error.status_details)
  raise JSON::ParserError, 'The error details are empty.' if
    error_details.empty?
  # The partial-errors detail is expected to be first in status_details.
  raise JSON::ParserError, 'No partial error info in error details.' unless
    error_details[0].is_a?(
      Google::Cloud::Logging::V2::WriteLogEntriesPartialErrors
    )

  # Group failed entry indexes (integers in gRPC responses) by their
  # [code, message] pair.
  log_entry_errors = ensure_hash(error_details[0].log_entry_errors)
  log_entry_errors.each do |index, log_entry_error|
    error_key = [log_entry_error[:code], log_entry_error[:message]].freeze
    error_details_map[error_key] << index
  end
  error_details_map
rescue JSON::ParserError => e
  # Unexpected detail shapes are logged and treated as "no per-entry
  # details available".
  @log.warn 'Failed to extract log entry errors from the error details:' \
            " #{gax_error.details.inspect}.", error: e
  {}
end
2061
+
2062
# Take a locally unique resource id and convert it to the globally unique
# monitored resource (k8s_container / k8s_pod / k8s_node). Returns nil
# when the id does not match any known format or when the cluster name /
# location cannot be determined, so the caller falls back to the
# resource detected earlier.
def monitored_resource_from_local_resource_id(local_resource_id)
  # NOTE: `regexp-literal =~ string` is load-bearing here — only a
  # literal regexp on the left-hand side assigns the named captures
  # (resource_type, namespace_name, pod_name, container_name, node_name)
  # to local variables used below.
  return unless
    /^
    (?<resource_type>k8s_container)
    \.(?<namespace_name>[0-9a-z-]+)
    \.(?<pod_name>[.0-9a-z-]+)
    \.(?<container_name>[0-9a-z-]+)$/x =~ local_resource_id ||
    /^
    (?<resource_type>k8s_pod)
    \.(?<namespace_name>[0-9a-z-]+)
    \.(?<pod_name>[.0-9a-z-]+)$/x =~ local_resource_id ||
    /^
    (?<resource_type>k8s_node)
    \.(?<node_name>[0-9a-z-]+)$/x =~ local_resource_id

  # Clear name and location if they're explicitly set to empty.
  @k8s_cluster_name = nil if @k8s_cluster_name == ''
  @k8s_cluster_location = nil if @k8s_cluster_location == ''

  # Fill in any missing cluster identity from the GCE metadata server;
  # cached in ivars so the fetch happens at most once per value.
  begin
    @k8s_cluster_name ||= @utils.fetch_gce_metadata(
      @platform, 'instance/attributes/cluster-name'
    )
    @k8s_cluster_location ||= @utils.fetch_gce_metadata(
      @platform, 'instance/attributes/cluster-location'
    )
  rescue StandardError => e
    @log.error 'Failed to retrieve k8s cluster name and location.', \
               error: e
  end
  # Assumes K8S_*_CONSTANTS[:resource_type] equal the literal strings in
  # the regexes above; otherwise labels/fallback_resource stay nil —
  # TODO confirm against the constants' definitions.
  case resource_type
  when K8S_CONTAINER_CONSTANTS[:resource_type]
    labels = {
      'namespace_name' => namespace_name,
      'pod_name' => pod_name,
      'container_name' => container_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = GKE_CONSTANTS[:resource_type]
  when K8S_POD_CONSTANTS[:resource_type]
    labels = {
      'namespace_name' => namespace_name,
      'pod_name' => pod_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = GKE_CONSTANTS[:resource_type]
  when K8S_NODE_CONSTANTS[:resource_type]
    labels = {
      'node_name' => node_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = COMPUTE_CONSTANTS[:resource_type]
  end
  unless @k8s_cluster_name && @k8s_cluster_location
    # NOTE(review): `e` is only non-nil here when the metadata fetch
    # above raised; otherwise this logs `error: nil` — confirm intended.
    @log.error "Failed to construct #{resource_type} resource locally." \
               ' Falling back to writing logs against' \
               " #{fallback_resource} resource.", error: e
    return
  end
  constructed_resource = Google::Apis::LoggingV2::MonitoredResource.new(
    type: resource_type,
    labels: labels
  )
  @log.debug("Constructed #{resource_type} resource locally: " \
             "#{constructed_resource.inspect}")
  constructed_resource
end
2134
+
2135
# Convert the value to a Ruby Array, raising JSON::ParserError (with the
# offending class name as the message) when it cannot be converted.
def ensure_array(value)
  converted = Array.try_convert(value)
  raise JSON::ParserError, value.class.to_s if converted.nil?

  converted
end
2139
+
2140
# Convert the value to a Ruby Hash, raising JSON::ParserError (with the
# offending class name as the message) when it cannot be converted.
def ensure_hash(value)
  converted = Hash.try_convert(value)
  raise JSON::ParserError, value.class.to_s if converted.nil?

  converted
end
2144
+
2145
# Bump the counter of successful write requests. A no-op when metrics
# are disabled (@successful_requests_count is nil).
def increment_successful_requests_count
  @successful_requests_count&.increment(
    labels: { grpc: @use_grpc, code: @ok_code }
  )
end
2153
+
2154
# Bump the counter of failed write requests, labeled by the provided
# status code. A no-op when metrics are disabled.
def increment_failed_requests_count(code)
  @failed_requests_count&.increment(
    labels: { grpc: @use_grpc, code: code }
  )
end
2163
+
2164
# Bump the counter of log entries successfully ingested by the
# Stackdriver Logging API by `count`. A no-op when metrics are disabled.
def increment_ingested_entries_count(count)
  @ingested_entries_count&.increment(
    labels: { grpc: @use_grpc, code: @ok_code }, by: count
  )
end
2173
+
2174
# Bump the counter of log entries that were dropped (not ingested by the
# Stackdriver Logging API) by `count`, labeled by the provided status
# code. A no-op when metrics are disabled.
def increment_dropped_entries_count(count, code)
  @dropped_entries_count&.increment(
    labels: { grpc: @use_grpc, code: code }, by: count
  )
end
2183
+
2184
# Increment the metric for the number of log entries that failed to be
# ingested by the Stackdriver Logging API and will be retried.
# (The previous comment was copy-pasted from the dropped-entries metric
# and mis-described this counter.) A no-op when metrics are disabled.
def increment_retried_entries_count(count, code)
  return unless @retried_entries_count

  @retried_entries_count.increment(
    labels: { grpc: @use_grpc, code: code }, by: count
  )
end
2193
+ end
2194
+ end
2195
+
2196
module Google
  module Apis
    module LoggingV2
      # Reopen MonitoredResource so that #dup deep-copies the labels
      # hash: the shallow Object#dup would leave both copies sharing one
      # labels hash, so mutating one would leak into the other.
      class MonitoredResource
        def dup
          copy = super
          copy.labels = labels.dup
          copy
        end
      end
    end
  end
end