fluent-plugin-vadimberezniker-gcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2209 @@
1
+ # Copyright 2014 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ require 'cgi'
15
+ require 'erb'
16
+ require 'grpc'
17
+ require 'json'
18
+ require 'open-uri'
19
+ require 'socket'
20
+ require 'time'
21
+ require 'yaml'
22
+ require 'google/apis'
23
+ require 'google/cloud/errors'
24
+ require 'google/apis/logging_v2'
25
+ require 'google/cloud/logging/v2'
26
+ require 'google/logging/v2/logging_pb'
27
+ require 'google/logging/v2/logging_services_pb'
28
+ require 'google/logging/v2/log_entry_pb'
29
+ require 'googleauth'
30
+
31
+ require_relative 'common'
32
+ require_relative 'monitoring'
33
+ require_relative 'statusz'
34
+
35
+ module Google
36
+ module Protobuf
37
+ # Alias the has_key? method to have the same interface as a regular map.
38
+ class Map
39
+ alias key? has_key?
40
+ alias to_hash to_h
41
+ end
42
+ end
43
+ end
44
+
45
+ module Google
46
+ module Auth
47
+ # Disable gcloud lookup in googleauth to avoid picking up its project id.
48
+ module CredentialsLoader
49
+ # Set $VERBOSE to nil to mute the "already initialized constant" warnings.
50
+ warn_level = $VERBOSE
51
+ begin
52
+ $VERBOSE = nil
53
+ # These constants are used to invoke gcloud on Linux and Windows,
54
+ # respectively. Ideally, we would have overridden
55
+ # CredentialsLoader.load_gcloud_project_id, but we cannot catch it
56
+ # before it's invoked via "require 'googleauth'". So we override the
57
+ # constants instead.
58
+ GCLOUD_POSIX_COMMAND = '/bin/true'.freeze
59
+ GCLOUD_WINDOWS_COMMAND = 'cd .'.freeze
60
+ GCLOUD_CONFIG_COMMAND = ''.freeze
61
+ ensure
62
+ $VERBOSE = warn_level
63
+ end
64
+ end
65
+ end
66
+ end
67
+
68
+ # FluentLogger exposes the Fluent logger to the gRPC library.
69
+ module FluentLogger
70
+ def logger
71
+ $log # rubocop:disable Style/GlobalVars
72
+ end
73
+ end
74
+
75
+ # Define a gRPC module-level logger method before grpc/logconfig.rb loads.
76
+ module GRPC
77
+ extend FluentLogger
78
+ end
79
+
80
+ # Disable the nurse/strptime gem used by FluentD's TimeParser class in
81
+ # lib/fluent/time.rb. We found this gem to be slower than the builtin Ruby
82
+ # parser in recent versions of Ruby. Fortunately FluentD will fall back to the
83
+ # builtin parser.
84
+ require 'strptime'
85
+ # Dummy Strptime class.
86
+ class Strptime
87
+ def self.new(_)
88
+ # empty
89
+ end
90
+ end
91
+
92
+ module Fluent
93
+ # fluentd output plugin for the Stackdriver Logging API
94
+ class GoogleCloudOutput < BufferedOutput
95
+ # Constants for configuration.
96
+ module ConfigConstants
97
+ # Default values for JSON payload keys to set the "httpRequest",
98
+ # "operation", "sourceLocation", "trace" fields in the LogEntry.
99
+ DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'.freeze
100
+ DEFAULT_INSERT_ID_KEY = 'logging.googleapis.com/insertId'.freeze
101
+ DEFAULT_LABELS_KEY = 'logging.googleapis.com/labels'.freeze
102
+ DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'.freeze
103
+ DEFAULT_SOURCE_LOCATION_KEY =
104
+ 'logging.googleapis.com/sourceLocation'.freeze
105
+ DEFAULT_SPAN_ID_KEY = 'logging.googleapis.com/spanId'.freeze
106
+ DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'.freeze
107
+ DEFAULT_TRACE_SAMPLED_KEY = 'logging.googleapis.com/trace_sampled'.freeze
108
+ end
109
+
110
+ # Internal constants.
111
+ module InternalConstants
112
+ CREDENTIALS_PATH_ENV_VAR = 'GOOGLE_APPLICATION_CREDENTIALS'.freeze
113
+ DEFAULT_LOGGING_API_URL = 'https://logging.googleapis.com'.freeze
114
+
115
+ # The label name of local_resource_id in the json payload. When a record
116
+ # has this field in the payload, we will use the value to retrieve
117
+ # monitored resource from Stackdriver Metadata agent.
118
+ LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'.freeze
119
+
120
+ # The regexp matches stackdriver trace id format: 32-byte hex string.
121
+ # The format is documented in
122
+ # https://cloud.google.com/trace/docs/reference/v2/rpc/google.devtools.cloudtrace.v1#trace
123
+ STACKDRIVER_TRACE_ID_REGEXP = Regexp.new('^\h{32}$').freeze
124
+
125
+ # Map from each field name under LogEntry to corresponding variables
126
+ # required to perform field value extraction from the log record.
127
+ LOG_ENTRY_FIELDS_MAP = {
128
+ 'http_request' => [
129
+ # The config to specify label name for field extraction from record.
130
+ '@http_request_key',
131
+ # Map from subfields' names to their types.
132
+ [
133
+ # subfield key in the payload, destination key, cast lambda (opt)
134
+ %w[cacheFillBytes cache_fill_bytes parse_int],
135
+ %w[cacheHit cache_hit parse_bool],
136
+ %w[cacheLookup cache_lookup parse_bool],
137
+ %w[cacheValidatedWithOriginServer
138
+ cache_validated_with_origin_server parse_bool],
139
+ %w[latency latency parse_latency],
140
+ %w[protocol protocol parse_string],
141
+ %w[referer referer parse_string],
142
+ %w[remoteIp remote_ip parse_string],
143
+ %w[responseSize response_size parse_int],
144
+ %w[requestMethod request_method parse_string],
145
+ %w[requestSize request_size parse_int],
146
+ %w[requestUrl request_url parse_string],
147
+ %w[serverIp server_ip parse_string],
148
+ %w[status status parse_int],
149
+ %w[userAgent user_agent parse_string]
150
+ ],
151
+ # The grpc version class name.
152
+ 'Google::Cloud::Logging::Type::HttpRequest',
153
+ # The non-grpc version class name.
154
+ 'Google::Apis::LoggingV2::HttpRequest'
155
+ ],
156
+ 'operation' => [
157
+ '@operation_key',
158
+ [
159
+ %w[id id parse_string],
160
+ %w[producer producer parse_string],
161
+ %w[first first parse_bool],
162
+ %w[last last parse_bool]
163
+ ],
164
+ 'Google::Cloud::Logging::V2::LogEntryOperation',
165
+ 'Google::Apis::LoggingV2::LogEntryOperation'
166
+ ],
167
+ 'source_location' => [
168
+ '@source_location_key',
169
+ [
170
+ %w[file file parse_string],
171
+ %w[function function parse_string],
172
+ %w[line line parse_int]
173
+ ],
174
+ 'Google::Cloud::Logging::V2::LogEntrySourceLocation',
175
+ 'Google::Apis::LoggingV2::LogEntrySourceLocation'
176
+ ]
177
+ }.freeze
178
+
179
+ # The name of the WriteLogEntriesPartialErrors field in the error details.
180
+ PARTIAL_ERROR_FIELD =
181
+ 'type.googleapis.com/google.logging.v2.WriteLogEntriesPartialErrors' \
182
+ .freeze
183
+ end
184
+
185
+ include Common::ServiceConstants
186
+ include self::ConfigConstants
187
+ include self::InternalConstants
188
+
189
+ Fluent::Plugin.register_output('google_cloud', self)
190
+
191
+ helpers :server, :timer
192
+
193
+ PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'.freeze
194
+
195
+ # Follows semver.org format.
196
+ PLUGIN_VERSION = begin
197
+ # Extract plugin version from file path.
198
+ match_data = __FILE__.match(
199
+ %r{fluent-plugin-google-cloud-(?<version>[^/]*)/}
200
+ )
201
+ if match_data
202
+ match_data['version']
203
+ else
204
+ # Extract plugin version by finding the spec this file was loaded from.
205
+ dependency = Gem::Dependency.new('fluent-plugin-google-cloud')
206
+ all_specs, = Gem::SpecFetcher.fetcher.spec_for_dependency(dependency)
207
+ matching_version, = all_specs.grep(
208
+ proc { |spec,| __FILE__.include?(spec.full_gem_path) }
209
+ ) do |spec,|
210
+ spec.version.to_s
211
+ end
212
+ # If no matching version was found, return a valid but obviously wrong
213
+ # value.
214
+ matching_version || '0.0.0-unknown'
215
+ end
216
+ end.freeze
217
+
218
+ # Disable this warning to conform to fluentd config_param conventions.
219
+ # rubocop:disable Style/HashSyntax
220
+
221
+ # Specify project/instance metadata.
222
+ #
223
+ # project_id, zone, and vm_id are required to have valid values, which
224
+ # can be obtained from the metadata service or set explicitly.
225
+ # Otherwise, the plugin will fail to initialize.
226
+ #
227
+ # Note that while 'project id' properly refers to the alphanumeric name
228
+ # of the project, the logging service will also accept the project number,
229
+ # so either one is acceptable in this context.
230
+ #
231
+ # Whether to attempt to obtain metadata from the local metadata service.
232
+ # It is safe to specify 'true' even on platforms with no metadata service.
233
+ config_param :use_metadata_service, :bool, :default => true
234
+ # A compatibility option to enable the legacy behavior of setting the AWS
235
+ # location to the availability zone rather than the region.
236
+ config_param :use_aws_availability_zone, :bool, :default => true
237
+ # These parameters override any values obtained from the metadata service.
238
+ config_param :project_id, :string, :default => nil
239
+ config_param :zone, :string, :default => nil
240
+ config_param :vm_id, :string, :default => nil
241
+ config_param :vm_name, :string, :default => nil
242
+ # Kubernetes-specific parameters, only used to override these values in
243
+ # the fallback path when the metadata agent is temporarily unavailable.
244
+ # They have to match the configuration of the metadata agent.
245
+ config_param :k8s_cluster_name, :string, :default => nil
246
+ config_param :k8s_cluster_location, :string, :default => nil
247
+
248
+ # Map keys from a JSON payload to corresponding LogEntry fields.
249
+ config_param :http_request_key, :string, :default =>
250
+ DEFAULT_HTTP_REQUEST_KEY
251
+ config_param :insert_id_key, :string, :default => DEFAULT_INSERT_ID_KEY
252
+ config_param :labels_key, :string, :default => DEFAULT_LABELS_KEY
253
+ config_param :operation_key, :string, :default => DEFAULT_OPERATION_KEY
254
+ config_param :source_location_key, :string, :default =>
255
+ DEFAULT_SOURCE_LOCATION_KEY
256
+ config_param :span_id_key, :string, :default => DEFAULT_SPAN_ID_KEY
257
+ config_param :trace_key, :string, :default => DEFAULT_TRACE_KEY
258
+ config_param :trace_sampled_key, :string, :default =>
259
+ DEFAULT_TRACE_SAMPLED_KEY
260
+
261
+ # Whether to try to detect if the record is a text log entry with JSON
262
+ # content that needs to be parsed.
263
+ config_param :detect_json, :bool, :default => false
264
+ # TODO(igorpeshansky): Add a parameter for the text field in the payload.
265
+
266
+ # Whether to try to detect if the VM is owned by a "subservice" such as App
267
+ # Engine or Kubernetes, rather than just associating the logs with the
268
+ # compute service of the platform. This currently only has any effect when
269
+ # running on GCE.
270
+ #
271
+ # The initial motivation for this is to separate out Kubernetes node
272
+ # component (Kubelet, etc.) logs from container logs.
273
+ config_param :detect_subservice, :bool, :default => true
274
+ # The subservice_name overrides the subservice detection, if provided.
275
+ config_param :subservice_name, :string, :default => nil
276
+
277
+ # Whether to reject log entries with invalid tags. If this option is set to
278
+ # false, tags will be made valid by converting any non-string tag to a
279
+ # string, and sanitizing any non-utf8 or other invalid characters.
280
+ config_param :require_valid_tags, :bool, :default => false
281
+
282
+ # The regular expression to use on Kubernetes logs to extract some basic
283
+ # information about the log source. The regexp must contain capture groups
284
+ # for pod_name, namespace_name, and container_name.
285
+ config_param :kubernetes_tag_regexp, :string, :default =>
286
+ '\.(?<pod_name>[^_]+)_(?<namespace_name>[^_]+)_(?<container_name>.+)$'
287
+
288
+ # label_map (specified as a JSON object) is an unordered set of fluent
289
+ # field names whose values are sent as labels rather than as part of the
290
+ # struct payload.
291
+ #
292
+ # Each entry in the map is a {"field_name": "label_name"} pair. When
293
+ # the "field_name" (as parsed by the input plugin) is encountered, a label
294
+ # with the corresponding "label_name" is added to the log entry. The
295
+ # value of the field is used as the value of the label.
296
+ #
297
+ # The map gives the user additional flexibility in specifying label
298
+ # names, including the ability to use characters which would not be
299
+ # legal as part of fluent field names.
300
+ #
301
+ # Example:
302
+ # label_map {
303
+ # "field_name_1": "sent_label_name_1",
304
+ # "field_name_2": "some.prefix/sent_label_name_2"
305
+ # }
306
+ config_param :label_map, :hash, :default => nil
307
+
308
+ # labels (specified as a JSON object) is a set of custom labels
309
+ # provided at configuration time. It allows users to inject extra
310
+ # environmental information into every message or to customize
311
+ # labels otherwise detected automatically.
312
+ #
313
+ # Each entry in the map is a {"label_name": "label_value"} pair.
314
+ #
315
+ # Example:
316
+ # labels {
317
+ # "label_name_1": "label_value_1",
318
+ # "label_name_2": "label_value_2"
319
+ # }
320
+ config_param :labels, :hash, :default => nil
321
+
322
+ # Whether to use gRPC instead of REST/JSON to communicate to the
323
+ # Stackdriver Logging API.
324
+ config_param :use_grpc, :bool, :default => false
325
+
326
+ # Whether to enable gRPC compression when communicating with the Stackdriver
327
+ # Logging API. Only used if 'use_grpc' is set to true.
328
+ config_param :grpc_compression_algorithm, :enum,
329
+ list: %i[none gzip],
330
+ :default => nil
331
+
332
+ # Whether valid entries should be written even if some other entries fail
333
+ # due to INVALID_ARGUMENT or PERMISSION_DENIED errors when communicating to
334
+ # the Stackdriver Logging API. This flag is no longer used, and is kept for
335
+ # backwards compatibility; partial_success is enabled for all requests.
336
+ # TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
337
+ config_param :partial_success, :bool,
338
+ :default => true,
339
+ :skip_accessor => true,
340
+ :deprecated => 'This feature is permanently enabled'
341
+
342
+ # Whether to allow non-UTF-8 characters in user logs. If set to true, any
343
+ # non-UTF-8 character would be replaced by the string specified by
344
+ # 'non_utf8_replacement_string'. If set to false, any non-UTF-8 character
345
+ # would trigger the plugin to error out.
346
+ config_param :coerce_to_utf8, :bool, :default => true
347
+
348
+ # If 'coerce_to_utf8' is set to true, any non-UTF-8 character would be
349
+ # replaced by the string specified here.
350
+ config_param :non_utf8_replacement_string, :string, :default => ' '
351
+
352
+ # DEPRECATED: The following parameters, if present in the config
353
+ # indicate that the plugin configuration must be updated.
354
+ config_param :auth_method, :string, :default => nil
355
+ config_param :private_key_email, :string, :default => nil
356
+ config_param :private_key_path, :string, :default => nil
357
+ config_param :private_key_passphrase, :string,
358
+ :default => nil,
359
+ :secret => true
360
+
361
+ # The URL of Stackdriver Logging API. Right now this only works with the
362
+ # gRPC path (use_grpc = true). An unsecured channel is used if the URL
363
+ # scheme is 'http' instead of 'https'. One common use case of this config is
364
+ # to provide a mocked / stubbed Logging API, e.g., http://localhost:52000.
365
+ config_param :logging_api_url, :string, :default => DEFAULT_LOGGING_API_URL
366
+
367
+ # Whether to collect metrics about the plugin usage. The mechanism for
368
+ # collecting and exposing metrics is controlled by the monitoring_type
369
+ # parameter.
370
+ config_param :enable_monitoring, :bool, :default => false
371
+
372
+ # What system to use when collecting metrics. Possible values are:
373
+ # - 'prometheus', in this case default registry in the Prometheus
374
+ # client library is used, without actually exposing the endpoint
375
+ # to serve metrics in the Prometheus format.
376
+ # - 'opencensus', in this case the OpenCensus implementation is
377
+ # used to send metrics directly to Google Cloud Monitoring.
378
+ # - any other value will result in the absence of metrics.
379
+ config_param :monitoring_type, :string,
380
+ :default => Monitoring::PrometheusMonitoringRegistry.name
381
+
382
+ # The monitored resource to use for OpenCensus metrics. Only valid
383
+ # when monitoring_type is set to 'opencensus'. This value is a hash in
384
+ # the form:
385
+ # {"type":"gce_instance","labels":{"instance_id":"aaa","zone":"bbb"} (JSON)
386
+ # or type:gce_instance,labels.instance_id:aaa,labels.zone:bbb (Hash)
387
+ config_param :metrics_resource, :hash,
388
+ :symbolize_keys => true, :default => nil
389
+
390
+ # Whether to call metadata agent to retrieve monitored resource. This flag
391
+ # is kept for backwards compatibility, and is no longer used.
392
+ # TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
393
+ config_param :enable_metadata_agent, :bool,
394
+ :default => false,
395
+ :skip_accessor => true,
396
+ :deprecated => 'This feature is permanently disabled'
397
+
398
+ # The URL of the Metadata Agent. This flag is kept for backwards
399
+ # compatibility, and is no longer used.
400
+ # TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
401
+ config_param :metadata_agent_url, :string,
402
+ :default => nil,
403
+ :skip_accessor => true,
404
+ :deprecated => 'This feature is permanently disabled'
405
+
406
+ # Whether to split log entries with different log tags into different
407
+ # requests when talking to Stackdriver Logging API.
408
+ config_param :split_logs_by_tag, :bool, :default => false
409
+
410
+ # Whether to attempt adjusting invalid log entry timestamps.
411
+ config_param :adjust_invalid_timestamps, :bool, :default => true
412
+
413
+ # Whether to autoformat value of "logging.googleapis.com/trace" to
414
+ # comply with Stackdriver Trace format
415
+ # "projects/[PROJECT-ID]/traces/[TRACE-ID]" when setting
416
+ # LogEntry.trace.
417
+ config_param :autoformat_stackdriver_trace, :bool, :default => true
418
+
419
+ # Port for web server that exposes a /statusz endpoint with
420
+ # diagnostic information in HTML format. If the value is 0,
421
+ # the server is not created.
422
+ config_param :statusz_port, :integer, :default => 0
423
+
424
+ # Override for the Google Cloud Monitoring service hostname, or
425
+ # `nil` to leave as the default.
426
+ config_param :gcm_service_address, :string, :default => nil
427
+
428
+ # rubocop:enable Style/HashSyntax
429
+
430
+ # TODO: Add a log_name config option rather than just using the tag?
431
+
432
+ # Expose attr_readers to make testing of metadata more direct than only
433
+ # testing it indirectly through metadata sent with logs.
434
+ attr_reader :resource, :common_labels, :monitoring_resource
435
+
436
+ def initialize
437
+ super
438
+ # use the global logger
439
+ @log = $log # rubocop:disable Style/GlobalVars
440
+
441
+ @failed_requests_count = nil
442
+ @successful_requests_count = nil
443
+ @dropped_entries_count = nil
444
+ @ingested_entries_count = nil
445
+ @retried_entries_count = nil
446
+
447
+ @ok_code = nil
448
+ @uptime_update_time = Time.now.to_i
449
+ end
450
+
451
+ def configure(conf)
452
+ super
453
+
454
+ # TODO(qingling128): Remove this warning after the support is added. Also
455
+ # remove the comment in the description of this configuration.
456
+ unless @logging_api_url == DEFAULT_LOGGING_API_URL || @use_grpc
457
+ @log.warn 'Detected customized logging_api_url while use_grpc is not' \
458
+ ' enabled. Customized logging_api_url for the non-gRPC path' \
459
+ ' is not supported. The logging_api_url option will be' \
460
+ ' ignored.'
461
+ end
462
+
463
+ # Alert on old authentication configuration.
464
+ unless @auth_method.nil? && @private_key_email.nil? &&
465
+ @private_key_path.nil? && @private_key_passphrase.nil?
466
+ extra = []
467
+ extra << 'auth_method' unless @auth_method.nil?
468
+ extra << 'private_key_email' unless @private_key_email.nil?
469
+ extra << 'private_key_path' unless @private_key_path.nil?
470
+ extra << 'private_key_passphrase' unless @private_key_passphrase.nil?
471
+
472
+ raise Fluent::ConfigError,
473
+ "#{PLUGIN_NAME} no longer supports auth_method.\n" \
474
+ "Please remove configuration parameters: #{extra.join(' ')}"
475
+ end
476
+
477
+ set_regexp_patterns
478
+
479
+ @utils = Common::Utils.new(@log)
480
+
481
+ @platform = @utils.detect_platform(@use_metadata_service)
482
+
483
+ # Treat an empty setting of the credentials file path environment variable
484
+ # as unset. This way the googleauth lib could fetch the credentials
485
+ # following the fallback path.
486
+ ENV.delete(CREDENTIALS_PATH_ENV_VAR) if
487
+ ENV[CREDENTIALS_PATH_ENV_VAR] == ''
488
+
489
+ # Set required variables: @project_id, @vm_id, @vm_name and @zone.
490
+ @project_id = @utils.get_project_id(@platform, @project_id)
491
+ @vm_id = @utils.get_vm_id(@platform, @vm_id)
492
+ @vm_name = @utils.get_vm_name(@vm_name)
493
+ @zone = @utils.get_location(@platform, @zone, @use_aws_availability_zone)
494
+
495
+ # All metadata parameters must now be set.
496
+ @utils.check_required_metadata_variables(
497
+ @platform, @project_id, @zone, @vm_id
498
+ )
499
+
500
+ # Retrieve monitored resource.
501
+ # Fail over to retrieve monitored resource via the legacy path if we fail
502
+ # to get it from Metadata Agent.
503
+ @resource ||= @utils.determine_agent_level_monitored_resource_via_legacy(
504
+ @platform, @subservice_name, @detect_subservice, @vm_id, @zone
505
+ )
506
+
507
+ if @metrics_resource
508
+ unless @metrics_resource[:type].is_a?(String)
509
+ raise Fluent::ConfigError,
510
+ 'metrics_resource.type must be a string:' \
511
+ " #{@metrics_resource}."
512
+ end
513
+ if @metrics_resource.key?(:labels)
514
+ unless @metrics_resource[:labels].is_a?(Hash)
515
+ raise Fluent::ConfigError,
516
+ 'metrics_resource.labels must be a hash:' \
517
+ " #{@metrics_resource}."
518
+ end
519
+ extra_keys = @metrics_resource.reject do |k, _|
520
+ %i[type labels].include?(k)
521
+ end
522
+ unless extra_keys.empty?
523
+ raise Fluent::ConfigError,
524
+ "metrics_resource has unrecognized keys: #{extra_keys.keys}."
525
+ end
526
+ else
527
+ extra_keys = @metrics_resource.reject do |k, _|
528
+ k == :type || k.to_s.start_with?('labels.')
529
+ end
530
+ unless extra_keys.empty?
531
+ raise Fluent::ConfigError,
532
+ "metrics_resource has unrecognized keys: #{extra_keys.keys}."
533
+ end
534
+ # Transform the Hash form of the metrics_resource config if necessary.
535
+ resource_type = @metrics_resource[:type]
536
+ resource_labels = @metrics_resource.each_with_object({}) \
537
+ do |(k, v), h|
538
+ h[k.to_s.sub('labels.', '')] = v if k.to_s.start_with? 'labels.'
539
+ end
540
+ @metrics_resource = { type: resource_type, labels: resource_labels }
541
+ end
542
+ end
543
+
544
+ # If monitoring is enabled, register metrics in the default registry
545
+ # and store metric objects for future use.
546
+ if @enable_monitoring
547
+ unless Monitoring::MonitoringRegistryFactory.supports_monitoring_type(
548
+ @monitoring_type
549
+ )
550
+ @log.warn "monitoring_type '#{@monitoring_type}' is unknown; "\
551
+ 'there will be no metrics'
552
+ end
553
+ @monitoring_resource = if @metrics_resource
554
+ @utils.create_monitored_resource(
555
+ @metrics_resource[:type], @metrics_resource[:labels]
556
+ )
557
+ else
558
+ @resource
559
+ end
560
+ @registry = Monitoring::MonitoringRegistryFactory
561
+ .create(@monitoring_type, @project_id,
562
+ @monitoring_resource, @gcm_service_address)
563
+ # Export metrics every 60 seconds.
564
+ timer_execute(:export_metrics, 60) { @registry.export }
565
+ # Uptime should be a gauge, but the metric definition is a counter and
566
+ # we can't change it.
567
+ @uptime_metric = @registry.counter(
568
+ :uptime, [:version], 'Uptime of Logging agent',
569
+ 'agent.googleapis.com/agent', 'CUMULATIVE'
570
+ )
571
+ update_uptime
572
+ timer_execute(:update_uptime, 1) { update_uptime }
573
+ @successful_requests_count = @registry.counter(
574
+ :stackdriver_successful_requests_count,
575
+ %i[grpc code],
576
+ 'A number of successful requests to the Stackdriver Logging API',
577
+ 'agent.googleapis.com/agent', 'CUMULATIVE'
578
+ )
579
+ @failed_requests_count = @registry.counter(
580
+ :stackdriver_failed_requests_count,
581
+ %i[grpc code],
582
+ 'A number of failed requests to the Stackdriver Logging '\
583
+ 'API, broken down by the error code',
584
+ 'agent.googleapis.com/agent', 'CUMULATIVE'
585
+ )
586
+ @ingested_entries_count = @registry.counter(
587
+ :stackdriver_ingested_entries_count,
588
+ %i[grpc code],
589
+ 'A number of log entries ingested by Stackdriver Logging',
590
+ 'agent.googleapis.com/agent', 'CUMULATIVE'
591
+ )
592
+ @dropped_entries_count = @registry.counter(
593
+ :stackdriver_dropped_entries_count,
594
+ %i[grpc code],
595
+ 'A number of log entries dropped by the Stackdriver output plugin',
596
+ 'agent.googleapis.com/agent', 'CUMULATIVE'
597
+ )
598
+ @retried_entries_count = @registry.counter(
599
+ :stackdriver_retried_entries_count,
600
+ %i[grpc code],
601
+ 'The number of log entries that failed to be ingested by '\
602
+ 'the Stackdriver output plugin due to a transient error '\
603
+ 'and were retried',
604
+ 'agent.googleapis.com/agent', 'CUMULATIVE'
605
+ )
606
+ @ok_code = @use_grpc ? GRPC::Core::StatusCodes::OK : 200
607
+ end
608
+
609
+ # Set regexp that we should match tags against later on. Using a list
610
+ # instead of a map to ensure order.
611
+ @tag_regexp_list = []
612
+ if @resource.type == GKE_CONSTANTS[:resource_type]
613
+ @tag_regexp_list << [
614
+ GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
615
+ ]
616
+ end
617
+
618
+ # Determine the common labels that should be added to all log entries
619
+ # processed by this logging agent.
620
+ @common_labels = determine_agent_level_common_labels(@resource)
621
+
622
+ # The resource and labels are now set up; ensure they can't be modified
623
+ # without first duping them.
624
+ @resource.freeze
625
+ @resource.labels.freeze
626
+ @common_labels.freeze
627
+
628
+ if @use_grpc
629
+ @construct_log_entry = method(:construct_log_entry_in_grpc_format)
630
+ @write_request = method(:write_request_via_grpc)
631
+ else
632
+ @construct_log_entry = method(:construct_log_entry_in_rest_format)
633
+ @write_request = method(:write_request_via_rest)
634
+ end
635
+
636
+ return unless [Common::Platform::GCE, Common::Platform::EC2].include?(@platform)
637
+
638
+ # Log an informational message containing the Logs viewer URL
639
+ @log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
640
+ "viewer?project=#{@project_id}&resource=#{@resource.type}/",
641
+ "instance_id/#{@vm_id}"
642
+ end
643
+
644
+ def start
645
+ super
646
+ init_api_client
647
+ @successful_call = false
648
+ @timenanos_warning = false
649
+
650
+ return unless @statusz_port.positive?
651
+
652
+ @log.info "Starting statusz server on port #{@statusz_port}"
653
+ server_create(:out_google_cloud_statusz,
654
+ @statusz_port,
655
+ bind: '127.0.0.1') do |data, conn|
656
+ if data.split(' ')[1] == '/statusz'
657
+ write_html_response(data, conn, 200, Statusz.response(self))
658
+ else
659
+ write_html_response(data, conn, 404, "Not found\n")
660
+ end
661
+ end
662
+ end
663
+
664
+ def shutdown
665
+ super
666
+ # Export metrics on shutdown. This is a best-effort attempt, and it might
667
+ # fail, for instance if there was a recent write to the same time series.
668
+ @registry&.export
669
+ end
670
+
671
+ def write(chunk)
672
+ grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)
673
+
674
+ requests_to_send = []
675
+ grouped_entries.each do |(tag, local_resource_id), arr|
676
+ entries = []
677
+ group_level_resource, group_level_common_labels =
678
+ determine_group_level_monitored_resource_and_labels(
679
+ tag, local_resource_id
680
+ )
681
+
682
+ arr.each do |time, record|
683
+ entry_level_resource, entry_level_common_labels =
684
+ determine_entry_level_monitored_resource_and_labels(
685
+ group_level_resource, group_level_common_labels, record
686
+ )
687
+
688
+ is_json = false
689
+ if @detect_json
690
+ # Save the following fields if available, then clear them out to
691
+ # allow for determining whether we should parse the log or message
692
+ # field.
693
+ # This list should be in sync with
694
+ # https://cloud.google.com/logging/docs/agent/configuration#special-fields.
695
+ preserved_keys = [
696
+ 'time',
697
+ 'timeNanos',
698
+ 'timestamp',
699
+ 'timestampNanos',
700
+ 'timestampSeconds',
701
+ 'severity',
702
+ @http_request_key,
703
+ @insert_id_key,
704
+ @labels_key,
705
+ @operation_key,
706
+ @source_location_key,
707
+ @span_id_key,
708
+ @trace_key,
709
+ @trace_sampled_key
710
+ ]
711
+
712
+ # If the log is json, we want to export it as a structured log
713
+ # unless there is additional metadata that would be lost.
714
+ record_json = nil
715
+ if (record.keys - preserved_keys).length == 1
716
+ %w[log message msg].each do |field|
717
+ record_json = parse_json_or_nil(record[field]) if record.key?(field)
718
+ end
719
+ end
720
+ unless record_json.nil?
721
+ # Propagate these if necessary. Note that we don't want to
722
+ # override these keys in the JSON we've just parsed.
723
+ preserved_keys.each do |key|
724
+ record_json[key] ||= record[key] if
725
+ record.key?(key) && !record_json.key?(key)
726
+ end
727
+
728
+ record = record_json
729
+ is_json = true
730
+ end
731
+ end
732
+
733
+ ts_secs, ts_nanos, timestamp = compute_timestamp(record, time)
734
+ ts_secs, ts_nanos = adjust_timestamp_if_invalid(timestamp, Time.now) \
735
+ if @adjust_invalid_timestamps && timestamp
736
+
737
+ severity = compute_severity(
738
+ entry_level_resource.type, record, entry_level_common_labels
739
+ )
740
+
741
+ dynamic_labels_from_payload = parse_labels(record)
742
+
743
+ if dynamic_labels_from_payload
744
+ entry_level_common_labels.merge!(
745
+ dynamic_labels_from_payload
746
+ )
747
+ end
748
+
749
+ entry = @construct_log_entry.call(entry_level_common_labels,
750
+ entry_level_resource,
751
+ severity,
752
+ ts_secs,
753
+ ts_nanos)
754
+
755
+ insert_id = record.delete(@insert_id_key)
756
+ entry.insert_id = insert_id if insert_id
757
+ span_id = record.delete(@span_id_key)
758
+ entry.span_id = span_id if span_id
759
+ trace = record.delete(@trace_key)
760
+ entry.trace = compute_trace(trace) if trace
761
+ trace_sampled = record.delete(@trace_sampled_key)
762
+ entry.trace_sampled = parse_bool(trace_sampled) unless
763
+ trace_sampled.nil?
764
+
765
+ set_log_entry_fields(record, entry)
766
+ set_payload(entry_level_resource.type, record, entry, is_json)
767
+
768
+ entries.push(entry)
769
+ end
770
+ # Don't send an empty request if we rejected all the entries.
771
+ next if entries.empty?
772
+
773
+ log_name = "projects/#{@project_id}/logs/#{log_name(
774
+ tag, group_level_resource
775
+ )}"
776
+
777
+ requests_to_send << {
778
+ entries: entries,
779
+ log_name: log_name,
780
+ resource: group_level_resource,
781
+ labels: group_level_common_labels
782
+ }
783
+ end
784
+
785
+ if @split_logs_by_tag
786
+ requests_to_send.each do |request|
787
+ @write_request.call(**request)
788
+ end
789
+ else
790
+ # Combine all requests into one. The request level "log_name" will be
791
+ # ported to the entry level. The request level "resource" and "labels"
792
+ # are ignored as they should have been folded into the entry level
793
+ # "resource" and "labels" already anyway.
794
+ combined_entries = []
795
+ requests_to_send.each do |request|
796
+ request[:entries].each do |entry|
797
+ # Modify entries in-place as they are not needed later on.
798
+ entry.log_name = request[:log_name]
799
+ end
800
+ combined_entries.concat(request[:entries])
801
+ end
802
+ @write_request.call(entries: combined_entries) unless
803
+ combined_entries.empty?
804
+ end
805
+ end
806
+
807
# Tell Fluentd this output plugin can run in multi-worker mode; each
# worker sends its own requests to the Logging API independently.
def multi_workers_ready?
  true
end
810
+
811
# User-agent-style identifier for this agent, e.g. "google-fluentd/1.2.3".
# Memoized on the class so the string is built only once.
def self.version_string
  @version_string = "google-fluentd/#{PLUGIN_VERSION}" if @version_string.nil?
  @version_string
end
814
+
815
# Add the seconds elapsed since the last update to the uptime metric,
# labeled with the agent version, and remember the new reference point.
def update_uptime
  current_time = Time.now.to_i
  elapsed = current_time - @uptime_update_time
  version_label = { version: Fluent::GoogleCloudOutput.version_string }
  @uptime_metric.increment(by: elapsed, labels: version_label)
  @uptime_update_time = current_time
end
823
+
824
+ private
825
+
826
# Write a minimal HTTP/1.1 response with the given status code and HTML
# body to the connection, logging an access-log style line first.
#
# data - raw request text; only its first line is used for the log.
# conn - connection object responding to #remote_host and #write.
def write_html_response(data, conn, code, response)
  request_line = data.lines.first.strip
  timestamp = Time.now.strftime('%d/%b/%Y:%H:%M:%S %z')
  @log.info "#{conn.remote_host} - - #{timestamp} " \
            "\"#{request_line}\" #{code} #{response.bytesize}"

  header_lines = [
    "HTTP/1.1 #{code}\r\n",
    "Content-Type: text/html\r\n",
    "Content-Length: #{response.bytesize}\r\n",
    "\r\n"
  ]
  header_lines.each { |line| conn.write line }
  conn.write response
end
836
+
837
# Expand a bare Stackdriver trace ID into the canonical
# "projects/<project_id>/traces/<trace_id>" form when autoformatting is
# enabled and the value matches the trace-ID pattern; otherwise return
# the value untouched.
def compute_trace(trace)
  if @autoformat_stackdriver_trace && STACKDRIVER_TRACE_ID_REGEXP.match(trace)
    "projects/#{@project_id}/traces/#{trace}"
  else
    trace
  end
end
843
+
844
# Build a gRPC LogEntry proto from the given labels, monitored resource,
# severity and timestamp parts.
#
# If ts_secs is not an Integer the timestamp field is omitted entirely,
# deferring the decision on how to handle it to the downstream Logging
# API; a non-Integer ts_nanos is treated as 0.
def construct_log_entry_in_grpc_format(labels,
                                       resource,
                                       severity,
                                       ts_secs,
                                       ts_nanos)
  monitored_resource = Google::Api::MonitoredResource.new(
    type: resource.type,
    labels: resource.labels.to_h
  )
  entry = Google::Cloud::Logging::V2::LogEntry.new(
    labels: labels,
    resource: monitored_resource,
    severity: grpc_severity(severity)
  )
  if ts_secs.is_a?(Integer)
    entry.timestamp = Google::Protobuf::Timestamp.new(
      seconds: ts_secs,
      nanos: ts_nanos.is_a?(Integer) ? ts_nanos : 0
    )
  end
  entry
end
870
+
871
# Build a REST (google-api-client) LogEntry from the given labels,
# monitored resource, severity and timestamp parts.
def construct_log_entry_in_rest_format(labels,
                                       resource,
                                       severity,
                                       ts_secs,
                                       ts_nanos)
  # Remove the labels if we didn't populate them with anything.
  resource.labels = nil if resource.labels.empty?
  timestamp = { seconds: ts_secs, nanos: ts_nanos }
  Google::Apis::LoggingV2::LogEntry.new(
    labels: labels,
    resource: resource,
    severity: severity,
    timestamp: timestamp
  )
end
888
+
889
# Send one WriteLogEntries request through the gRPC client, updating the
# success/failure/retry metrics and logging dropped entries.
#
# Server-side errors (HTTP 500/501/503/504 equivalents) are re-raised so
# Fluentd retries the chunk; most client-side errors drop the entries.
# Because partial_success is enabled, InvalidArgument/PermissionDenied
# responses may carry per-entry error details: only the rejected entries
# are counted as dropped, the rest as ingested.
def write_request_via_grpc(entries:,
                           log_name: '',
                           resource: nil,
                           labels: {})
  client = api_client
  entries_count = entries.length
  client.write_log_entries(
    entries: entries,
    log_name: log_name,
    # Leave resource nil if it's nil.
    resource: if resource
                Google::Api::MonitoredResource.new(
                  type: resource.type,
                  labels: resource.labels.to_h
                )
              end,
    labels: labels.map do |k, v|
      [k.encode('utf-8'), convert_to_utf8(v)]
    end.to_h,
    partial_success: true
  )
  increment_successful_requests_count
  increment_ingested_entries_count(entries_count)

  # Let the user explicitly know when the first call succeeded, to
  # aid with verification and troubleshooting.
  unless @successful_call
    @successful_call = true
    @log.info 'Successfully sent gRPC to Stackdriver Logging API.'
  end
rescue Google::Cloud::Error => e
  # GRPC::BadStatus is wrapped in error.cause.
  error = e.cause

  # See the mapping between HTTP status and gRPC status code at:
  # https://github.com/grpc/grpc/blob/master/src/core/lib/transport/status_conversion.cc
  case error
  # Server error, so retry via re-raising the error.
  when \
      # HTTP status 500 (Internal Server Error).
      GRPC::Internal,
      # HTTP status 501 (Not Implemented).
      GRPC::Unimplemented,
      # HTTP status 503 (Service Unavailable).
      GRPC::Unavailable,
      # HTTP status 504 (Gateway Timeout).
      GRPC::DeadlineExceeded
    increment_retried_entries_count(entries_count, error.code)
    @log.debug "Retrying #{entries_count} log message(s) later.",
               error: error.to_s, error_code: error.code.to_s
    raise error

  # Most client errors indicate a problem with the request itself and
  # should not be retried.
  when \
      # HTTP status 401 (Unauthorized).
      # These are usually solved via a `gcloud auth` call, or by modifying
      # the permissions on the Google Cloud project.
      GRPC::Unauthenticated,
      # HTTP status 404 (Not Found).
      GRPC::NotFound,
      # HTTP status 409 (Conflict).
      GRPC::Aborted,
      # HTTP status 412 (Precondition Failed).
      GRPC::FailedPrecondition,
      # HTTP status 429 (Too Many Requests).
      GRPC::ResourceExhausted,
      # HTTP status 499 (Client Closed Request).
      GRPC::Cancelled,
      # the remaining http codes in both 4xx and 5xx category.
      # It's debatable whether to retry or drop these log entries.
      # This decision is made to avoid retrying forever due to
      # client errors.
      GRPC::Unknown
    increment_failed_requests_count(error.code)
    increment_dropped_entries_count(entries_count, error.code)
    @log.warn "Dropping #{entries_count} log message(s)",
              error: error.to_s, error_code: error.code.to_s

  # As partial_success is enabled, valid entries should have been
  # written even if some other entries fail due to InvalidArgument or
  # PermissionDenied errors. Only invalid entries will be dropped.
  when \
      # HTTP status 400 (Bad Request).
      GRPC::InvalidArgument,
      # HTTP status 403 (Forbidden).
      GRPC::PermissionDenied
    error_details_map = construct_error_details_map_grpc(e)
    if error_details_map.empty?
      # No per-entry details available; treat the whole request as failed.
      increment_failed_requests_count(error.code)
      increment_dropped_entries_count(entries_count, error.code)
      @log.warn "Dropping #{entries_count} log message(s)",
                error: error.to_s, error_code: error.code.to_s
    else
      error_details_map.each do |(error_code, error_message), indexes|
        partial_errors_count = indexes.length
        increment_dropped_entries_count(partial_errors_count,
                                        error_code)
        # Remaining entries_count is what the API accepted.
        entries_count -= partial_errors_count
        @log.warn "Dropping #{partial_errors_count} log message(s)",
                  error: error_message, error_code: error_code.to_s
      end
      # Consider partially successful requests successful.
      increment_successful_requests_count
      increment_ingested_entries_count(entries_count)
    end

  else
    # Assume it's a problem with the request itself and don't retry.
    error_code = if error.respond_to?(:code)
                   error.code
                 else
                   GRPC::Core::StatusCodes::UNKNOWN
                 end
    increment_failed_requests_count(error_code)
    increment_dropped_entries_count(entries_count, error_code)
    @log.error "Unknown response code #{error_code} from the server," \
               " dropping #{entries_count} log message(s)",
               error: error.to_s, error_code: error_code.to_s
  end

# Got an unexpected error (not Google::Cloud::Error) from the
# google-cloud-logging lib.
rescue StandardError => e
  increment_failed_requests_count(GRPC::Core::StatusCodes::UNKNOWN)
  increment_dropped_entries_count(entries_count,
                                  GRPC::Core::StatusCodes::UNKNOWN)
  @log.error "Unexpected error type #{e.class.name} from the client" \
             " library, dropping #{entries_count} log message(s)",
             error: e.to_s
end
1020
+
1021
# Send one WriteLogEntries request through the REST client, updating the
# success/failure/retry metrics and logging dropped entries.
#
# 5xx server errors are re-raised so Fluentd retries the chunk; 401 and
# most other 4xx errors drop the entries. Because partial_success is
# enabled, a ClientError may carry per-entry error details: only the
# rejected entries are counted as dropped, the rest as ingested.
def write_request_via_rest(entries:,
                           log_name: '',
                           resource: nil,
                           labels: {})
  client = api_client
  entries_count = entries.length
  client.write_entry_log_entries(
    Google::Apis::LoggingV2::WriteLogEntriesRequest.new(
      entries: entries,
      log_name: log_name,
      resource: resource,
      labels: labels,
      partial_success: true
    ),
    options: { api_format_version: '2' }
  )
  increment_successful_requests_count
  increment_ingested_entries_count(entries_count)

  # Let the user explicitly know when the first call succeeded, to aid
  # with verification and troubleshooting.
  unless @successful_call
    @successful_call = true
    @log.info 'Successfully sent to Stackdriver Logging API.'
  end
rescue Google::Apis::ServerError => e
  # 5xx server errors. Retry via re-raising the error.
  increment_retried_entries_count(entries_count, e.status_code)
  @log.debug "Retrying #{entries_count} log message(s) later.",
             error: e.to_s, error_code: e.status_code.to_s
  raise e
rescue Google::Apis::AuthorizationError => e
  # 401 authorization error.
  # These are usually solved via a `gcloud auth` call, or by modifying
  # the permissions on the Google Cloud project.
  increment_failed_requests_count(e.status_code)
  increment_dropped_entries_count(entries_count, e.status_code)
  @log.warn "Dropping #{entries_count} log message(s)",
            error: e.to_s, error_code: e.status_code.to_s
rescue Google::Apis::ClientError => e
  # 4xx client errors. Most client errors indicate a problem with the
  # request itself and should not be retried.
  error_details_map = construct_error_details_map(e)
  if error_details_map.empty?
    # No per-entry details available; treat the whole request as failed.
    increment_failed_requests_count(e.status_code)
    increment_dropped_entries_count(entries_count, e.status_code)
    @log.warn "Dropping #{entries_count} log message(s)",
              error: e.to_s, error_code: e.status_code.to_s
  else
    error_details_map.each do |(error_code, error_message), indexes|
      partial_errors_count = indexes.length
      increment_dropped_entries_count(partial_errors_count, error_code)
      # Remaining entries_count is what the API accepted.
      entries_count -= partial_errors_count
      @log.warn "Dropping #{partial_errors_count} log message(s)",
                error: error_message,
                error_code: "google.rpc.Code[#{error_code}]"
    end
    # Consider partially successful requests successful.
    increment_successful_requests_count
    increment_ingested_entries_count(entries_count)
  end
end
1083
+
1084
# Attempt to parse the input as a JSON object. Returns the parsed value
# when the first non-whitespace character (JSON whitespace only: tab,
# line feed, carriage return, space) is a left curly bracket and the full
# input parses; returns nil otherwise. Scanning the prefix first avoids
# invoking the parser on obviously non-JSON payloads.
def parse_json_or_nil(input)
  return nil unless input.is_a?(String)

  input.each_codepoint do |codepoint|
    case codepoint
    when 0x7B # left curly bracket (U+007B)
      begin
        return JSON.parse(input)
      rescue JSON::ParserError
        return nil
      end
    when 0x09, 0x0A, 0x0D, 0x20
      # JSON whitespace; keep looking for the opening bracket.
      next
    else
      # Any other leading character means this cannot be a JSON object.
      break
    end
  end
  nil
end
1105
+
1106
# Set regexp patterns to parse tags and logs.
# Compiles the user-supplied Kubernetes tag regexp (when configured) and
# the fixed latency pattern ("<int>[.<frac>] s" with optional whitespace).
def set_regexp_patterns
  if @kubernetes_tag_regexp
    @compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp)
  end

  @compiled_http_latency_regexp =
    /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
end
1114
+
1115
# Determine the common labels that should be added to all log entries
# processed by this logging agent, based on the monitored resource type.
def determine_agent_level_common_labels(resource)
  labels = {}
  # User can specify labels via config. We want to capture those as well.
  labels.merge!(@labels) if @labels

  compute_service = COMPUTE_CONSTANTS[:service]
  case resource.type
  # GAE, Cloud Dataflow, Cloud Dataproc and Cloud ML: attach the identity
  # of the underlying VM.
  when APPENGINE_CONSTANTS[:resource_type],
       DATAFLOW_CONSTANTS[:resource_type],
       DATAPROC_CONSTANTS[:resource_type],
       ML_CONSTANTS[:resource_type]
    labels["#{compute_service}/resource_id"] = @vm_id
    labels["#{compute_service}/resource_name"] = @vm_name
    labels["#{compute_service}/zone"] = @zone

  # GCE instance and GKE container.
  when COMPUTE_CONSTANTS[:resource_type],
       GKE_CONSTANTS[:resource_type]
    labels["#{compute_service}/resource_name"] = @vm_name

  # EC2.
  when EC2_CONSTANTS[:resource_type]
    labels["#{EC2_CONSTANTS[:service]}/resource_name"] = @vm_name
  end
  labels
end
1145
+
1146
# Group the log entries by [tag, local_resource_id] pairs. Also filter
# out invalid non-Hash entries and entries whose tag cannot be
# sanitized. Returns a hash mapping a frozen [tag, local_resource_id]
# key to an array of [time, record] pairs.
def group_log_entries_by_tag_and_local_resource_id(chunk)
  grouped = {}
  chunk.msgpack_each do |tag, time, record|
    unless record.is_a?(Hash)
      @log.warn 'Dropping log entries with malformed record: ' \
                "'#{record.inspect}' from tag '#{tag}' at '#{time}'. " \
                'A log record should be in JSON format.'
      next
    end

    clean_tag = sanitize_tag(tag)
    if clean_tag.nil?
      @log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
                ' A tag should be a string with utf8 characters.'
      next
    end

    # A nil local_resource_id means "fall back to legacy".
    local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
    group_key = [clean_tag, local_resource_id].freeze
    (grouped[group_key] ||= []) << [time, record]
  end
  grouped
end
1171
+
1172
# Determine the group level monitored resource and common labels shared by a
# collection of entries.
# Returns the frozen [resource, common_labels] pair derived from the
# configured defaults, the tag-derived resource type, and (when present)
# the local_resource_id.
def determine_group_level_monitored_resource_and_labels(tag,
                                                        local_resource_id)
  resource = @resource.dup
  resource.labels = @resource.labels.dup
  common_labels = @common_labels.dup

  # Change the resource type and set matched_regexp_group if the tag matches
  # certain regexp.
  matched_regexp_group = nil # @tag_regexp_list can be an empty list.
  @tag_regexp_list.each do |derived_type, tag_regexp|
    matched_regexp_group = tag_regexp.match(tag)
    if matched_regexp_group
      resource.type = derived_type
      break
    end
  end

  # Determine the monitored resource based on the local_resource_id.
  # Different monitored resource types have unique ids in different format.
  # We will query Metadata Agent for the monitored resource. Return the
  # legacy monitored resource (either the instance resource or the resource
  # inferred from the tag) if failed to get a monitored resource from
  # Metadata Agent with this key.
  #
  # Examples:
  # // GKE Pod.
  # "k8s_pod.<namespace_name>.<pod_name>"
  # // GKE container.
  # "k8s_container.<namespace_name>.<pod_name>.<container_name>"
  if local_resource_id
    converted_resource = monitored_resource_from_local_resource_id(
      local_resource_id
    )
    resource = converted_resource if converted_resource
  end

  # Once the resource type is settled down, determine the labels.
  case resource.type
  # GKE container.
  when GKE_CONSTANTS[:resource_type]
    if matched_regexp_group
      # We only expect one occurrence of each key in the match group.
      resource_labels_candidates =
        matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
      common_labels_candidates = resource_labels_candidates.dup
      resource.labels.merge!(
        delete_and_extract_labels(
          resource_labels_candidates,
          # The kubernetes_tag_regexp is poorly named. 'namespace_name' is
          # in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
          # TODO(qingling128): Figure out how to put this map into
          # constants like GKE_CONSTANTS[:extra_resource_labels].
          'container_name' => 'container_name',
          'namespace_name' => 'namespace_id',
          'pod_name' => 'pod_id'
        )
      )

      common_labels.merge!(
        delete_and_extract_labels(
          common_labels_candidates,
          GKE_CONSTANTS[:extra_common_labels]
            .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h
        )
      )
    end

  # TODO(qingling128): Temporary fallback for metadata agent restarts.
  # K8s resources.
  when K8S_CONTAINER_CONSTANTS[:resource_type],
       K8S_POD_CONSTANTS[:resource_type],
       K8S_NODE_CONSTANTS[:resource_type]
    common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")

  end

  # Cloud Dataflow and Cloud ML.
  # These labels can be set via the 'labels' option.
  # Report them as monitored resource labels instead of common labels.
  # e.g. "dataflow.googleapis.com/job_id" => "job_id"
  [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
    next unless resource.type == service_constants[:resource_type]

    resource.labels.merge!(
      delete_and_extract_labels(
        common_labels, service_constants[:extra_resource_labels]
          .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h
      )
    )
  end

  # Freeze the group-level values: entry-level processing dups them
  # before making any per-entry modifications.
  resource.freeze
  resource.labels.freeze
  common_labels.freeze

  [resource, common_labels]
end
1271
+
1272
# Extract entry level monitored resource and common labels that should be
# applied to individual entries.
# Starts from dups of the (frozen) group-level values and folds in
# per-record metadata, deleting the consumed keys from the record.
# Returns the [resource, common_labels] pair.
def determine_entry_level_monitored_resource_and_labels(
  group_level_resource, group_level_common_labels, record
)
  resource = group_level_resource.dup
  resource.labels = group_level_resource.labels.dup
  common_labels = group_level_common_labels.dup

  case resource.type
  # GKE container.
  when GKE_CONSTANTS[:resource_type]
    # Move the stdout/stderr annotation from the record into a label.
    common_labels.merge!(
      delete_and_extract_labels(
        record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"
      )
    )

    # If the record has been annotated by the kubernetes_metadata_filter
    # plugin, then use that metadata. Otherwise, rely on commonLabels
    # populated from the group's tag.
    if record.key?('kubernetes')
      resource.labels.merge!(
        delete_and_extract_labels(
          record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
            .map { |l| [l, l] }.to_h
        )
      )
      common_labels.merge!(
        delete_and_extract_labels(
          record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
            .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h
        )
      )
      # Prepend label/ to all user-defined labels' keys.
      if record['kubernetes'].key?('labels')
        common_labels.merge!(
          delete_and_extract_labels(
            record['kubernetes']['labels'], record['kubernetes']['labels']
              .map { |key, _| [key, "label/#{key}"] }.to_h
          )
        )
      end
      # We've explicitly consumed all the fields we care about -- don't
      # litter the log entries with the remaining fields that the kubernetes
      # metadata filter plugin includes (or an empty 'kubernetes' field).
      record.delete('kubernetes')
      record.delete('docker')
    end
  end

  # If the name of a field in the record is present in the @label_map
  # configured by users, report its value as a label and do not send that
  # field as part of the payload.
  common_labels.merge!(delete_and_extract_labels(record, @label_map))

  # Cloud Dataflow and Cloud ML.
  # These labels can be set via the 'labels' or 'label_map' options.
  # Report them as monitored resource labels instead of common labels.
  # e.g. "dataflow.googleapis.com/job_id" => "job_id"
  [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
    next unless resource.type == service_constants[:resource_type]

    resource.labels.merge!(
      delete_and_extract_labels(
        common_labels, service_constants[:extra_resource_labels]
          .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h
      )
    )
  end

  [resource, common_labels]
end
1346
+
1347
# Build a Time from separate seconds/nanos values, or return nil when
# either value cannot be coerced to an Integer.
def time_or_nil(ts_secs, ts_nanos)
  seconds = Integer(ts_secs)
  microseconds = Integer(ts_nanos) / 1_000.0
  Time.at(seconds, microseconds)
rescue ArgumentError, TypeError
  nil
end
1352
+
1353
# Determine the timestamp for a log entry, consuming any timestamp keys
# present in the record. Sources, in priority order:
#   1. record['timestamp'] as a {'seconds' =>, 'nanos' =>} hash.
#   2. record['timestampSeconds'] + record['timestampNanos'].
#   3. record['timeNanos'] (deprecated; logs a one-time warning).
#   4. record['time'] as an ISO8601 string (k8s style); falls back to
#      the Fluentd event time if it does not parse.
#   5. The Fluentd event time.
# Returns [ts_secs, ts_nanos, timestamp]; secs/nanos are coerced to
# Integer when possible, otherwise passed through unchanged, and
# timestamp may be nil when the record values were unparseable.
def compute_timestamp(record, time)
  if record.key?('timestamp') &&
     record['timestamp'].is_a?(Hash) &&
     record['timestamp'].key?('seconds') &&
     record['timestamp'].key?('nanos')
    secs = record['timestamp']['seconds']
    nanos = record['timestamp']['nanos']
    record.delete('timestamp')
    parsed_time = time_or_nil(secs, nanos)
  elsif record.key?('timestampSeconds') &&
        record.key?('timestampNanos')
    secs = record.delete('timestampSeconds')
    nanos = record.delete('timestampNanos')
    parsed_time = time_or_nil(secs, nanos)
  elsif record.key?('timeNanos')
    # This is deprecated since the precision is insufficient.
    # Use timestampSeconds/timestampNanos instead
    total_nanos = record.delete('timeNanos')
    secs = (total_nanos / 1_000_000_000).to_i
    nanos = total_nanos % 1_000_000_000
    unless @timenanos_warning
      # Warn the user this is deprecated, but only once to avoid spam.
      @timenanos_warning = true
      @log.warn 'timeNanos is deprecated - please use ' \
                'timestampSeconds and timestampNanos instead.'
    end
    parsed_time = time_or_nil(secs, nanos)
  elsif record.key?('time')
    # k8s ISO8601 timestamp
    parsed_time = begin
      Time.iso8601(record.delete('time'))
    rescue StandardError
      Time.at(time)
    end
    secs = parsed_time.tv_sec
    nanos = parsed_time.tv_nsec
  else
    parsed_time = Time.at(time)
    secs = parsed_time.tv_sec
    nanos = parsed_time.tv_nsec
  end

  # Coerce to Integer when possible; otherwise return the raw value and
  # let the caller decide how to handle it.
  coerce_to_int = lambda do |value|
    begin
      Integer value
    rescue ArgumentError, TypeError
      value
    end
  end

  [coerce_to_int.call(secs), coerce_to_int.call(nanos), parsed_time]
end
1407
+
1408
# Adjust timestamps from the future.
# The base case is:
# 0. The parsed timestamp is less than one day into the future.
#    This is allowed by the API, and should be left unchanged.
#
# Beyond that, there are two cases:
# 1. The parsed timestamp is later in the current year:
#    This can happen when system log lines from previous years are missing
#    the year, so the date parser assumes the current year.
#    We treat these lines as coming from last year. This could label
#    2-year-old logs incorrectly, but this probably isn't super important.
#
# 2. The parsed timestamp is past the end of the current year:
#    Since the year is different from the current year, this isn't the
#    missing year in system logs. It is unlikely that users explicitly
#    write logs at a future date. This could result from an unsynchronized
#    clock on a VM, or some random value being parsed as the timestamp.
#    We reset the timestamp on those lines to the default value and let the
#    downstream API handle it.
def adjust_timestamp_if_invalid(timestamp, current_time)
  secs = timestamp.tv_sec
  nanos = timestamp.tv_nsec

  start_of_next_year = Time.mktime(current_time.year + 1)
  one_day_ahead = current_time.to_datetime.next_day.to_time

  if timestamp >= one_day_ahead
    if timestamp >= start_of_next_year
      # Case 2: reset to the epoch default.
      secs = 0
      nanos = 0
    else
      # Case 1: assume the log came from last year. The nanosecond part
      # is unaffected by subtracting a year.
      secs = timestamp.to_datetime.prev_year.to_time.tv_sec
    end
  end
  # Case 0 falls through with the timestamp unchanged.

  [secs, nanos]
end
1446
+
1447
# Determine the severity for a log entry. An explicit 'severity' key in
# the record wins (and is consumed); for GKE container logs the
# stdout/stderr stream label maps to a severity; everything else gets
# 'DEFAULT'.
def compute_severity(resource_type, record, entry_level_common_labels)
  return parse_severity(record.delete('severity')) if record.key?('severity')

  if resource_type == GKE_CONSTANTS[:resource_type]
    stream = entry_level_common_labels["#{GKE_CONSTANTS[:service]}/stream"]
    GKE_CONSTANTS[:stream_severity_map].fetch(stream, 'DEFAULT')
  else
    'DEFAULT'
  end
end
1457
+
1458
# Populate structured LogEntry fields (e.g. httpRequest, operation,
# sourceLocation) from well-known keys in the record, consuming the keys
# that were successfully extracted.
#
# Each LOG_ENTRY_FIELDS_MAP entry describes one LogEntry field: the name
# of the instance variable holding the payload key, the subfield specs
# (payload key, LogEntry attribute, cast method), and the gRPC / REST
# classes used to build the field object.
#
# Bug fix: the TypeError rescue below previously logged an undefined
# local `err`, which raised NameError and was swallowed by the outer
# StandardError rescue — so one bad subfield silently aborted the whole
# field. The exception is now bound to `e` and logged, and only the
# offending subfield is skipped.
def set_log_entry_fields(record, entry)
  # TODO(qingling128) On the next major after 0.7.4, make all logEntry
  # subfields behave the same way: if the field is not in the correct
  # format, log an error in the Fluentd log and remove this field from
  # payload. This is the preferred behavior per PM decision.
  LOG_ENTRY_FIELDS_MAP.each do |field_name, config|
    payload_key, subfields, grpc_class, non_grpc_class = config
    begin
      payload_key = instance_variable_get(payload_key)
      fields = record[payload_key]
      record.delete(payload_key) if fields.nil?
      next unless fields.is_a?(Hash)

      extracted_subfields = subfields.each_with_object({}) \
        do |(original_key, destination_key, cast_fn), extracted_fields|
        value = fields.delete(original_key)
        next if value.nil?

        begin
          casted_value = send(cast_fn, value)
        rescue TypeError => e
          # Skip only this subfield when the cast fails.
          @log.error "Failed to #{cast_fn} for #{field_name}." \
                     "#{original_key} with value #{value.inspect}.", e
          next
        end
        next if casted_value.nil?

        extracted_fields[destination_key] = casted_value
      end

      next unless extracted_subfields

      # Build the field object with the class matching the client in use.
      output = if @use_grpc
                 Object.const_get(grpc_class).new
               else
                 Object.const_get(non_grpc_class).new
               end
      extracted_subfields.each do |key, value|
        output.send("#{key}=", value)
      end

      # Drop the payload key once all of its subfields were consumed.
      record.delete(payload_key) if fields.empty?

      entry.send("#{field_name}=", output)
    rescue StandardError => e
      @log.error "Failed to set log entry field for #{field_name}.", e
    end
  end
end
1507
+
1508
# Parse labels. Return nil if not set.
# Consumes the @labels_key entry from the record and validates that it is
# a JSON object whose keys and values are all strings; logs and returns
# nil otherwise.
def parse_labels(record)
  payload_labels = record.delete(@labels_key)
  return nil unless payload_labels

  unless payload_labels.is_a?(Hash)
    @log.error "Invalid value of '#{@labels_key}' in the payload: " \
               "#{payload_labels}. Labels need to be a JSON object."
    return nil
  end

  non_string_keys =
    payload_labels.reject { |k, v| k.is_a?(String) && v.is_a?(String) }.keys
  unless non_string_keys.empty?
    @log.error "Invalid value of '#{@labels_key}' in the payload: " \
               "#{payload_labels}. Labels need string values for all " \
               "keys; keys #{non_string_keys} don't."
    return nil
  end

  payload_labels
rescue StandardError => e
  @log.error "Failed to extract '#{@labels_key}' from payload.", e
  nil
end
1533
+
1534
# Values permitted by the API for 'severity' (which is an enum).
VALID_SEVERITIES = Set.new(
  %w[DEFAULT DEBUG INFO NOTICE WARNING ERROR CRITICAL ALERT EMERGENCY]
).freeze

# Translates other severity strings to one of the valid values above.
# Keys are matched after parse_severity upcases and strips the input.
SEVERITY_TRANSLATIONS = {
  # log4j levels (both current and obsolete).
  'WARN' => 'WARNING',
  'FATAL' => 'CRITICAL',
  'TRACE' => 'DEBUG',
  'TRACE_INT' => 'DEBUG',
  'FINE' => 'DEBUG',
  'FINER' => 'DEBUG',
  'FINEST' => 'DEBUG',
  # java.util.logging levels (only missing ones from above listed).
  'SEVERE' => 'ERROR',
  'CONFIG' => 'DEBUG',
  # nginx levels (only missing ones from above listed).
  'CRIT' => 'CRITICAL',
  'EMERG' => 'EMERGENCY',
  # single-letter levels. Note E->ERROR and D->DEBUG.
  'D' => 'DEBUG',
  'I' => 'INFO',
  'N' => 'NOTICE',
  'W' => 'WARNING',
  'E' => 'ERROR',
  'C' => 'CRITICAL',
  'A' => 'ALERT',
  # other misc. translations.
  'INFORMATION' => 'INFO',
  'ERR' => 'ERROR',
  'F' => 'CRITICAL'
}.freeze
1568
+
1569
# Normalize an arbitrary severity value to something the API accepts:
# a valid severity name, or an integer truncated to the closest valid
# enum value (multiples of 100 between 0 and 800), or 'DEFAULT' when the
# input cannot be interpreted.
def parse_severity(severity_str)
  # The API is case insensitive, but uppercase to make things simpler.
  severity = severity_str.to_s.upcase.strip

  # If the severity is already valid, just return it.
  return severity if VALID_SEVERITIES.include?(severity)

  # Numeric (string) severities map onto the enum's multiples of 100,
  # clamped to the valid 0..800 range.
  if /\A\d+\z/ =~ severity
    truncated = (severity.to_i / 100) * 100
    return truncated.clamp(0, 800)
  end

  # Try to translate the severity; fall back to 'DEFAULT'.
  SEVERITY_TRANSLATIONS.fetch(severity, 'DEFAULT')
end
1600
+
1601
# Maps severity names and their numeric (multiple-of-100) equivalents, as
# produced by parse_severity, to the gRPC LogSeverity enum values.
GRPC_SEVERITY_MAPPING = {
  'DEFAULT' => Google::Cloud::Logging::Type::LogSeverity::DEFAULT,
  'DEBUG' => Google::Cloud::Logging::Type::LogSeverity::DEBUG,
  'INFO' => Google::Cloud::Logging::Type::LogSeverity::INFO,
  'NOTICE' => Google::Cloud::Logging::Type::LogSeverity::NOTICE,
  'WARNING' => Google::Cloud::Logging::Type::LogSeverity::WARNING,
  'ERROR' => Google::Cloud::Logging::Type::LogSeverity::ERROR,
  'CRITICAL' => Google::Cloud::Logging::Type::LogSeverity::CRITICAL,
  'ALERT' => Google::Cloud::Logging::Type::LogSeverity::ALERT,
  'EMERGENCY' => Google::Cloud::Logging::Type::LogSeverity::EMERGENCY,
  0 => Google::Cloud::Logging::Type::LogSeverity::DEFAULT,
  100 => Google::Cloud::Logging::Type::LogSeverity::DEBUG,
  200 => Google::Cloud::Logging::Type::LogSeverity::INFO,
  300 => Google::Cloud::Logging::Type::LogSeverity::NOTICE,
  400 => Google::Cloud::Logging::Type::LogSeverity::WARNING,
  500 => Google::Cloud::Logging::Type::LogSeverity::ERROR,
  600 => Google::Cloud::Logging::Type::LogSeverity::CRITICAL,
  700 => Google::Cloud::Logging::Type::LogSeverity::ALERT,
  800 => Google::Cloud::Logging::Type::LogSeverity::EMERGENCY
}.freeze
1621
+
1622
# Map a severity (string name or numeric value) to the gRPC LogSeverity
# enum value; unmapped values are passed through unchanged.
def grpc_severity(severity)
  # TODO: find out why this doesn't work.
  # if severity.is_a? String
  #   return Google::Cloud::Logging::Type::LogSeverity.resolve(severity)
  # end
  GRPC_SEVERITY_MAPPING.fetch(severity, severity)
end
1631
+
1632
# Cast an arbitrary value to its string representation (nil becomes "").
def parse_string(value)
  "#{value}"
end
1635
+
1636
# Cast a value to an integer using Ruby's permissive to_i semantics
# (non-numeric strings and nil become 0; "12abc" becomes 12).
def parse_int(value)
  value.to_i
end
1639
+
1640
# Interpret the common truthy representations: true, the string 'true',
# or the integer 1. Everything else (including 'TRUE', 0, nil) is false.
def parse_bool(value)
  case value
  when true, 'true', 1
    true
  else
    false
  end
end
1643
+
1644
# Parse latency.
# Expected format: whitespace (opt.) + integer + point & decimal (opt.)
# + whitespace (opt.) + "s" + whitespace (opt.), e.g. "1.42 s".
# Returns nil when the input does not match, so callers can skip setting
# latency. Emits a protobuf Duration under gRPC, otherwise a hash with
# zero-valued parts removed.
def parse_latency(latency)
  match = @compiled_http_latency_regexp.match(latency)
  return nil unless match

  # Split the integer and decimal parts in order to calculate
  # seconds and nanos.
  seconds = match['seconds'].to_i
  nanos = (match['decimal'].to_f * 1_000_000_000).round

  if @use_grpc
    Google::Protobuf::Duration.new(seconds: seconds, nanos: nanos)
  else
    latency_parts = { seconds: seconds, nanos: nanos }
    latency_parts.delete_if { |_, value| value.zero? }
  end
end
1670
+
1671
# Serialize one event as a MessagePack-encoded [tag, time, record] triple
# for buffering.
def format(tag, time, record)
  packer = Fluent::MessagePackFactory.engine_factory.packer
  packer.write([tag, time, record]).to_s
end
1678
+
1679
# Given a tag, returns the corresponding valid tag if possible, or nil if
# the tag should be rejected. If 'require_valid_tags' is false, non-string
# tags are converted to strings, and invalid characters are sanitized;
# otherwise such tags are rejected.
def sanitize_tag(tag)
  if @require_valid_tags
    # Strict mode: only accept non-empty strings that are already valid UTF-8.
    return nil unless tag.is_a?(String) && tag != '' &&
                      convert_to_utf8(tag) == tag
  end

  sanitized = convert_to_utf8(tag.to_s)
  sanitized.empty? ? '_' : sanitized
end
1693
+
1694
# For every original_label => new_label pair in label_map, remove
# original_label from `hash` (when present) and collect its value under
# new_label in the returned map. Returns {} when either argument is not
# a Hash.
def delete_and_extract_labels(hash, label_map)
  return {} unless label_map.is_a?(Hash) && hash.is_a?(Hash)

  extracted = {}
  label_map.each do |original_label, new_label|
    value = hash.delete(original_label)
    extracted[new_label] = convert_to_utf8(value.to_s) if value
  end
  extracted
end
1707
+
1708
# Convert a Ruby value into the equivalent Google::Protobuf::Value.
# Hashes and arrays are converted recursively (via struct_from_ruby /
# list_from_ruby); strings are re-encoded to UTF-8. Protobuf Struct and
# ListValue instances are checked before Hash/Array so they are assigned
# as-is rather than re-converted. Raises Google::Protobuf::Error for any
# unsupported type.
def value_from_ruby(value)
  ret = Google::Protobuf::Value.new
  case value
  when NilClass
    ret.null_value = 0
  when Numeric
    ret.number_value = value
  when String
    ret.string_value = convert_to_utf8(value)
  when TrueClass
    ret.bool_value = true
  when FalseClass
    ret.bool_value = false
  when Google::Protobuf::Struct
    ret.struct_value = value
  when Hash
    ret.struct_value = struct_from_ruby(value)
  when Google::Protobuf::ListValue
    ret.list_value = value
  when Array
    ret.list_value = list_from_ruby(value)
  else
    @log.error "Unknown type: #{value.class}"
    raise Google::Protobuf::Error, "Unknown type: #{value.class}"
  end
  ret
end
1735
+
1736
# Convert a Ruby array into a Google::Protobuf::ListValue, converting
# each element via value_from_ruby.
def list_from_ruby(arr)
  list = Google::Protobuf::ListValue.new
  arr.each { |element| list.values << value_from_ruby(element) }
  list
end
1743
+
1744
# Convert a Ruby hash into a Google::Protobuf::Struct. Keys are
# stringified and re-encoded to UTF-8.
def struct_from_ruby(hash)
  struct = Google::Protobuf::Struct.new
  hash.each do |key, value|
    # ||= keeps the first value when two distinct keys stringify to the
    # same UTF-8 key.
    struct.fields[convert_to_utf8(key.to_s)] ||= value_from_ruby(value)
  end
  struct
end
1751
+
1752
# TODO(qingling128): Fix the inconsistent behavior of 'message', 'log' and
# 'msg' in the next major version 1.0.0.
# Assign either a text or a JSON payload to `entry` (never both).
# Text payload is chosen when the record is an unstructured container log
# with a 'log' key, or when 'message' is the only remaining key; any other
# record (or anything already detected as JSON) becomes a JSON payload.
def set_payload(resource_type, record, entry, is_json)
  text_payload = nil
  json_payload = nil
  if is_json
    json_payload = record
  elsif resource_type == GKE_CONSTANTS[:resource_type] && record.key?('log')
    text_payload = record['log']
  elsif record.size == 1 && record.key?('message')
    text_payload = record['message']
  else
    json_payload = record
  end

  if json_payload
    # The gRPC client needs a protobuf Struct; REST takes the raw hash.
    entry.json_payload =
      @use_grpc ? struct_from_ruby(json_payload) : json_payload
  elsif text_payload
    text_payload = text_payload.to_s
    entry.text_payload =
      @use_grpc ? convert_to_utf8(text_payload) : text_payload
  end
end
1787
+
1788
# Derive the URL-encoded log name for a tag, adjusted per resource type:
# App Engine logs get a service prefix to avoid namespace collisions, and
# Kubernetes logs use the container name (when available and valid).
def log_name(tag, resource)
  case resource.type
  when APPENGINE_CONSTANTS[:resource_type]
    tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
  when GKE_CONSTANTS[:resource_type]
    if resource.labels&.key?('container_name')
      candidate = sanitize_tag(resource.labels['container_name'])
      tag = candidate unless candidate.nil?
    end
  end
  ERB::Util.url_encode(tag)
end
1802
+
1803
# Initialize @client as either a gRPC LoggingService client (when
# @use_grpc) or a REST Google::Apis::LoggingV2 client.
#
# For gRPC: parses @logging_api_url to obtain host/port (raising
# Fluent::ConfigError on an invalid URL), optionally enables channel-level
# compression from @grpc_compression_algorithm, and picks credentials
# based on the URL scheme — composed SSL + application-default call
# credentials for https, or an insecure channel otherwise.
#
# For REST: configures the default client options and attaches
# application-default authorization scoped to logging.
def init_api_client
  # Set up the logger for the auto-generated Google Cloud APIs.
  Google::Apis.logger = @log
  if @use_grpc
    uri = URI.parse(@logging_api_url)
    host = uri.host
    unless host
      raise Fluent::ConfigError,
            'The logging_api_url option specifies an invalid URL:' \
            " #{@logging_api_url}."
    end
    if @grpc_compression_algorithm
      compression_options =
        GRPC::Core::CompressionOptions.new(
          default_algorithm: @grpc_compression_algorithm
        )
      compression_channel_args = compression_options.to_channel_arg_hash
    else
      compression_channel_args = {}
    end
    if uri.scheme == 'https'
      ssl_creds = GRPC::Core::ChannelCredentials.new
      authentication = Google::Auth.get_application_default
      creds = GRPC::Core::CallCredentials.new(authentication.updater_proc)
      creds = ssl_creds.compose(creds)
    else
      creds = :this_channel_is_insecure
    end
    port = ":#{uri.port}" if uri.port
    user_agent = \
      "#{PLUGIN_NAME}/#{PLUGIN_VERSION} grpc-ruby/#{GRPC::VERSION} " \
      "#{Google::Apis::OS_VERSION}"
    channel_args = { 'grpc.primary_user_agent' => user_agent }
                   .merge!(compression_channel_args)
    @client = Google::Cloud::Logging::V2::LoggingService::Client.new do |config|
      config.credentials = GRPC::Core::Channel.new(
        "#{host}#{port}", channel_args, creds
      )
    end
  else
    # TODO: Use a non-default ClientOptions object.
    Google::Apis::ClientOptions.default.application_name = PLUGIN_NAME
    Google::Apis::ClientOptions.default.application_version = PLUGIN_VERSION
    @client = Google::Apis::LoggingV2::LoggingService.new
    @client.authorization = Google::Auth.get_application_default(
      Common::LOGGING_SCOPE
    )
  end
end
1852
+
1853
# Return @client, refreshing the REST access token first when it has
# expired. gRPC channels handle token renewal themselves
# (https://grpc.io/docs/guides/auth.html#authentication-api), so they are
# returned untouched.
def api_client
  return @client if @use_grpc

  if @client.authorization.expired?
    begin
      @client.authorization.fetch_access_token!
    rescue MultiJson::ParseError
      # Workaround an issue in the API client; just re-raise a more
      # descriptive error for the user (which will still cause a retry).
      raise Google::APIClient::ClientError,
            'Unable to fetch access token (no scopes configured?)'
    end
  end
  @client
end
1868
+
1869
# Encode `input` as UTF-8. When 'coerce_to_utf8' is enabled, invalid or
# unmappable characters are replaced by 'non_utf8_replacement_string';
# otherwise any non-UTF-8 character makes the plugin error out (after
# logging guidance for the user).
def convert_to_utf8(input)
  unless @coerce_to_utf8
    begin
      return input.encode('utf-8')
    rescue EncodingError
      @log.error 'Encountered encoding issues potentially due to non ' \
                 'UTF-8 characters. To allow non-UTF-8 characters and ' \
                 'replace them with spaces, please set "coerce_to_utf8" ' \
                 'to true.'
      raise
    end
  end

  input.encode(
    'utf-8',
    invalid: :replace,
    undef: :replace,
    replace: @non_utf8_replacement_string
  )
end
1893
+
1894
+ # Extract a map of error details from a potentially partially successful
1895
+ # REST request.
1896
+ #
1897
+ # The keys in this map are [error_code, error_message] pairs, and the values
1898
+ # are a list of stringified indexes of log entries that failed due to this
1899
+ # error.
1900
+ #
1901
+ # A sample error.body looks like:
1902
+ # {
1903
+ # "error": {
1904
+ # "code": 403,
1905
+ # "message": "User not authorized.",
1906
+ # "status": "PERMISSION_DENIED",
1907
+ # "details": [
1908
+ # {
1909
+ # "@type": "type.googleapis.com/google.logging.v2.WriteLogEntriesPar
1910
+ # tialErrors",
1911
+ # "logEntryErrors": {
1912
+ # "0": {
1913
+ # "code": 7,
1914
+ # "message": "User not authorized."
1915
+ # },
1916
+ # "1": {
1917
+ # "code": 3,
1918
+ # "message": "Log name contains illegal character :"
1919
+ # },
1920
+ # "3": {
1921
+ # "code": 3,
1922
+ # "message": "Log name contains illegal character :"
1923
+ # }
1924
+ # }
1925
+ # },
1926
+ # {
1927
+ # "@type": "type.googleapis.com/google.rpc.DebugInfo",
1928
+ # "detail": ...
1929
+ # }
1930
+ # ]
1931
+ # }
1932
+ # }
1933
+ #
1934
+ # The root level "code", "message", and "status" simply match the root
1935
+ # cause of the first failed log entry. For example, if we switched the order
1936
+ # of the log entries, then we would get:
1937
+ # {
1938
+ # "error" : {
1939
+ # "code" : 400,
1940
+ # "message" : "Log name contains illegal character :",
1941
+ # "status" : "INVALID_ARGUMENT",
1942
+ # "details": ...
1943
+ # }
1944
+ # }
1945
+ # We will ignore it anyway and look at the details instead which includes
1946
+ # info for all failed log entries.
1947
+ #
1948
+ # In this example, the logEntryErrors that we care are:
1949
+ # {
1950
+ # "0": {
1951
+ # "code": 7,
1952
+ # "message": "User not authorized."
1953
+ # },
1954
+ # "1": {
1955
+ # "code": 3,
1956
+ # "message": "Log name contains illegal character :"
1957
+ # },
1958
+ # "3": {
1959
+ # "code": 3,
1960
+ # "message": "Log name contains illegal character :"
1961
+ # }
1962
+ # }
1963
+ #
1964
+ # The ultimate map that is constructed is:
1965
+ # {
1966
+ # [7, 'User not authorized.']: ['0'],
1967
+ # [3, 'Log name contains illegal character :']: ['1', '3']
1968
+ # }
1969
# Build the {[code, message] => [stringified indexes]} map described in
# the comment above from a REST error body. Any parsing problem (missing
# partial-error detail, malformed entry, invalid JSON) is logged at warn
# level and an empty map is returned.
def construct_error_details_map(error)
  error_details_map = Hash.new { |h, k| h[k] = [] }

  parsed_error = ensure_hash(ensure_hash(JSON.parse(error.body))['error'])
  details = ensure_array(parsed_error['details'])
  # detect's ifnone callable fires when no detail carries the
  # partial-errors @type.
  missing_type = -> { raise JSON::ParserError, "No type #{PARTIAL_ERROR_FIELD}." }
  partial_errors = details.detect(missing_type) do |detail|
    ensure_hash(detail)['@type'] == PARTIAL_ERROR_FIELD
  end
  entry_errors = ensure_hash(ensure_hash(partial_errors)['logEntryErrors'])
  entry_errors.each do |index, entry_error|
    entry_hash = ensure_hash(entry_error)
    unless entry_hash['code'] && entry_hash['message']
      raise JSON::ParserError,
            "Entry #{index} is missing 'code' or 'message'."
    end
    # TODO(qingling128): Convert indexes to integers.
    error_details_map[[entry_hash['code'], entry_hash['message']].freeze] <<
      index
  end
  error_details_map
rescue JSON::ParserError => e
  @log.warn 'Failed to extract log entry errors from the error details:' \
            " #{error.body}.", error: e
  {}
end
1999
+
2000
+ # Extract a map of error details from a potentially partially successful
2001
+ # gRPC request.
2002
+ #
2003
+ # The keys in this map are [error_code, error_message] pairs, and the values
2004
+ # are a list of indexes of log entries that failed due to this error.
2005
+ #
2006
+ # A sample error looks like:
2007
+ # <Google::Cloud::PermissionDeniedError:
2008
+ # message: 'User not authorized.',
2009
+ # details: [
2010
+ # <Google::Cloud::Logging::V2::WriteLogEntriesPartialErrors:
2011
+ # log_entry_errors: {
2012
+ # 0 => <Google::Rpc::Status:
2013
+ # code: 7,
2014
+ # message: "User not authorized.",
2015
+ # details: []>,
2016
+ # 1 => <Google::Rpc::Status:
2017
+ # code: 3,
2018
+ # message: "Log name contains illegal character :",
2019
+ # details: []>,
2020
+ # 3 => <Google::Rpc::Status:
2021
+ # code: 3,
2022
+ # message: "Log name contains illegal character :",
2023
+ # details: []>
2024
+ # }
2025
+ # >,
2026
+ # <Google::Rpc::DebugInfo:
2027
+ # stack_entries: [],
2028
+ # detail: "..."
2029
+ # >
2030
+ # ]
2031
+ # cause: <GRPC::PermissionDenied: 7:User not authorized.>
2032
+ # }
2033
+ #
2034
+ # The ultimate map that is constructed is:
2035
+ # {
2036
+ # [7, 'User not authorized.']: [0],
2037
+ # [3, 'Log name contains illegal character :']: [1, 3]
2038
+ # }
2039
# Build the {[code, message] => [indexes]} map described in the comment
# above from a gRPC error's status details. Raises-and-rescues
# JSON::ParserError internally for any malformed details, logging at warn
# level and returning an empty map in that case.
#
# Fix: dropped the leftover `@log.error "construct_error_details_map_grpc:
# ..."` debug statement, which logged every partial failure at ERROR
# severity on entry; the warn in the rescue clause already reports actual
# extraction failures.
def construct_error_details_map_grpc(gax_error)
  error_details_map = Hash.new { |h, k| h[k] = [] }
  error_details = ensure_array(gax_error.status_details)
  raise JSON::ParserError, 'The error details are empty.' if
    error_details.empty?
  raise JSON::ParserError, 'No partial error info in error details.' unless
    error_details[0].is_a?(
      Google::Cloud::Logging::V2::WriteLogEntriesPartialErrors
    )

  log_entry_errors = ensure_hash(error_details[0].log_entry_errors)
  log_entry_errors.each do |index, log_entry_error|
    error_key = [log_entry_error[:code], log_entry_error[:message]].freeze
    error_details_map[error_key] << index
  end
  error_details_map
rescue JSON::ParserError => e
  @log.warn 'Failed to extract log entry errors from the error details:' \
            " #{gax_error.details.inspect}.", error: e
  {}
end
2061
+
2062
# Take a locally unique resource id and convert it to the globally unique
# monitored resource.
#
# Recognized local_resource_id formats (anything else returns nil):
#   k8s_container.<namespace>.<pod>.<container>
#   k8s_pod.<namespace>.<pod>
#   k8s_node.<node>
# The named captures (resource_type, namespace_name, pod_name, ...) become
# local variables via the `=~` named-capture binding below.
#
# Returns a Google::Apis::LoggingV2::MonitoredResource, or nil when the id
# doesn't match or the cluster name/location can't be determined (callers
# then fall back to the default resource).
def monitored_resource_from_local_resource_id(local_resource_id)
  return unless
    /^
    (?<resource_type>k8s_container)
    \.(?<namespace_name>[0-9a-z-]+)
    \.(?<pod_name>[.0-9a-z-]+)
    \.(?<container_name>[0-9a-z-]+)$/x =~ local_resource_id ||
    /^
    (?<resource_type>k8s_pod)
    \.(?<namespace_name>[0-9a-z-]+)
    \.(?<pod_name>[.0-9a-z-]+)$/x =~ local_resource_id ||
    /^
    (?<resource_type>k8s_node)
    \.(?<node_name>[0-9a-z-]+)$/x =~ local_resource_id

  # Clear name and location if they're explicitly set to empty.
  @k8s_cluster_name = nil if @k8s_cluster_name == ''
  @k8s_cluster_location = nil if @k8s_cluster_location == ''

  begin
    # Fill in missing cluster identity from the GCE metadata server.
    @k8s_cluster_name ||= @utils.fetch_gce_metadata(
      @platform, 'instance/attributes/cluster-name'
    )
    @k8s_cluster_location ||= @utils.fetch_gce_metadata(
      @platform, 'instance/attributes/cluster-location'
    )
  rescue StandardError => e
    @log.error 'Failed to retrieve k8s cluster name and location.', \
               error: e
  end
  # Build the label set appropriate for the matched resource type; also
  # pick the resource type to mention in the fallback error message.
  case resource_type
  when K8S_CONTAINER_CONSTANTS[:resource_type]
    labels = {
      'namespace_name' => namespace_name,
      'pod_name' => pod_name,
      'container_name' => container_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = GKE_CONSTANTS[:resource_type]
  when K8S_POD_CONSTANTS[:resource_type]
    labels = {
      'namespace_name' => namespace_name,
      'pod_name' => pod_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = GKE_CONSTANTS[:resource_type]
  when K8S_NODE_CONSTANTS[:resource_type]
    labels = {
      'node_name' => node_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = COMPUTE_CONSTANTS[:resource_type]
  end
  unless @k8s_cluster_name && @k8s_cluster_location
    # NOTE(review): `e` here is only bound when the metadata fetch above
    # raised; otherwise it is nil, so `error: e` may log a nil error.
    @log.error "Failed to construct #{resource_type} resource locally." \
               ' Falling back to writing logs against' \
               " #{fallback_resource} resource.", error: e
    return
  end
  constructed_resource = Google::Apis::LoggingV2::MonitoredResource.new(
    type: resource_type,
    labels: labels
  )
  @log.debug("Constructed #{resource_type} resource locally: " \
             "#{constructed_resource.inspect}")
  constructed_resource
end
2134
+
2135
# Convert the value to a Ruby Array, raising JSON::ParserError (with the
# offending class name) when it cannot be converted.
def ensure_array(value)
  converted = Array.try_convert(value)
  raise JSON::ParserError, value.class.to_s if converted.nil?

  converted
end
2139
+
2140
# Convert the value to a Ruby Hash, raising JSON::ParserError (with the
# offending class name) when it cannot be converted.
def ensure_hash(value)
  converted = Hash.try_convert(value)
  raise JSON::ParserError, value.class.to_s if converted.nil?

  converted
end
2144
+
2145
# Increment the metric for the number of successful requests; a no-op
# when the counter was never registered.
def increment_successful_requests_count
  @successful_requests_count&.increment(
    labels: { grpc: @use_grpc, code: @ok_code }
  )
end
2153
+
2154
# Increment the metric for the number of failed requests, labeled by the
# provided status code; a no-op when the counter was never registered.
def increment_failed_requests_count(code)
  @failed_requests_count&.increment(
    labels: { grpc: @use_grpc, code: code }
  )
end
2163
+
2164
# Increment the metric for the number of log entries successfully
# ingested by the Stackdriver Logging API; a no-op when the counter was
# never registered.
def increment_ingested_entries_count(count)
  @ingested_entries_count&.increment(
    labels: { grpc: @use_grpc, code: @ok_code }, by: count
  )
end
2173
+
2174
# Increment the metric for the number of log entries that were dropped
# and not ingested by the Stackdriver Logging API; a no-op when the
# counter was never registered.
def increment_dropped_entries_count(count, code)
  @dropped_entries_count&.increment(
    labels: { grpc: @use_grpc, code: code }, by: count
  )
end
2183
+
2184
# Increment the metric for the number of log entries that will be
# retried after a failed request; a no-op when the counter was never
# registered.
def increment_retried_entries_count(count, code)
  @retried_entries_count&.increment(
    labels: { grpc: @use_grpc, code: code }, by: count
  )
end
2193
+ end
2194
+ end
2195
+
2196
module Google
  module Apis
    module LoggingV2
      # Reopen MonitoredResource so #dup performs a deep copy: the labels
      # hash is duplicated too, so mutating a copy's labels does not leak
      # into the original.
      class MonitoredResource
        def dup
          super.tap { |copy| copy.labels = labels.dup }
        end
      end
    end
  end
end