fluent-plugin-vadimberezniker-gcp 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CONTRIBUTING +24 -0
- data/Gemfile +3 -0
- data/LICENSE +201 -0
- data/README.rdoc +53 -0
- data/Rakefile +43 -0
- data/fluent-plugin-google-cloud.gemspec +43 -0
- data/fluent-plugin-vadimberezniker-gcp-0.13.2.gem +0 -0
- data/lib/fluent/plugin/common.rb +399 -0
- data/lib/fluent/plugin/filter_add_insert_ids.rb +86 -0
- data/lib/fluent/plugin/filter_analyze_config.rb +410 -0
- data/lib/fluent/plugin/in_object_space_dump.rb +62 -0
- data/lib/fluent/plugin/monitoring.rb +265 -0
- data/lib/fluent/plugin/out_google_cloud.rb +2209 -0
- data/lib/fluent/plugin/statusz.rb +124 -0
- data/test/helper.rb +46 -0
- data/test/plugin/asserts.rb +87 -0
- data/test/plugin/base_test.rb +2680 -0
- data/test/plugin/constants.rb +1114 -0
- data/test/plugin/data/c31e573fd7f62ed495c9ca3821a5a85cb036dee1-privatekey.p12 +0 -0
- data/test/plugin/data/credentials.json +7 -0
- data/test/plugin/data/google-fluentd-baseline.conf +24 -0
- data/test/plugin/data/google-fluentd-custom.conf +40 -0
- data/test/plugin/data/iam-credentials.json +11 -0
- data/test/plugin/data/invalid_credentials.json +8 -0
- data/test/plugin/data/new-style-credentials.json +12 -0
- data/test/plugin/test_driver.rb +56 -0
- data/test/plugin/test_filter_add_insert_ids.rb +137 -0
- data/test/plugin/test_filter_analyze_config.rb +257 -0
- data/test/plugin/test_out_google_cloud.rb +465 -0
- data/test/plugin/test_out_google_cloud_grpc.rb +478 -0
- data/test/plugin/utils.rb +148 -0
- metadata +347 -0
@@ -0,0 +1,2209 @@
|
|
1
|
+
# Copyright 2014 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
require 'cgi'
|
15
|
+
require 'erb'
|
16
|
+
require 'grpc'
|
17
|
+
require 'json'
|
18
|
+
require 'open-uri'
|
19
|
+
require 'socket'
|
20
|
+
require 'time'
|
21
|
+
require 'yaml'
|
22
|
+
require 'google/apis'
|
23
|
+
require 'google/cloud/errors'
|
24
|
+
require 'google/apis/logging_v2'
|
25
|
+
require 'google/cloud/logging/v2'
|
26
|
+
require 'google/logging/v2/logging_pb'
|
27
|
+
require 'google/logging/v2/logging_services_pb'
|
28
|
+
require 'google/logging/v2/log_entry_pb'
|
29
|
+
require 'googleauth'
|
30
|
+
|
31
|
+
require_relative 'common'
|
32
|
+
require_relative 'monitoring'
|
33
|
+
require_relative 'statusz'
|
34
|
+
|
35
|
+
module Google
|
36
|
+
module Protobuf
|
37
|
+
# Alias the has_key? method to have the same interface as a regular map.
|
38
|
+
class Map
|
39
|
+
alias key? has_key?
|
40
|
+
alias to_hash to_h
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
module Google
|
46
|
+
module Auth
|
47
|
+
# Disable gcloud lookup in googleauth to avoid picking up its project id.
|
48
|
+
module CredentialsLoader
|
49
|
+
# Set $VERBOSE to nil to mute the "already initialized constant" warnings.
|
50
|
+
warn_level = $VERBOSE
|
51
|
+
begin
|
52
|
+
$VERBOSE = nil
|
53
|
+
# These constants are used to invoke gcloud on Linux and Windows,
|
54
|
+
# respectively. Ideally, we would have overridden
|
55
|
+
# CredentialsLoader.load_gcloud_project_id, but we cannot catch it
|
56
|
+
# before it's invoked via "require 'googleauth'". So we override the
|
57
|
+
# constants instead.
|
58
|
+
GCLOUD_POSIX_COMMAND = '/bin/true'.freeze
|
59
|
+
GCLOUD_WINDOWS_COMMAND = 'cd .'.freeze
|
60
|
+
GCLOUD_CONFIG_COMMAND = ''.freeze
|
61
|
+
ensure
|
62
|
+
$VERBOSE = warn_level
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# FluentLogger exposes the Fluent logger to the gRPC library.
|
69
|
+
module FluentLogger
|
70
|
+
def logger
|
71
|
+
$log # rubocop:disable Style/GlobalVars
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Define a gRPC module-level logger method before grpc/logconfig.rb loads.
|
76
|
+
module GRPC
|
77
|
+
extend FluentLogger
|
78
|
+
end
|
79
|
+
|
80
|
+
# Disable the nurse/strptime gem used by FluentD's TimeParser class in
|
81
|
+
# lib/fluent/time.rb. We found this gem to be slower than the builtin Ruby
|
82
|
+
# parser in recent versions of Ruby. Fortunately FluentD will fall back to the
|
83
|
+
# builtin parser.
|
84
|
+
require 'strptime'
|
85
|
+
# Dummy Strptime class.
|
86
|
+
class Strptime
|
87
|
+
def self.new(_)
|
88
|
+
# empty
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
module Fluent
|
93
|
+
# fluentd output plugin for the Stackdriver Logging API
|
94
|
+
class GoogleCloudOutput < BufferedOutput
|
95
|
+
# Constants for configuration.
|
96
|
+
module ConfigConstants
|
97
|
+
# Default values for JSON payload keys to set the "httpRequest",
|
98
|
+
# "operation", "sourceLocation", "trace" fields in the LogEntry.
|
99
|
+
DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'.freeze
|
100
|
+
DEFAULT_INSERT_ID_KEY = 'logging.googleapis.com/insertId'.freeze
|
101
|
+
DEFAULT_LABELS_KEY = 'logging.googleapis.com/labels'.freeze
|
102
|
+
DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'.freeze
|
103
|
+
DEFAULT_SOURCE_LOCATION_KEY =
|
104
|
+
'logging.googleapis.com/sourceLocation'.freeze
|
105
|
+
DEFAULT_SPAN_ID_KEY = 'logging.googleapis.com/spanId'.freeze
|
106
|
+
DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'.freeze
|
107
|
+
DEFAULT_TRACE_SAMPLED_KEY = 'logging.googleapis.com/trace_sampled'.freeze
|
108
|
+
end
|
109
|
+
|
110
|
+
# Internal constants.
|
111
|
+
module InternalConstants
|
112
|
+
CREDENTIALS_PATH_ENV_VAR = 'GOOGLE_APPLICATION_CREDENTIALS'.freeze
|
113
|
+
DEFAULT_LOGGING_API_URL = 'https://logging.googleapis.com'.freeze
|
114
|
+
|
115
|
+
# The label name of local_resource_id in the json payload. When a record
|
116
|
+
# has this field in the payload, we will use the value to retrieve
|
117
|
+
# monitored resource from Stackdriver Metadata agent.
|
118
|
+
LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'.freeze
|
119
|
+
|
120
|
+
# The regexp matches stackdriver trace id format: 32-byte hex string.
|
121
|
+
# The format is documented in
|
122
|
+
# https://cloud.google.com/trace/docs/reference/v2/rpc/google.devtools.cloudtrace.v1#trace
|
123
|
+
STACKDRIVER_TRACE_ID_REGEXP = Regexp.new('^\h{32}$').freeze
|
124
|
+
|
125
|
+
# Map from each field name under LogEntry to corresponding variables
|
126
|
+
# required to perform field value extraction from the log record.
|
127
|
+
LOG_ENTRY_FIELDS_MAP = {
|
128
|
+
'http_request' => [
|
129
|
+
# The config to specify label name for field extraction from record.
|
130
|
+
'@http_request_key',
|
131
|
+
# Map from subfields' names to their types.
|
132
|
+
[
|
133
|
+
# subfield key in the payload, destination key, cast lambda (opt)
|
134
|
+
%w[cacheFillBytes cache_fill_bytes parse_int],
|
135
|
+
%w[cacheHit cache_hit parse_bool],
|
136
|
+
%w[cacheLookup cache_lookup parse_bool],
|
137
|
+
%w[cacheValidatedWithOriginServer
|
138
|
+
cache_validated_with_origin_server parse_bool],
|
139
|
+
%w[latency latency parse_latency],
|
140
|
+
%w[protocol protocol parse_string],
|
141
|
+
%w[referer referer parse_string],
|
142
|
+
%w[remoteIp remote_ip parse_string],
|
143
|
+
%w[responseSize response_size parse_int],
|
144
|
+
%w[requestMethod request_method parse_string],
|
145
|
+
%w[requestSize request_size parse_int],
|
146
|
+
%w[requestUrl request_url parse_string],
|
147
|
+
%w[serverIp server_ip parse_string],
|
148
|
+
%w[status status parse_int],
|
149
|
+
%w[userAgent user_agent parse_string]
|
150
|
+
],
|
151
|
+
# The grpc version class name.
|
152
|
+
'Google::Cloud::Logging::Type::HttpRequest',
|
153
|
+
# The non-grpc version class name.
|
154
|
+
'Google::Apis::LoggingV2::HttpRequest'
|
155
|
+
],
|
156
|
+
'operation' => [
|
157
|
+
'@operation_key',
|
158
|
+
[
|
159
|
+
%w[id id parse_string],
|
160
|
+
%w[producer producer parse_string],
|
161
|
+
%w[first first parse_bool],
|
162
|
+
%w[last last parse_bool]
|
163
|
+
],
|
164
|
+
'Google::Cloud::Logging::V2::LogEntryOperation',
|
165
|
+
'Google::Apis::LoggingV2::LogEntryOperation'
|
166
|
+
],
|
167
|
+
'source_location' => [
|
168
|
+
'@source_location_key',
|
169
|
+
[
|
170
|
+
%w[file file parse_string],
|
171
|
+
%w[function function parse_string],
|
172
|
+
%w[line line parse_int]
|
173
|
+
],
|
174
|
+
'Google::Cloud::Logging::V2::LogEntrySourceLocation',
|
175
|
+
'Google::Apis::LoggingV2::LogEntrySourceLocation'
|
176
|
+
]
|
177
|
+
}.freeze
|
178
|
+
|
179
|
+
# The name of the WriteLogEntriesPartialErrors field in the error details.
|
180
|
+
PARTIAL_ERROR_FIELD =
|
181
|
+
'type.googleapis.com/google.logging.v2.WriteLogEntriesPartialErrors' \
|
182
|
+
.freeze
|
183
|
+
end
|
184
|
+
|
185
|
+
include Common::ServiceConstants
|
186
|
+
include self::ConfigConstants
|
187
|
+
include self::InternalConstants
|
188
|
+
|
189
|
+
Fluent::Plugin.register_output('google_cloud', self)
|
190
|
+
|
191
|
+
helpers :server, :timer
|
192
|
+
|
193
|
+
PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'.freeze
|
194
|
+
|
195
|
+
# Follows semver.org format.
|
196
|
+
PLUGIN_VERSION = begin
|
197
|
+
# Extract plugin version from file path.
|
198
|
+
match_data = __FILE__.match(
|
199
|
+
%r{fluent-plugin-google-cloud-(?<version>[^/]*)/}
|
200
|
+
)
|
201
|
+
if match_data
|
202
|
+
match_data['version']
|
203
|
+
else
|
204
|
+
# Extract plugin version by finding the spec this file was loaded from.
|
205
|
+
dependency = Gem::Dependency.new('fluent-plugin-google-cloud')
|
206
|
+
all_specs, = Gem::SpecFetcher.fetcher.spec_for_dependency(dependency)
|
207
|
+
matching_version, = all_specs.grep(
|
208
|
+
proc { |spec,| __FILE__.include?(spec.full_gem_path) }
|
209
|
+
) do |spec,|
|
210
|
+
spec.version.to_s
|
211
|
+
end
|
212
|
+
# If no matching version was found, return a valid but obviously wrong
|
213
|
+
# value.
|
214
|
+
matching_version || '0.0.0-unknown'
|
215
|
+
end
|
216
|
+
end.freeze
|
217
|
+
|
218
|
+
# Disable this warning to conform to fluentd config_param conventions.
|
219
|
+
# rubocop:disable Style/HashSyntax
|
220
|
+
|
221
|
+
# Specify project/instance metadata.
|
222
|
+
#
|
223
|
+
# project_id, zone, and vm_id are required to have valid values, which
|
224
|
+
# can be obtained from the metadata service or set explicitly.
|
225
|
+
# Otherwise, the plugin will fail to initialize.
|
226
|
+
#
|
227
|
+
# Note that while 'project id' properly refers to the alphanumeric name
|
228
|
+
# of the project, the logging service will also accept the project number,
|
229
|
+
# so either one is acceptable in this context.
|
230
|
+
#
|
231
|
+
# Whether to attempt to obtain metadata from the local metadata service.
|
232
|
+
# It is safe to specify 'true' even on platforms with no metadata service.
|
233
|
+
config_param :use_metadata_service, :bool, :default => true
|
234
|
+
# A compatibility option to enable the legacy behavior of setting the AWS
|
235
|
+
# location to the availability zone rather than the region.
|
236
|
+
config_param :use_aws_availability_zone, :bool, :default => true
|
237
|
+
# These parameters override any values obtained from the metadata service.
|
238
|
+
config_param :project_id, :string, :default => nil
|
239
|
+
config_param :zone, :string, :default => nil
|
240
|
+
config_param :vm_id, :string, :default => nil
|
241
|
+
config_param :vm_name, :string, :default => nil
|
242
|
+
# Kubernetes-specific parameters, only used to override these values in
|
243
|
+
# the fallback path when the metadata agent is temporarily unavailable.
|
244
|
+
# They have to match the configuration of the metadata agent.
|
245
|
+
config_param :k8s_cluster_name, :string, :default => nil
|
246
|
+
config_param :k8s_cluster_location, :string, :default => nil
|
247
|
+
|
248
|
+
# Map keys from a JSON payload to corresponding LogEntry fields.
|
249
|
+
config_param :http_request_key, :string, :default =>
|
250
|
+
DEFAULT_HTTP_REQUEST_KEY
|
251
|
+
config_param :insert_id_key, :string, :default => DEFAULT_INSERT_ID_KEY
|
252
|
+
config_param :labels_key, :string, :default => DEFAULT_LABELS_KEY
|
253
|
+
config_param :operation_key, :string, :default => DEFAULT_OPERATION_KEY
|
254
|
+
config_param :source_location_key, :string, :default =>
|
255
|
+
DEFAULT_SOURCE_LOCATION_KEY
|
256
|
+
config_param :span_id_key, :string, :default => DEFAULT_SPAN_ID_KEY
|
257
|
+
config_param :trace_key, :string, :default => DEFAULT_TRACE_KEY
|
258
|
+
config_param :trace_sampled_key, :string, :default =>
|
259
|
+
DEFAULT_TRACE_SAMPLED_KEY
|
260
|
+
|
261
|
+
# Whether to try to detect if the record is a text log entry with JSON
|
262
|
+
# content that needs to be parsed.
|
263
|
+
config_param :detect_json, :bool, :default => false
|
264
|
+
# TODO(igorpeshansky): Add a parameter for the text field in the payload.
|
265
|
+
|
266
|
+
# Whether to try to detect if the VM is owned by a "subservice" such as App
|
267
|
+
# Engine of Kubernetes, rather than just associating the logs with the
|
268
|
+
# compute service of the platform. This currently only has any effect when
|
269
|
+
# running on GCE.
|
270
|
+
#
|
271
|
+
# The initial motivation for this is to separate out Kubernetes node
|
272
|
+
# component (Kubelet, etc.) logs from container logs.
|
273
|
+
config_param :detect_subservice, :bool, :default => true
|
274
|
+
# The subservice_name overrides the subservice detection, if provided.
|
275
|
+
config_param :subservice_name, :string, :default => nil
|
276
|
+
|
277
|
+
# Whether to reject log entries with invalid tags. If this option is set to
|
278
|
+
# false, tags will be made valid by converting any non-string tag to a
|
279
|
+
# string, and sanitizing any non-utf8 or other invalid characters.
|
280
|
+
config_param :require_valid_tags, :bool, :default => false
|
281
|
+
|
282
|
+
# The regular expression to use on Kubernetes logs to extract some basic
|
283
|
+
# information about the log source. The regexp must contain capture groups
|
284
|
+
# for pod_name, namespace_name, and container_name.
|
285
|
+
config_param :kubernetes_tag_regexp, :string, :default =>
|
286
|
+
'\.(?<pod_name>[^_]+)_(?<namespace_name>[^_]+)_(?<container_name>.+)$'
|
287
|
+
|
288
|
+
# label_map (specified as a JSON object) is an unordered set of fluent
|
289
|
+
# field names whose values are sent as labels rather than as part of the
|
290
|
+
# struct payload.
|
291
|
+
#
|
292
|
+
# Each entry in the map is a {"field_name": "label_name"} pair. When
|
293
|
+
# the "field_name" (as parsed by the input plugin) is encountered, a label
|
294
|
+
# with the corresponding "label_name" is added to the log entry. The
|
295
|
+
# value of the field is used as the value of the label.
|
296
|
+
#
|
297
|
+
# The map gives the user additional flexibility in specifying label
|
298
|
+
# names, including the ability to use characters which would not be
|
299
|
+
# legal as part of fluent field names.
|
300
|
+
#
|
301
|
+
# Example:
|
302
|
+
# label_map {
|
303
|
+
# "field_name_1": "sent_label_name_1",
|
304
|
+
# "field_name_2": "some.prefix/sent_label_name_2"
|
305
|
+
# }
|
306
|
+
config_param :label_map, :hash, :default => nil
|
307
|
+
|
308
|
+
# labels (specified as a JSON object) is a set of custom labels
|
309
|
+
# provided at configuration time. It allows users to inject extra
|
310
|
+
# environmental information into every message or to customize
|
311
|
+
# labels otherwise detected automatically.
|
312
|
+
#
|
313
|
+
# Each entry in the map is a {"label_name": "label_value"} pair.
|
314
|
+
#
|
315
|
+
# Example:
|
316
|
+
# labels {
|
317
|
+
# "label_name_1": "label_value_1",
|
318
|
+
# "label_name_2": "label_value_2"
|
319
|
+
# }
|
320
|
+
config_param :labels, :hash, :default => nil
|
321
|
+
|
322
|
+
# Whether to use gRPC instead of REST/JSON to communicate to the
|
323
|
+
# Stackdriver Logging API.
|
324
|
+
config_param :use_grpc, :bool, :default => false
|
325
|
+
|
326
|
+
# Whether to enable gRPC compression when communicating with the Stackdriver
|
327
|
+
# Logging API. Only used if 'use_grpc' is set to true.
|
328
|
+
config_param :grpc_compression_algorithm, :enum,
|
329
|
+
list: %i[none gzip],
|
330
|
+
:default => nil
|
331
|
+
|
332
|
+
# Whether valid entries should be written even if some other entries fail
|
333
|
+
# due to INVALID_ARGUMENT or PERMISSION_DENIED errors when communicating to
|
334
|
+
# the Stackdriver Logging API. This flag is no longer used, and is kept for
|
335
|
+
# backwards compatibility, partial_success is enabled for all requests.
|
336
|
+
# TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
|
337
|
+
config_param :partial_success, :bool,
|
338
|
+
:default => true,
|
339
|
+
:skip_accessor => true,
|
340
|
+
:deprecated => 'This feature is permanently enabled'
|
341
|
+
|
342
|
+
# Whether to allow non-UTF-8 characters in user logs. If set to true, any
|
343
|
+
# non-UTF-8 character would be replaced by the string specified by
|
344
|
+
# 'non_utf8_replacement_string'. If set to false, any non-UTF-8 character
|
345
|
+
# would trigger the plugin to error out.
|
346
|
+
config_param :coerce_to_utf8, :bool, :default => true
|
347
|
+
|
348
|
+
# If 'coerce_to_utf8' is set to true, any non-UTF-8 character would be
|
349
|
+
# replaced by the string specified here.
|
350
|
+
config_param :non_utf8_replacement_string, :string, :default => ' '
|
351
|
+
|
352
|
+
# DEPRECATED: The following parameters, if present in the config
|
353
|
+
# indicate that the plugin configuration must be updated.
|
354
|
+
config_param :auth_method, :string, :default => nil
|
355
|
+
config_param :private_key_email, :string, :default => nil
|
356
|
+
config_param :private_key_path, :string, :default => nil
|
357
|
+
config_param :private_key_passphrase, :string,
|
358
|
+
:default => nil,
|
359
|
+
:secret => true
|
360
|
+
|
361
|
+
# The URL of Stackdriver Logging API. Right now this only works with the
|
362
|
+
# gRPC path (use_grpc = true). An unsecured channel is used if the URL
|
363
|
+
# scheme is 'http' instead of 'https'. One common use case of this config is
|
364
|
+
# to provide a mocked / stubbed Logging API, e.g., http://localhost:52000.
|
365
|
+
config_param :logging_api_url, :string, :default => DEFAULT_LOGGING_API_URL
|
366
|
+
|
367
|
+
# Whether to collect metrics about the plugin usage. The mechanism for
|
368
|
+
# collecting and exposing metrics is controlled by the monitoring_type
|
369
|
+
# parameter.
|
370
|
+
config_param :enable_monitoring, :bool, :default => false
|
371
|
+
|
372
|
+
# What system to use when collecting metrics. Possible values are:
|
373
|
+
# - 'prometheus', in this case default registry in the Prometheus
|
374
|
+
# client library is used, without actually exposing the endpoint
|
375
|
+
# to serve metrics in the Prometheus format.
|
376
|
+
# - 'opencensus', in this case the OpenCensus implementation is
|
377
|
+
# used to send metrics directly to Google Cloud Monitoring.
|
378
|
+
# - any other value will result in the absence of metrics.
|
379
|
+
config_param :monitoring_type, :string,
|
380
|
+
:default => Monitoring::PrometheusMonitoringRegistry.name
|
381
|
+
|
382
|
+
# The monitored resource to use for OpenCensus metrics. Only valid
|
383
|
+
# when monitoring_type is set to 'opencensus'. This value is a hash in
|
384
|
+
# the form:
|
385
|
+
# {"type":"gce_instance","labels":{"instance_id":"aaa","zone":"bbb"} (JSON)
|
386
|
+
# or type:gce_instance,labels.instance_id:aaa,labels.zone:bbb (Hash)
|
387
|
+
config_param :metrics_resource, :hash,
|
388
|
+
:symbolize_keys => true, :default => nil
|
389
|
+
|
390
|
+
# Whether to call metadata agent to retrieve monitored resource. This flag
|
391
|
+
# is kept for backwards compatibility, and is no longer used.
|
392
|
+
# TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
|
393
|
+
config_param :enable_metadata_agent, :bool,
|
394
|
+
:default => false,
|
395
|
+
:skip_accessor => true,
|
396
|
+
:deprecated => 'This feature is permanently disabled'
|
397
|
+
|
398
|
+
# The URL of the Metadata Agent. This flag is kept for backwards
|
399
|
+
# compatibility, and is no longer used.
|
400
|
+
# TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
|
401
|
+
config_param :metadata_agent_url, :string,
|
402
|
+
:default => nil,
|
403
|
+
:skip_accessor => true,
|
404
|
+
:deprecated => 'This feature is permanently disabled'
|
405
|
+
|
406
|
+
# Whether to split log entries with different log tags into different
|
407
|
+
# requests when talking to Stackdriver Logging API.
|
408
|
+
config_param :split_logs_by_tag, :bool, :default => false
|
409
|
+
|
410
|
+
# Whether to attempt adjusting invalid log entry timestamps.
|
411
|
+
config_param :adjust_invalid_timestamps, :bool, :default => true
|
412
|
+
|
413
|
+
# Whether to autoformat value of "logging.googleapis.com/trace" to
|
414
|
+
# comply with Stackdriver Trace format
|
415
|
+
# "projects/[PROJECT-ID]/traces/[TRACE-ID]" when setting
|
416
|
+
# LogEntry.trace.
|
417
|
+
config_param :autoformat_stackdriver_trace, :bool, :default => true
|
418
|
+
|
419
|
+
# Port for web server that exposes a /statusz endpoint with
|
420
|
+
# diagnostic information in HTML format. If the value is 0,
|
421
|
+
# the server is not created.
|
422
|
+
config_param :statusz_port, :integer, :default => 0
|
423
|
+
|
424
|
+
# Override for the Google Cloud Monitoring service hostname, or
|
425
|
+
# `nil` to leave as the default.
|
426
|
+
config_param :gcm_service_address, :string, :default => nil
|
427
|
+
|
428
|
+
# rubocop:enable Style/HashSyntax
|
429
|
+
|
430
|
+
# TODO: Add a log_name config option rather than just using the tag?
|
431
|
+
|
432
|
+
# Expose attr_readers to make testing of metadata more direct than only
|
433
|
+
# testing it indirectly through metadata sent with logs.
|
434
|
+
attr_reader :resource, :common_labels, :monitoring_resource
|
435
|
+
|
436
|
+
def initialize
|
437
|
+
super
|
438
|
+
# use the global logger
|
439
|
+
@log = $log # rubocop:disable Style/GlobalVars
|
440
|
+
|
441
|
+
@failed_requests_count = nil
|
442
|
+
@successful_requests_count = nil
|
443
|
+
@dropped_entries_count = nil
|
444
|
+
@ingested_entries_count = nil
|
445
|
+
@retried_entries_count = nil
|
446
|
+
|
447
|
+
@ok_code = nil
|
448
|
+
@uptime_update_time = Time.now.to_i
|
449
|
+
end
|
450
|
+
|
451
|
+
def configure(conf)
|
452
|
+
super
|
453
|
+
|
454
|
+
# TODO(qingling128): Remove this warning after the support is added. Also
|
455
|
+
# remove the comment in the description of this configuration.
|
456
|
+
unless @logging_api_url == DEFAULT_LOGGING_API_URL || @use_grpc
|
457
|
+
@log.warn 'Detected customized logging_api_url while use_grpc is not' \
|
458
|
+
' enabled. Customized logging_api_url for the non-gRPC path' \
|
459
|
+
' is not supported. The logging_api_url option will be' \
|
460
|
+
' ignored.'
|
461
|
+
end
|
462
|
+
|
463
|
+
# Alert on old authentication configuration.
|
464
|
+
unless @auth_method.nil? && @private_key_email.nil? &&
|
465
|
+
@private_key_path.nil? && @private_key_passphrase.nil?
|
466
|
+
extra = []
|
467
|
+
extra << 'auth_method' unless @auth_method.nil?
|
468
|
+
extra << 'private_key_email' unless @private_key_email.nil?
|
469
|
+
extra << 'private_key_path' unless @private_key_path.nil?
|
470
|
+
extra << 'private_key_passphrase' unless @private_key_passphrase.nil?
|
471
|
+
|
472
|
+
raise Fluent::ConfigError,
|
473
|
+
"#{PLUGIN_NAME} no longer supports auth_method.\n" \
|
474
|
+
"Please remove configuration parameters: #{extra.join(' ')}"
|
475
|
+
end
|
476
|
+
|
477
|
+
set_regexp_patterns
|
478
|
+
|
479
|
+
@utils = Common::Utils.new(@log)
|
480
|
+
|
481
|
+
@platform = @utils.detect_platform(@use_metadata_service)
|
482
|
+
|
483
|
+
# Treat an empty setting of the credentials file path environment variable
|
484
|
+
# as unset. This way the googleauth lib could fetch the credentials
|
485
|
+
# following the fallback path.
|
486
|
+
ENV.delete(CREDENTIALS_PATH_ENV_VAR) if
|
487
|
+
ENV[CREDENTIALS_PATH_ENV_VAR] == ''
|
488
|
+
|
489
|
+
# Set required variables: @project_id, @vm_id, @vm_name and @zone.
|
490
|
+
@project_id = @utils.get_project_id(@platform, @project_id)
|
491
|
+
@vm_id = @utils.get_vm_id(@platform, @vm_id)
|
492
|
+
@vm_name = @utils.get_vm_name(@vm_name)
|
493
|
+
@zone = @utils.get_location(@platform, @zone, @use_aws_availability_zone)
|
494
|
+
|
495
|
+
# All metadata parameters must now be set.
|
496
|
+
@utils.check_required_metadata_variables(
|
497
|
+
@platform, @project_id, @zone, @vm_id
|
498
|
+
)
|
499
|
+
|
500
|
+
# Retrieve monitored resource.
|
501
|
+
# Fail over to retrieve monitored resource via the legacy path if we fail
|
502
|
+
# to get it from Metadata Agent.
|
503
|
+
@resource ||= @utils.determine_agent_level_monitored_resource_via_legacy(
|
504
|
+
@platform, @subservice_name, @detect_subservice, @vm_id, @zone
|
505
|
+
)
|
506
|
+
|
507
|
+
if @metrics_resource
|
508
|
+
unless @metrics_resource[:type].is_a?(String)
|
509
|
+
raise Fluent::ConfigError,
|
510
|
+
'metrics_resource.type must be a string:' \
|
511
|
+
" #{@metrics_resource}."
|
512
|
+
end
|
513
|
+
if @metrics_resource.key?(:labels)
|
514
|
+
unless @metrics_resource[:labels].is_a?(Hash)
|
515
|
+
raise Fluent::ConfigError,
|
516
|
+
'metrics_resource.labels must be a hash:' \
|
517
|
+
" #{@metrics_resource}."
|
518
|
+
end
|
519
|
+
extra_keys = @metrics_resource.reject do |k, _|
|
520
|
+
%i[type labels].include?(k)
|
521
|
+
end
|
522
|
+
unless extra_keys.empty?
|
523
|
+
raise Fluent::ConfigError,
|
524
|
+
"metrics_resource has unrecognized keys: #{extra_keys.keys}."
|
525
|
+
end
|
526
|
+
else
|
527
|
+
extra_keys = @metrics_resource.reject do |k, _|
|
528
|
+
k == :type || k.to_s.start_with?('labels.')
|
529
|
+
end
|
530
|
+
unless extra_keys.empty?
|
531
|
+
raise Fluent::ConfigError,
|
532
|
+
"metrics_resource has unrecognized keys: #{extra_keys.keys}."
|
533
|
+
end
|
534
|
+
# Transform the Hash form of the metrics_resource config if necessary.
|
535
|
+
resource_type = @metrics_resource[:type]
|
536
|
+
resource_labels = @metrics_resource.each_with_object({}) \
|
537
|
+
do |(k, v), h|
|
538
|
+
h[k.to_s.sub('labels.', '')] = v if k.to_s.start_with? 'labels.'
|
539
|
+
end
|
540
|
+
@metrics_resource = { type: resource_type, labels: resource_labels }
|
541
|
+
end
|
542
|
+
end
|
543
|
+
|
544
|
+
# If monitoring is enabled, register metrics in the default registry
|
545
|
+
# and store metric objects for future use.
|
546
|
+
if @enable_monitoring
|
547
|
+
unless Monitoring::MonitoringRegistryFactory.supports_monitoring_type(
|
548
|
+
@monitoring_type
|
549
|
+
)
|
550
|
+
@log.warn "monitoring_type '#{@monitoring_type}' is unknown; "\
|
551
|
+
'there will be no metrics'
|
552
|
+
end
|
553
|
+
@monitoring_resource = if @metrics_resource
|
554
|
+
@utils.create_monitored_resource(
|
555
|
+
@metrics_resource[:type], @metrics_resource[:labels]
|
556
|
+
)
|
557
|
+
else
|
558
|
+
@resource
|
559
|
+
end
|
560
|
+
@registry = Monitoring::MonitoringRegistryFactory
|
561
|
+
.create(@monitoring_type, @project_id,
|
562
|
+
@monitoring_resource, @gcm_service_address)
|
563
|
+
# Export metrics every 60 seconds.
|
564
|
+
timer_execute(:export_metrics, 60) { @registry.export }
|
565
|
+
# Uptime should be a gauge, but the metric definition is a counter and
|
566
|
+
# we can't change it.
|
567
|
+
@uptime_metric = @registry.counter(
|
568
|
+
:uptime, [:version], 'Uptime of Logging agent',
|
569
|
+
'agent.googleapis.com/agent', 'CUMULATIVE'
|
570
|
+
)
|
571
|
+
update_uptime
|
572
|
+
timer_execute(:update_uptime, 1) { update_uptime }
|
573
|
+
@successful_requests_count = @registry.counter(
|
574
|
+
:stackdriver_successful_requests_count,
|
575
|
+
%i[grpc code],
|
576
|
+
'A number of successful requests to the Stackdriver Logging API',
|
577
|
+
'agent.googleapis.com/agent', 'CUMULATIVE'
|
578
|
+
)
|
579
|
+
@failed_requests_count = @registry.counter(
|
580
|
+
:stackdriver_failed_requests_count,
|
581
|
+
%i[grpc code],
|
582
|
+
'A number of failed requests to the Stackdriver Logging '\
|
583
|
+
'API, broken down by the error code',
|
584
|
+
'agent.googleapis.com/agent', 'CUMULATIVE'
|
585
|
+
)
|
586
|
+
@ingested_entries_count = @registry.counter(
|
587
|
+
:stackdriver_ingested_entries_count,
|
588
|
+
%i[grpc code],
|
589
|
+
'A number of log entries ingested by Stackdriver Logging',
|
590
|
+
'agent.googleapis.com/agent', 'CUMULATIVE'
|
591
|
+
)
|
592
|
+
@dropped_entries_count = @registry.counter(
|
593
|
+
:stackdriver_dropped_entries_count,
|
594
|
+
%i[grpc code],
|
595
|
+
'A number of log entries dropped by the Stackdriver output plugin',
|
596
|
+
'agent.googleapis.com/agent', 'CUMULATIVE'
|
597
|
+
)
|
598
|
+
@retried_entries_count = @registry.counter(
|
599
|
+
:stackdriver_retried_entries_count,
|
600
|
+
%i[grpc code],
|
601
|
+
'The number of log entries that failed to be ingested by '\
|
602
|
+
'the Stackdriver output plugin due to a transient error '\
|
603
|
+
'and were retried',
|
604
|
+
'agent.googleapis.com/agent', 'CUMULATIVE'
|
605
|
+
)
|
606
|
+
@ok_code = @use_grpc ? GRPC::Core::StatusCodes::OK : 200
|
607
|
+
end
|
608
|
+
|
609
|
+
# Set regexp that we should match tags against later on. Using a list
|
610
|
+
# instead of a map to ensure order.
|
611
|
+
@tag_regexp_list = []
|
612
|
+
if @resource.type == GKE_CONSTANTS[:resource_type]
|
613
|
+
@tag_regexp_list << [
|
614
|
+
GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
|
615
|
+
]
|
616
|
+
end
|
617
|
+
|
618
|
+
# Determine the common labels that should be added to all log entries
|
619
|
+
# processed by this logging agent.
|
620
|
+
@common_labels = determine_agent_level_common_labels(@resource)
|
621
|
+
|
622
|
+
# The resource and labels are now set up; ensure they can't be modified
|
623
|
+
# without first duping them.
|
624
|
+
@resource.freeze
|
625
|
+
@resource.labels.freeze
|
626
|
+
@common_labels.freeze
|
627
|
+
|
628
|
+
if @use_grpc
|
629
|
+
@construct_log_entry = method(:construct_log_entry_in_grpc_format)
|
630
|
+
@write_request = method(:write_request_via_grpc)
|
631
|
+
else
|
632
|
+
@construct_log_entry = method(:construct_log_entry_in_rest_format)
|
633
|
+
@write_request = method(:write_request_via_rest)
|
634
|
+
end
|
635
|
+
|
636
|
+
return unless [Common::Platform::GCE, Common::Platform::EC2].include?(@platform)
|
637
|
+
|
638
|
+
# Log an informational message containing the Logs viewer URL
|
639
|
+
@log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
|
640
|
+
"viewer?project=#{@project_id}&resource=#{@resource.type}/",
|
641
|
+
"instance_id/#{@vm_id}"
|
642
|
+
end
|
643
|
+
|
644
|
+
def start
|
645
|
+
super
|
646
|
+
init_api_client
|
647
|
+
@successful_call = false
|
648
|
+
@timenanos_warning = false
|
649
|
+
|
650
|
+
return unless @statusz_port.positive?
|
651
|
+
|
652
|
+
@log.info "Starting statusz server on port #{@statusz_port}"
|
653
|
+
server_create(:out_google_cloud_statusz,
|
654
|
+
@statusz_port,
|
655
|
+
bind: '127.0.0.1') do |data, conn|
|
656
|
+
if data.split(' ')[1] == '/statusz'
|
657
|
+
write_html_response(data, conn, 200, Statusz.response(self))
|
658
|
+
else
|
659
|
+
write_html_response(data, conn, 404, "Not found\n")
|
660
|
+
end
|
661
|
+
end
|
662
|
+
end
|
663
|
+
|
664
|
+
# Plugin lifecycle hook invoked at teardown. Exporting metrics here is
# best-effort: it may fail (for instance when there was a recent write
# to the same time series), and such a failure is tolerated.
def shutdown
  super
  @registry&.export
end
|
670
|
+
|
671
|
+
# Output plugin entry point: convert one buffered chunk into Logging API
# write requests. Entries are grouped per (tag, local_resource_id); each
# group resolves its own monitored resource and common labels. Requests
# go out one-per-group when @split_logs_by_tag is set, otherwise all
# entries are merged into a single request.
def write(chunk)
  grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)

  requests_to_send = []
  grouped_entries.each do |(tag, local_resource_id), time_record_pairs|
    entries = []
    group_level_resource, group_level_common_labels =
      determine_group_level_monitored_resource_and_labels(
        tag, local_resource_id
      )

    time_record_pairs.each do |time, record|
      entry_level_resource, entry_level_common_labels =
        determine_entry_level_monitored_resource_and_labels(
          group_level_resource, group_level_common_labels, record
        )

      is_json = false
      if @detect_json
        # Save the following fields if available, then clear them out to
        # allow for determining whether we should parse the log or
        # message field. This list should be in sync with
        # https://cloud.google.com/logging/docs/agent/configuration#special-fields.
        preserved_keys = [
          'time',
          'timeNanos',
          'timestamp',
          'timestampNanos',
          'timestampSeconds',
          'severity',
          @http_request_key,
          @insert_id_key,
          @labels_key,
          @operation_key,
          @source_location_key,
          @span_id_key,
          @trace_key,
          @trace_sampled_key
        ]

        # Export the log as a structured payload only when doing so would
        # not lose any additional (non-preserved) metadata fields.
        record_json = nil
        if (record.keys - preserved_keys).length == 1
          %w[log message msg].each do |field|
            record_json = parse_json_or_nil(record[field]) if record.key?(field)
          end
        end
        unless record_json.nil?
          # Propagate preserved fields into the parsed JSON without
          # overriding any key the JSON already carries.
          preserved_keys.each do |key|
            record_json[key] ||= record[key] if
              record.key?(key) && !record_json.key?(key)
          end

          record = record_json
          is_json = true
        end
      end

      ts_secs, ts_nanos, timestamp = compute_timestamp(record, time)
      ts_secs, ts_nanos = adjust_timestamp_if_invalid(timestamp, Time.now) \
        if @adjust_invalid_timestamps && timestamp

      severity = compute_severity(
        entry_level_resource.type, record, entry_level_common_labels
      )

      # User-supplied labels embedded in the payload win over static ones.
      dynamic_labels_from_payload = parse_labels(record)
      if dynamic_labels_from_payload
        entry_level_common_labels.merge!(dynamic_labels_from_payload)
      end

      entry = @construct_log_entry.call(entry_level_common_labels,
                                        entry_level_resource,
                                        severity,
                                        ts_secs,
                                        ts_nanos)

      # Optional entry-level metadata fields pulled out of the record.
      insert_id = record.delete(@insert_id_key)
      entry.insert_id = insert_id if insert_id
      span_id = record.delete(@span_id_key)
      entry.span_id = span_id if span_id
      trace = record.delete(@trace_key)
      entry.trace = compute_trace(trace) if trace
      trace_sampled = record.delete(@trace_sampled_key)
      entry.trace_sampled = parse_bool(trace_sampled) unless trace_sampled.nil?

      set_log_entry_fields(record, entry)
      set_payload(entry_level_resource.type, record, entry, is_json)

      entries.push(entry)
    end
    # Don't send an empty request if we rejected all the entries.
    next if entries.empty?

    entry_log_name = "projects/#{@project_id}/logs/#{log_name(
      tag, group_level_resource
    )}"

    requests_to_send << {
      entries: entries,
      log_name: entry_log_name,
      resource: group_level_resource,
      labels: group_level_common_labels
    }
  end

  if @split_logs_by_tag
    requests_to_send.each { |request| @write_request.call(**request) }
  else
    # Combine all requests into one. The request-level "log_name" is
    # ported to the entry level; the request-level "resource" and
    # "labels" are ignored, as they were already folded into each
    # entry's own resource/labels.
    combined_entries = []
    requests_to_send.each do |request|
      # Modify entries in-place as they are not needed later on.
      request[:entries].each { |entry| entry.log_name = request[:log_name] }
      combined_entries.concat(request[:entries])
    end
    @write_request.call(entries: combined_entries) unless
      combined_entries.empty?
  end
end
|
806
|
+
|
807
|
+
# Fluentd capability probe: this output is safe to run under multiple
# worker processes.
def multi_workers_ready?
  true
end
|
810
|
+
|
811
|
+
# Lazily build and memoize the user-agent style version string reported
# alongside requests (e.g. "google-fluentd/0.13.2").
def self.version_string
  @version_string ||= format('google-fluentd/%s', PLUGIN_VERSION)
end
|
814
|
+
|
815
|
+
# Advance the cumulative "uptime" metric by the wall-clock seconds
# elapsed since the previous update, labeled with the plugin version,
# then record the new update time.
def update_uptime
  now = Time.now.to_i
  elapsed = now - @uptime_update_time
  @uptime_metric.increment(
    by: elapsed,
    labels: { version: Fluent::GoogleCloudOutput.version_string }
  )
  @uptime_update_time = now
end
|
823
|
+
|
824
|
+
private
|
825
|
+
|
826
|
+
# Emit a minimal HTTP/1.1 response on conn and log an access-log style
# line for it. Used by the statusz diagnostic server.
def write_html_response(data, conn, code, response)
  request_line = data.lines.first.strip
  @log.info "#{conn.remote_host} - - " \
            "#{Time.now.strftime('%d/%b/%Y:%H:%M:%S %z')} " \
            "\"#{request_line}\" #{code} #{response.bytesize}"
  # Status line, headers, blank separator, then the body.
  conn.write "HTTP/1.1 #{code}\r\n"
  conn.write "Content-Type: text/html\r\n"
  conn.write "Content-Length: #{response.bytesize}\r\n"
  conn.write "\r\n"
  conn.write response
end
|
836
|
+
|
837
|
+
# Expand a bare Stackdriver trace ID into a full trace resource name
# when autoformatting is enabled; otherwise (or when the value does not
# look like a trace ID) pass the value through untouched.
def compute_trace(trace)
  should_format = @autoformat_stackdriver_trace &&
                  STACKDRIVER_TRACE_ID_REGEXP.match(trace)
  return trace unless should_format

  "projects/#{@project_id}/traces/#{trace}"
end
|
843
|
+
|
844
|
+
# Build a protobuf (gRPC) LogEntry. The timestamp field is attached
# only when "seconds" is an Integer; otherwise it is omitted and the
# downstream Logging API decides how to handle the entry. A non-Integer
# "nanos" is coerced to 0.
def construct_log_entry_in_grpc_format(labels,
                                       resource,
                                       severity,
                                       ts_secs,
                                       ts_nanos)
  entry = Google::Cloud::Logging::V2::LogEntry.new(
    labels: labels,
    resource: Google::Api::MonitoredResource.new(
      type: resource.type,
      labels: resource.labels.to_h
    ),
    severity: grpc_severity(severity)
  )
  if ts_secs.is_a?(Integer)
    ts_nanos = 0 unless ts_nanos.is_a?(Integer)
    entry.timestamp = Google::Protobuf::Timestamp.new(
      seconds: ts_secs,
      nanos: ts_nanos
    )
  end
  entry
end
|
870
|
+
|
871
|
+
# Build a REST-API LogEntry. Empty resource labels are nulled out (this
# mutates the passed-in resource) so that an empty map is not
# serialized into the request.
def construct_log_entry_in_rest_format(labels,
                                       resource,
                                       severity,
                                       ts_secs,
                                       ts_nanos)
  resource.labels = nil if resource.labels.empty?
  Google::Apis::LoggingV2::LogEntry.new(
    labels: labels,
    resource: resource,
    severity: severity,
    timestamp: {
      seconds: ts_secs,
      nanos: ts_nanos
    }
  )
end
|
888
|
+
|
889
|
+
# Send a batch of protobuf LogEntry objects through the gRPC client.
# partial_success is enabled, so individually invalid entries can be
# rejected while the rest of the request is accepted.
#
# Error policy (HTTP <-> gRPC status mapping:
# https://github.com/grpc/grpc/blob/master/src/core/lib/transport/status_conversion.cc):
#   * Server-side statuses are re-raised so Fluentd retries the chunk.
#   * Most client-side statuses drop the whole batch without retry.
#   * InvalidArgument/PermissionDenied are inspected for per-entry
#     details so only the offending entries count as dropped.
def write_request_via_grpc(entries:,
                           log_name: '',
                           resource: nil,
                           labels: {})
  client = api_client
  entries_count = entries.length
  client.write_log_entries(
    entries: entries,
    log_name: log_name,
    # Leave resource nil if it's nil.
    resource: if resource
                Google::Api::MonitoredResource.new(
                  type: resource.type,
                  labels: resource.labels.to_h
                )
              end,
    labels: labels.map do |k, v|
      [k.encode('utf-8'), convert_to_utf8(v)]
    end.to_h,
    partial_success: true
  )
  increment_successful_requests_count
  increment_ingested_entries_count(entries_count)

  # Announce the first successful call explicitly, to aid with
  # verification and troubleshooting.
  unless @successful_call
    @successful_call = true
    @log.info 'Successfully sent gRPC to Stackdriver Logging API.'
  end
rescue Google::Cloud::Error => e
  # GRPC::BadStatus is wrapped in error.cause.
  error = e.cause

  case error
  when GRPC::Internal,         # HTTP 500 (Internal Server Error).
       GRPC::Unimplemented,    # HTTP 501 (Not Implemented).
       GRPC::Unavailable,      # HTTP 503 (Service Unavailable).
       GRPC::DeadlineExceeded  # HTTP 504 (Gateway Timeout).
    # Server error, so retry via re-raising the error.
    increment_retried_entries_count(entries_count, error.code)
    @log.debug "Retrying #{entries_count} log message(s) later.",
               error: error.to_s, error_code: error.code.to_s
    raise error

  when GRPC::Unauthenticated,    # HTTP 401; usually fixed via `gcloud
                                 # auth` or project permission changes.
       GRPC::NotFound,           # HTTP 404 (Not Found).
       GRPC::Aborted,            # HTTP 409 (Conflict).
       GRPC::FailedPrecondition, # HTTP 412 (Precondition Failed).
       GRPC::ResourceExhausted,  # HTTP 429 (Too Many Requests).
       GRPC::Cancelled,          # HTTP 499 (Client Closed Request).
       GRPC::Unknown             # Remaining 4xx/5xx codes. Debatable,
                                 # but dropped to avoid retrying forever
                                 # on client errors.
    # Problems with the request itself: drop the batch, no retry.
    increment_failed_requests_count(error.code)
    increment_dropped_entries_count(entries_count, error.code)
    @log.warn "Dropping #{entries_count} log message(s)",
              error: error.to_s, error_code: error.code.to_s

  when GRPC::InvalidArgument,  # HTTP 400 (Bad Request).
       GRPC::PermissionDenied  # HTTP 403 (Forbidden).
    # As partial_success is enabled, valid entries should have been
    # written even though the request failed overall; only the entries
    # itemized in the error details are actually dropped.
    error_details_map = construct_error_details_map_grpc(e)
    if error_details_map.empty?
      increment_failed_requests_count(error.code)
      increment_dropped_entries_count(entries_count, error.code)
      @log.warn "Dropping #{entries_count} log message(s)",
                error: error.to_s, error_code: error.code.to_s
    else
      error_details_map.each do |(error_code, error_message), indexes|
        partial_errors_count = indexes.length
        increment_dropped_entries_count(partial_errors_count,
                                        error_code)
        entries_count -= partial_errors_count
        @log.warn "Dropping #{partial_errors_count} log message(s)",
                  error: error_message, error_code: error_code.to_s
      end
      # Consider partially successful requests successful.
      increment_successful_requests_count
      increment_ingested_entries_count(entries_count)
    end

  else
    # Assume it's a problem with the request itself and don't retry.
    error_code = if error.respond_to?(:code)
                   error.code
                 else
                   GRPC::Core::StatusCodes::UNKNOWN
                 end
    increment_failed_requests_count(error_code)
    increment_dropped_entries_count(entries_count, error_code)
    @log.error "Unknown response code #{error_code} from the server," \
               " dropping #{entries_count} log message(s)",
               error: error.to_s, error_code: error_code.to_s
  end

# Got an unexpected error (not Google::Cloud::Error) from the
# google-cloud-logging lib.
rescue StandardError => e
  increment_failed_requests_count(GRPC::Core::StatusCodes::UNKNOWN)
  increment_dropped_entries_count(entries_count,
                                  GRPC::Core::StatusCodes::UNKNOWN)
  @log.error "Unexpected error type #{e.class.name} from the client" \
             " library, dropping #{entries_count} log message(s)",
             error: e.to_s
end
|
1020
|
+
|
1021
|
+
# Send a batch of REST LogEntry objects. partial_success lets the API
# accept valid entries even when some entries in the request are bad.
#
# Error policy mirrors write_request_via_grpc: 5xx errors are re-raised
# so Fluentd retries the chunk; 401 and other 4xx errors drop entries,
# with per-entry accounting when the error payload itemizes failures.
def write_request_via_rest(entries:,
                           log_name: '',
                           resource: nil,
                           labels: {})
  client = api_client
  entries_count = entries.length
  client.write_entry_log_entries(
    Google::Apis::LoggingV2::WriteLogEntriesRequest.new(
      entries: entries,
      log_name: log_name,
      resource: resource,
      labels: labels,
      partial_success: true
    ),
    options: { api_format_version: '2' }
  )
  increment_successful_requests_count
  increment_ingested_entries_count(entries_count)

  # Announce the first successful call explicitly, to aid with
  # verification and troubleshooting.
  unless @successful_call
    @successful_call = true
    @log.info 'Successfully sent to Stackdriver Logging API.'
  end
rescue Google::Apis::ServerError => e
  # 5xx server errors. Retry via re-raising the error.
  increment_retried_entries_count(entries_count, e.status_code)
  @log.debug "Retrying #{entries_count} log message(s) later.",
             error: e.to_s, error_code: e.status_code.to_s
  raise e
rescue Google::Apis::AuthorizationError => e
  # 401 authorization error; usually solved via a `gcloud auth` call or
  # by modifying the permissions on the Google Cloud project.
  increment_failed_requests_count(e.status_code)
  increment_dropped_entries_count(entries_count, e.status_code)
  @log.warn "Dropping #{entries_count} log message(s)",
            error: e.to_s, error_code: e.status_code.to_s
rescue Google::Apis::ClientError => e
  # Other 4xx client errors indicate a problem with the request itself
  # and are not retried.
  error_details_map = construct_error_details_map(e)
  if error_details_map.empty?
    increment_failed_requests_count(e.status_code)
    increment_dropped_entries_count(entries_count, e.status_code)
    @log.warn "Dropping #{entries_count} log message(s)",
              error: e.to_s, error_code: e.status_code.to_s
  else
    error_details_map.each do |(error_code, error_message), indexes|
      partial_errors_count = indexes.length
      increment_dropped_entries_count(partial_errors_count, error_code)
      entries_count -= partial_errors_count
      @log.warn "Dropping #{partial_errors_count} log message(s)",
                error: error_message,
                error_code: "google.rpc.Code[#{error_code}]"
    end
    # Consider partially successful requests successful.
    increment_successful_requests_count
    increment_ingested_entries_count(entries_count)
  end
end
|
1083
|
+
|
1084
|
+
# Try to parse input as a JSON object. Returns the parsed value, or nil
# when input is not a String, is not valid JSON, or does not begin
# (after optional JSON whitespace) with a left curly bracket.
def parse_json_or_nil(input)
  return nil unless input.is_a?(String)

  input.each_codepoint do |codepoint|
    case codepoint
    when 123
      # Left curly bracket (U+007B): attempt a full parse.
      begin
        return JSON.parse(input)
      rescue JSON::ParserError
        return nil
      end
    when 9, 10, 13, 32
      # JSON-spec whitespace: tabulation (U+0009), line feed (U+000A),
      # carriage return (U+000D), space (U+0020). Keep scanning for the
      # opening bracket.
      next
    else
      # Any other leading character means this is not a JSON object.
      break
    end
  end
  nil
end
|
1105
|
+
|
1106
|
+
# Set regexp patterns to parse tags and logs.
|
1107
|
+
# Set regexp patterns used to parse tags and logs: compile the optional
# kubernetes tag regexp (from config) and the HTTP latency pattern
# (values like "12.34 s").
def set_regexp_patterns
  if @kubernetes_tag_regexp
    @compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp)
  end

  @compiled_http_latency_regexp =
    /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
end
|
1114
|
+
|
1115
|
+
# Determine the common labels that should be added to all log entries
|
1116
|
+
# processed by this logging agent.
|
1117
|
+
# Determine the common labels that should be attached to every log
# entry processed by this agent, given the agent-level monitored
# resource. User-configured labels (@labels) are always included;
# resource-type specific VM identity labels are layered on top.
def determine_agent_level_common_labels(resource)
  labels = {}
  # User can specify labels via config. We want to capture those as well.
  labels.merge!(@labels) if @labels

  case resource.type
  when APPENGINE_CONSTANTS[:resource_type],
       DATAFLOW_CONSTANTS[:resource_type],
       DATAPROC_CONSTANTS[:resource_type],
       ML_CONSTANTS[:resource_type]
    # GAE, Cloud Dataflow, Cloud Dataproc and Cloud ML: record the
    # identity and location of the underlying VM.
    compute_service = COMPUTE_CONSTANTS[:service]
    labels.merge!(
      "#{compute_service}/resource_id" => @vm_id,
      "#{compute_service}/resource_name" => @vm_name,
      "#{compute_service}/zone" => @zone
    )
  when COMPUTE_CONSTANTS[:resource_type],
       GKE_CONSTANTS[:resource_type]
    # GCE instance and GKE container.
    labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
  when EC2_CONSTANTS[:resource_type]
    # EC2.
    labels["#{EC2_CONSTANTS[:service]}/resource_name"] = @vm_name
  end
  labels
end
|
1145
|
+
|
1146
|
+
# Group the log entries by tag and local_resource_id pairs. Also filter out
|
1147
|
+
# invalid non-Hash entries.
|
1148
|
+
# Group the chunk's entries by [sanitized_tag, local_resource_id] pairs,
# dropping malformed (non-Hash) records and records whose tag cannot be
# sanitized into a valid UTF-8 string. Returns a Hash mapping each pair
# to an array of [time, record] tuples.
def group_log_entries_by_tag_and_local_resource_id(chunk)
  groups = {}
  chunk.msgpack_each do |tag, time, record|
    unless record.is_a?(Hash)
      @log.warn 'Dropping log entries with malformed record: ' \
                "'#{record.inspect}' from tag '#{tag}' at '#{time}'. " \
                'A log record should be in JSON format.'
      next
    end
    sanitized_tag = sanitize_tag(tag)
    if sanitized_tag.nil?
      @log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
                ' A tag should be a string with utf8 characters.'
      next
    end
    # A nil local_resource_id means "fall back to legacy".
    local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
    group_key = [sanitized_tag, local_resource_id].freeze
    (groups[group_key] ||= []) << [time, record]
  end
  groups
end
|
1171
|
+
|
1172
|
+
# Determine the group level monitored resource and common labels shared by a
|
1173
|
+
# collection of entries.
|
1174
|
+
# Determine the monitored resource and common labels shared by all
# entries carrying the same (tag, local_resource_id) pair. Starting
# from the frozen agent-level @resource/@common_labels:
#   1. The resource type may be rewritten from a tag regexp match.
#   2. The resource may be replaced entirely based on local_resource_id
#      (e.g. "k8s_pod.<namespace_name>.<pod_name>" or
#      "k8s_container.<namespace_name>.<pod_name>.<container_name>");
#      on conversion failure the legacy resource stands.
#   3. Type-specific resource/common labels are filled in.
# Returns a frozen [resource, common_labels] pair.
def determine_group_level_monitored_resource_and_labels(tag,
                                                        local_resource_id)
  resource = @resource.dup
  resource.labels = @resource.labels.dup
  common_labels = @common_labels.dup

  # Change the resource type and capture the match groups when the tag
  # matches one of the configured regexps (@tag_regexp_list may be an
  # empty list).
  matched_regexp_group = nil
  @tag_regexp_list.each do |derived_type, tag_regexp|
    matched_regexp_group = tag_regexp.match(tag)
    if matched_regexp_group
      resource.type = derived_type
      break
    end
  end

  # A local_resource_id identifies the monitored resource directly;
  # when it converts successfully it overrides the legacy resource
  # (either the instance resource or the tag-derived one).
  if local_resource_id
    converted_resource = monitored_resource_from_local_resource_id(
      local_resource_id
    )
    resource = converted_resource if converted_resource
  end

  # Once the resource type is settled down, determine the labels.
  case resource.type
  when GKE_CONSTANTS[:resource_type]
    # GKE container: lift labels out of the tag regexp match.
    if matched_regexp_group
      # We only expect one occurrence of each key in the match group.
      resource_labels_candidates =
        matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
      common_labels_candidates = resource_labels_candidates.dup
      resource.labels.merge!(
        delete_and_extract_labels(
          resource_labels_candidates,
          # The kubernetes_tag_regexp is poorly named. 'namespace_name' is
          # in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
          # TODO(qingling128): Figure out how to put this map into
          # constants like GKE_CONSTANTS[:extra_resource_labels].
          'container_name' => 'container_name',
          'namespace_name' => 'namespace_id',
          'pod_name' => 'pod_id'
        )
      )

      common_labels.merge!(
        delete_and_extract_labels(
          common_labels_candidates,
          GKE_CONSTANTS[:extra_common_labels]
            .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h
        )
      )
    end

  # TODO(qingling128): Temporary fallback for metadata agent restarts.
  # K8s resources.
  when K8S_CONTAINER_CONSTANTS[:resource_type],
       K8S_POD_CONSTANTS[:resource_type],
       K8S_NODE_CONSTANTS[:resource_type]
    common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")
  end

  # Cloud Dataflow and Cloud ML: labels set via the 'labels' option are
  # reported as monitored resource labels instead of common labels,
  # e.g. "dataflow.googleapis.com/job_id" => "job_id".
  [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
    next unless resource.type == service_constants[:resource_type]

    resource.labels.merge!(
      delete_and_extract_labels(
        common_labels, service_constants[:extra_resource_labels]
          .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h
      )
    )
  end

  resource.freeze
  resource.labels.freeze
  common_labels.freeze

  [resource, common_labels]
end
|
1271
|
+
|
1272
|
+
# Extract entry level monitored resource and common labels that should be
|
1273
|
+
# applied to individual entries.
|
1274
|
+
# Derive the per-entry monitored resource and common labels from the
# group-level ones plus metadata embedded in the record itself (GKE
# stdout/stderr stream annotation, kubernetes_metadata_filter output,
# and fields named in the user's @label_map). Consumed fields are
# deleted from the record so they don't also appear in the payload.
def determine_entry_level_monitored_resource_and_labels(
  group_level_resource, group_level_common_labels, record
)
  resource = group_level_resource.dup
  resource.labels = group_level_resource.labels.dup
  common_labels = group_level_common_labels.dup

  case resource.type
  when GKE_CONSTANTS[:resource_type]
    # GKE container: move the stdout/stderr annotation from the record
    # into a label.
    common_labels.merge!(
      delete_and_extract_labels(
        record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"
      )
    )

    # If the record has been annotated by the kubernetes_metadata_filter
    # plugin, use that metadata; otherwise rely on the commonLabels
    # populated from the group's tag.
    if record.key?('kubernetes')
      resource.labels.merge!(
        delete_and_extract_labels(
          record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
            .map { |l| [l, l] }.to_h
        )
      )
      common_labels.merge!(
        delete_and_extract_labels(
          record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
            .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h
        )
      )
      # Prepend label/ to all user-defined labels' keys.
      if record['kubernetes'].key?('labels')
        common_labels.merge!(
          delete_and_extract_labels(
            record['kubernetes']['labels'], record['kubernetes']['labels']
              .map { |key, _| [key, "label/#{key}"] }.to_h
          )
        )
      end
      # We've explicitly consumed all the fields we care about -- don't
      # litter the log entries with the remaining fields the metadata
      # filter plugin includes (or an empty 'kubernetes' field).
      record.delete('kubernetes')
      record.delete('docker')
    end
  end

  # Fields whose names appear in the user-configured @label_map are
  # reported as labels rather than sent as part of the payload.
  common_labels.merge!(delete_and_extract_labels(record, @label_map))

  # Cloud Dataflow and Cloud ML: labels set via 'labels'/'label_map' are
  # reported as monitored resource labels instead of common labels,
  # e.g. "dataflow.googleapis.com/job_id" => "job_id".
  [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
    next unless resource.type == service_constants[:resource_type]

    resource.labels.merge!(
      delete_and_extract_labels(
        common_labels, service_constants[:extra_resource_labels]
          .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h
      )
    )
  end

  [resource, common_labels]
end
|
1346
|
+
|
1347
|
+
# Build a Time from second/nanosecond components, or return nil when
# either component cannot be strictly coerced to an Integer.
def time_or_nil(ts_secs, ts_nanos)
  secs = Integer(ts_secs)
  nanos = Integer(ts_nanos)
  # Time.at takes microseconds as its second argument.
  Time.at(secs, nanos / 1_000.0)
rescue ArgumentError, TypeError
  nil
end
|
1352
|
+
|
1353
|
+
# Extract [seconds, nanos, Time] from a record, honoring the timestamp
# conventions the agent accepts, in priority order:
#   1. 'timestamp' => {'seconds' =>, 'nanos' =>} (protobuf style).
#   2. 'timestampSeconds' + 'timestampNanos'.
#   3. 'timeNanos' (deprecated; warned about once per run).
#   4. 'time' as an ISO8601 string (k8s style), falling back to the
#      fluentd event time on parse failure.
#   5. Otherwise the fluentd event time itself.
# Consumed fields are deleted from the record. The returned seconds and
# nanos are coerced to Integers when possible but otherwise passed
# through unchanged; the returned Time may be nil when coercion failed.
def compute_timestamp(record, time)
  if record.key?('timestamp') &&
     record['timestamp'].is_a?(Hash) &&
     record['timestamp'].key?('seconds') &&
     record['timestamp'].key?('nanos')
    ts_secs = record['timestamp']['seconds']
    ts_nanos = record['timestamp']['nanos']
    record.delete('timestamp')
    timestamp = time_or_nil(ts_secs, ts_nanos)
  elsif record.key?('timestampSeconds') &&
        record.key?('timestampNanos')
    ts_secs = record.delete('timestampSeconds')
    ts_nanos = record.delete('timestampNanos')
    timestamp = time_or_nil(ts_secs, ts_nanos)
  elsif record.key?('timeNanos')
    # This is deprecated since the precision is insufficient.
    # Use timestampSeconds/timestampNanos instead.
    nanos = record.delete('timeNanos')
    ts_secs = (nanos / 1_000_000_000).to_i
    ts_nanos = nanos % 1_000_000_000
    unless @timenanos_warning
      # Warn the user this is deprecated, but only once to avoid spam.
      @timenanos_warning = true
      @log.warn 'timeNanos is deprecated - please use ' \
                'timestampSeconds and timestampNanos instead.'
    end
    timestamp = time_or_nil(ts_secs, ts_nanos)
  elsif record.key?('time')
    # k8s ISO8601 timestamp.
    begin
      timestamp = Time.iso8601(record.delete('time'))
    rescue StandardError
      timestamp = Time.at(time)
    end
    ts_secs = timestamp.tv_sec
    ts_nanos = timestamp.tv_nsec
  else
    timestamp = Time.at(time)
    ts_secs = timestamp.tv_sec
    ts_nanos = timestamp.tv_nsec
  end

  # Best-effort strict integer coercion; non-coercible values are
  # passed through unchanged for downstream handling.
  to_int_or_original = lambda do |value|
    begin
      Integer value
    rescue ArgumentError, TypeError
      value
    end
  end
  ts_secs = to_int_or_original.call(ts_secs)
  ts_nanos = to_int_or_original.call(ts_nanos)

  [ts_secs, ts_nanos, timestamp]
end
|
1407
|
+
|
1408
|
+
# Adjust timestamps from the future.
|
1409
|
+
# The base case is:
|
1410
|
+
# 0. The parsed timestamp is less than one day into the future.
|
1411
|
+
# This is allowed by the API, and should be left unchanged.
|
1412
|
+
#
|
1413
|
+
# Beyond that, there are two cases:
|
1414
|
+
# 1. The parsed timestamp is later in the current year:
|
1415
|
+
# This can happen when system log lines from previous years are missing
|
1416
|
+
# the year, so the date parser assumes the current year.
|
1417
|
+
# We treat these lines as coming from last year. This could label
|
1418
|
+
# 2-year-old logs incorrectly, but this probably isn't super important.
|
1419
|
+
#
|
1420
|
+
# 2. The parsed timestamp is past the end of the current year:
|
1421
|
+
# Since the year is different from the current year, this isn't the
|
1422
|
+
# missing year in system logs. It is unlikely that users explicitly
|
1423
|
+
# write logs at a future date. This could result from an unsynchronized
|
1424
|
+
# clock on a VM, or some random value being parsed as the timestamp.
|
1425
|
+
# We reset the timestamp on those lines to the default value and let the
|
1426
|
+
# downstream API handle it.
|
1427
|
+
def adjust_timestamp_if_invalid(timestamp, current_time)
|
1428
|
+
ts_secs = timestamp.tv_sec
|
1429
|
+
ts_nanos = timestamp.tv_nsec
|
1430
|
+
|
1431
|
+
next_year = Time.mktime(current_time.year + 1)
|
1432
|
+
one_day_later = current_time.to_datetime.next_day.to_time
|
1433
|
+
if timestamp < one_day_later # Case 0.
|
1434
|
+
# Leave the timestamp as-is.
|
1435
|
+
elsif timestamp >= next_year # Case 2.
|
1436
|
+
ts_secs = 0
|
1437
|
+
ts_nanos = 0
|
1438
|
+
else # Case 1.
|
1439
|
+
adjusted_timestamp = timestamp.to_datetime.prev_year.to_time
|
1440
|
+
ts_secs = adjusted_timestamp.tv_sec
|
1441
|
+
# The value of ts_nanos should not change when subtracting a year.
|
1442
|
+
end
|
1443
|
+
|
1444
|
+
[ts_secs, ts_nanos]
|
1445
|
+
end
|
1446
|
+
|
1447
|
+
# Determine the severity for a log entry.
# An explicit 'severity' field in the record wins and is removed from the
# payload; otherwise GKE container logs derive severity from their
# stdout/stderr stream label; everything else falls back to 'DEFAULT'.
def compute_severity(resource_type, record, entry_level_common_labels)
  return parse_severity(record.delete('severity')) if record.key?('severity')

  if resource_type == GKE_CONSTANTS[:resource_type]
    stream_label = "#{GKE_CONSTANTS[:service]}/stream"
    stream = entry_level_common_labels[stream_label]
    GKE_CONSTANTS[:stream_severity_map].fetch(stream, 'DEFAULT')
  else
    'DEFAULT'
  end
end
|
1457
|
+
|
1458
|
+
# Populate structured LogEntry subfields (e.g. httpRequest, operation,
# sourceLocation) on 'entry' from matching keys in 'record', casting each
# subfield via its configured cast function and removing consumed keys
# from the payload. Uses gRPC or REST output classes per @use_grpc.
# Any per-field failure is logged and skipped; it never raises.
def set_log_entry_fields(record, entry)
  # TODO(qingling128) On the next major after 0.7.4, make all logEntry
  # subfields behave the same way: if the field is not in the correct
  # format, log an error in the Fluentd log and remove this field from
  # payload. This is the preferred behavior per PM decision.
  LOG_ENTRY_FIELDS_MAP.each do |field_name, config|
    payload_key, subfields, grpc_class, non_grpc_class = config
    begin
      payload_key = instance_variable_get(payload_key)
      fields = record[payload_key]
      record.delete(payload_key) if fields.nil?
      next unless fields.is_a?(Hash)

      extracted_subfields = subfields.each_with_object({}) \
        do |(original_key, destination_key, cast_fn), extracted_fields|
        value = fields.delete(original_key)
        next if value.nil?

        begin
          casted_value = send(cast_fn, value)
        rescue TypeError => err
          # BUG FIX: 'rescue TypeError' previously did not bind the
          # exception, so the 'err' reference below raised NameError
          # instead of logging. Also added the missing space between the
          # two sentences of the message.
          @log.error "Failed to #{cast_fn} for #{field_name}. " \
                     "#{original_key} with value #{value.inspect}.", err
          next
        end
        next if casted_value.nil?

        extracted_fields[destination_key] = casted_value
      end

      next unless extracted_subfields

      # Instantiate the transport-appropriate output class by name.
      output = if @use_grpc
                 Object.const_get(grpc_class).new
               else
                 Object.const_get(non_grpc_class).new
               end
      extracted_subfields.each do |key, value|
        output.send("#{key}=", value)
      end

      # Drop the payload key entirely once all subfields were consumed.
      record.delete(payload_key) if fields.empty?

      entry.send("#{field_name}=", output)
    rescue StandardError => e
      @log.error "Failed to set log entry field for #{field_name}.", e
    end
  end
end
|
1507
|
+
|
1508
|
+
# Extract and validate the labels hash stored under @labels_key in the
# payload. The key is always removed from the record. Returns the labels
# hash, or nil when the field is absent or malformed (non-hash value, or
# any non-string key/value).
def parse_labels(record)
  payload_labels = record.delete(@labels_key)
  return nil unless payload_labels

  unless payload_labels.is_a?(Hash)
    @log.error "Invalid value of '#{@labels_key}' in the payload: " \
               "#{payload_labels}. Labels need to be a JSON object."
    return nil
  end

  bad_keys = []
  payload_labels.each do |key, value|
    bad_keys << key unless key.is_a?(String) && value.is_a?(String)
  end
  if bad_keys.empty?
    payload_labels
  else
    @log.error "Invalid value of '#{@labels_key}' in the payload: " \
               "#{payload_labels}. Labels need string values for all " \
               "keys; keys #{bad_keys} don't."
    nil
  end
rescue StandardError => e
  @log.error "Failed to extract '#{@labels_key}' from payload.", e
  nil
end
|
1533
|
+
|
1534
|
+
# Values permitted by the API for 'severity' (which is an enum).
VALID_SEVERITIES = Set.new(
  %w[DEFAULT DEBUG INFO NOTICE WARNING ERROR CRITICAL ALERT EMERGENCY]
).freeze

# Translates other severity strings to one of the valid values above.
# Lookup happens after upcasing, so keys here are all uppercase.
SEVERITY_TRANSLATIONS = {
  # log4j levels (both current and obsolete).
  'WARN' => 'WARNING',
  'FATAL' => 'CRITICAL',
  'TRACE' => 'DEBUG',
  'TRACE_INT' => 'DEBUG',
  'FINE' => 'DEBUG',
  'FINER' => 'DEBUG',
  'FINEST' => 'DEBUG',
  # java.util.logging levels (only missing ones from above listed).
  'SEVERE' => 'ERROR',
  'CONFIG' => 'DEBUG',
  # nginx levels (only missing ones from above listed).
  'CRIT' => 'CRITICAL',
  'EMERG' => 'EMERGENCY',
  # single-letter levels. Note E->ERROR and D->DEBUG.
  'D' => 'DEBUG',
  'I' => 'INFO',
  'N' => 'NOTICE',
  'W' => 'WARNING',
  'E' => 'ERROR',
  'C' => 'CRITICAL',
  'A' => 'ALERT',
  # other misc. translations.
  'INFORMATION' => 'INFO',
  'ERR' => 'ERROR',
  'F' => 'CRITICAL'
}.freeze
|
1568
|
+
|
1569
|
+
# Normalize a free-form severity value into something the Logging API
# accepts: one of VALID_SEVERITIES, an Integer multiple of 100 clamped to
# 0..800, or 'DEFAULT' when nothing matches.
def parse_severity(severity_str)
  # The API is case insensitive; normalize to uppercase.
  severity = severity_str.to_s.upcase.strip

  # Already a valid enum name — done.
  return severity if VALID_SEVERITIES.include?(severity)

  # Pure-digit strings become integers truncated down to the nearest
  # multiple of 100 and clamped to the API's 0..800 range.
  if severity.match?(/\A\d+\z/)
    begin
      return ((severity.to_i / 100) * 100).clamp(0, 800)
    rescue StandardError
      return 'DEFAULT'
    end
  end

  # Known aliases (log4j, java.util.logging, nginx, single letters, ...),
  # falling back to 'DEFAULT' for anything unrecognized.
  SEVERITY_TRANSLATIONS.fetch(severity, 'DEFAULT')
end
|
1600
|
+
|
1601
|
+
# Maps both severity enum names and their numeric API equivalents
# (multiples of 100 in 0..800, as produced by parse_severity) to the gRPC
# LogSeverity enum values.
GRPC_SEVERITY_MAPPING = {
  'DEFAULT' => Google::Cloud::Logging::Type::LogSeverity::DEFAULT,
  'DEBUG' => Google::Cloud::Logging::Type::LogSeverity::DEBUG,
  'INFO' => Google::Cloud::Logging::Type::LogSeverity::INFO,
  'NOTICE' => Google::Cloud::Logging::Type::LogSeverity::NOTICE,
  'WARNING' => Google::Cloud::Logging::Type::LogSeverity::WARNING,
  'ERROR' => Google::Cloud::Logging::Type::LogSeverity::ERROR,
  'CRITICAL' => Google::Cloud::Logging::Type::LogSeverity::CRITICAL,
  'ALERT' => Google::Cloud::Logging::Type::LogSeverity::ALERT,
  'EMERGENCY' => Google::Cloud::Logging::Type::LogSeverity::EMERGENCY,
  0 => Google::Cloud::Logging::Type::LogSeverity::DEFAULT,
  100 => Google::Cloud::Logging::Type::LogSeverity::DEBUG,
  200 => Google::Cloud::Logging::Type::LogSeverity::INFO,
  300 => Google::Cloud::Logging::Type::LogSeverity::NOTICE,
  400 => Google::Cloud::Logging::Type::LogSeverity::WARNING,
  500 => Google::Cloud::Logging::Type::LogSeverity::ERROR,
  600 => Google::Cloud::Logging::Type::LogSeverity::CRITICAL,
  700 => Google::Cloud::Logging::Type::LogSeverity::ALERT,
  800 => Google::Cloud::Logging::Type::LogSeverity::EMERGENCY
}.freeze
|
1621
|
+
|
1622
|
+
# Map a severity value (enum name or numeric code) to the gRPC
# LogSeverity enum where possible; unknown values pass through untouched.
def grpc_severity(severity)
  # TODO: find out why this doesn't work.
  # if severity.is_a? String
  #   return Google::Cloud::Logging::Type::LogSeverity.resolve(severity)
  # end
  GRPC_SEVERITY_MAPPING.fetch(severity, severity)
end
|
1631
|
+
|
1632
|
+
# Cast a LogEntry subfield value to String (used as a cast function named
# in LOG_ENTRY_FIELDS_MAP).
def parse_string(input)
  input.to_s
end
|
1635
|
+
|
1636
|
+
# Cast a LogEntry subfield value to Integer (used as a cast function named
# in LOG_ENTRY_FIELDS_MAP). Non-numeric strings become 0 per #to_i.
def parse_int(input)
  input.to_i
end
|
1639
|
+
|
1640
|
+
# Cast a LogEntry subfield value to a boolean. Only true, the string
# 'true', and the integer 1 count as truthy; everything else is false.
def parse_bool(input)
  input == true || input == 'true' || input == 1
end
|
1643
|
+
|
1644
|
+
# Parse an HTTP latency string such as "1.42 s" (optional whitespace,
# integer, optional decimal part, "s" suffix) into the representation the
# API expects: a Protobuf Duration under gRPC, otherwise a hash with
# :seconds/:nanos where zero components are omitted. Returns nil when the
# string does not match @compiled_http_latency_regexp so the caller can
# skip setting latency.
def parse_latency(latency)
  match = @compiled_http_latency_regexp.match(latency)
  return nil unless match

  # The whole seconds and fractional part are captured separately so the
  # fraction can be converted to nanoseconds independently.
  seconds = match['seconds'].to_i
  nanos = (match['decimal'].to_f * 1_000_000_000).round

  if @use_grpc
    Google::Protobuf::Duration.new(seconds: seconds, nanos: nanos)
  else
    duration = { seconds: seconds, nanos: nanos }
    duration.delete_if { |_, component| component.zero? }
  end
end
|
1670
|
+
|
1671
|
+
# Fluentd output plugin hook: serialize one event as msgpack bytes for
# buffering. Uses Fluent's shared msgpack engine factory so packing
# options match the rest of the pipeline.
def format(tag, time, record)
  Fluent::MessagePackFactory
    .engine_factory
    .packer
    .write([tag, time, record])
    .to_s
end
|
1678
|
+
|
1679
|
+
# Given a tag, returns the corresponding valid tag if possible, or nil if
# the tag should be rejected. If 'require_valid_tags' is false, non-string
# tags are converted to strings, and invalid characters are sanitized;
# otherwise such tags are rejected.
def sanitize_tag(tag)
  if @require_valid_tags
    acceptable = tag.is_a?(String) && tag != '' && convert_to_utf8(tag) == tag
    return nil unless acceptable
  end

  sanitized = convert_to_utf8(tag.to_s)
  sanitized.empty? ? '_' : sanitized
end
|
1693
|
+
|
1694
|
+
# For every original_label => new_label pair in label_map, remove
# original_label from the hash (when present with a truthy value) and
# re-key its stringified, UTF-8-sanitized value under new_label in the
# returned map. Non-hash arguments yield an empty result.
def delete_and_extract_labels(hash, label_map)
  return {} unless hash.is_a?(Hash) && label_map.is_a?(Hash)

  extracted = {}
  label_map.each do |original_label, new_label|
    value = hash.delete(original_label)
    extracted[new_label] = convert_to_utf8(value.to_s) if value
  end
  extracted
end
|
1707
|
+
|
1708
|
+
# Convert an arbitrary Ruby value into a Google::Protobuf::Value.
# Hashes and arrays recurse via struct_from_ruby/list_from_ruby; strings
# are UTF-8 sanitized; protobuf Struct/ListValue instances pass through.
# Raises Google::Protobuf::Error for unsupported types.
def value_from_ruby(value)
  ret = Google::Protobuf::Value.new
  case value
  when NilClass
    ret.null_value = 0
  when Numeric
    ret.number_value = value
  when String
    ret.string_value = convert_to_utf8(value)
  when TrueClass
    ret.bool_value = true
  when FalseClass
    ret.bool_value = false
  when Google::Protobuf::Struct
    ret.struct_value = value
  when Hash
    # Must stay after the Struct branch so pre-built Structs win.
    ret.struct_value = struct_from_ruby(value)
  when Google::Protobuf::ListValue
    ret.list_value = value
  when Array
    # Must stay after the ListValue branch, mirroring Struct/Hash above.
    ret.list_value = list_from_ruby(value)
  else
    @log.error "Unknown type: #{value.class}"
    raise Google::Protobuf::Error, "Unknown type: #{value.class}"
  end
  ret
end
|
1735
|
+
|
1736
|
+
# Convert a Ruby Array into a Google::Protobuf::ListValue, converting
# each element recursively via value_from_ruby.
def list_from_ruby(arr)
  list = Google::Protobuf::ListValue.new
  arr.each { |element| list.values << value_from_ruby(element) }
  list
end
|
1743
|
+
|
1744
|
+
# Convert a Ruby Hash into a Google::Protobuf::Struct. Keys are
# stringified and UTF-8 sanitized; ||= keeps the first occurrence when
# two keys sanitize to the same field name.
def struct_from_ruby(hash)
  struct = Google::Protobuf::Struct.new
  hash.each do |key, value|
    field_name = convert_to_utf8(key.to_s)
    struct.fields[field_name] ||= value_from_ruby(value)
  end
  struct
end
|
1751
|
+
|
1752
|
+
# TODO(qingling128): Fix the inconsistent behavior of 'message', 'log' and
# 'msg' in the next major version 1.0.0.
# Populate exactly one of json_payload/text_payload on the entry:
# - records pre-detected as JSON are shipped as JSON;
# - unstructured GKE container logs use the 'log' field as text;
# - a record whose only key is 'message' collapses to that text;
# - everything else is shipped as a JSON payload of the whole record.
def set_payload(resource_type, record, entry, is_json)
  # Only one of {text_payload, json_payload} will be set.
  text_payload = nil
  json_payload = nil
  # Use JSON if we found valid JSON, or text payload in the following
  # cases:
  # 1. This is an unstructured Container log and the 'log' key is available
  # 2. The only remaining key is 'message'
  if is_json
    json_payload = record
  elsif GKE_CONSTANTS[:resource_type] == resource_type && record.key?('log')
    text_payload = record['log']
  elsif record.size == 1 && record.key?('message')
    text_payload = record['message']
  else
    json_payload = record
  end

  if json_payload
    # gRPC needs a Protobuf Struct; REST accepts the plain hash.
    entry.json_payload = if @use_grpc
                           struct_from_ruby(json_payload)
                         else
                           json_payload
                         end
  elsif text_payload
    # Coerce to string; gRPC additionally requires valid UTF-8.
    text_payload = text_payload.to_s
    entry.text_payload = if @use_grpc
                           convert_to_utf8(text_payload)
                         else
                           text_payload
                         end
  end
end
|
1787
|
+
|
1788
|
+
# Compute the URL-encoded log name for an entry. App Engine logs get a
# service prefix to prevent namespace collisions; Kubernetes logs prefer
# the (sanitized) container name over the Fluentd tag when available.
def log_name(tag, resource)
  name = tag
  case resource.type
  when APPENGINE_CONSTANTS[:resource_type]
    # Prefix Managed VM logs to prevent namespace collisions.
    name = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
  when GKE_CONSTANTS[:resource_type]
    # Use just the container name as the log name if we have it.
    if resource.labels&.key?('container_name')
      candidate = sanitize_tag(resource.labels['container_name'])
      name = candidate unless candidate.nil?
    end
  end
  ERB::Util.url_encode(name)
end
|
1802
|
+
|
1803
|
+
# Build and memoize (@client) the Logging API client, either the gRPC
# LoggingService client (with optional compression, TLS + application
# default credentials, and a custom user agent) or the REST client with
# application-default authorization.
# Raises Fluent::ConfigError when @logging_api_url has no parseable host.
def init_api_client
  # Set up the logger for the auto-generated Google Cloud APIs.
  Google::Apis.logger = @log
  if @use_grpc
    uri = URI.parse(@logging_api_url)
    host = uri.host
    unless host
      raise Fluent::ConfigError,
            'The logging_api_url option specifies an invalid URL:' \
            " #{@logging_api_url}."
    end
    # Optional channel-level compression, configured by name.
    if @grpc_compression_algorithm
      compression_options =
        GRPC::Core::CompressionOptions.new(
          default_algorithm: @grpc_compression_algorithm
        )
      compression_channel_args = compression_options.to_channel_arg_hash
    else
      compression_channel_args = {}
    end
    # https => TLS channel credentials composed with ADC call credentials;
    # anything else is treated as an insecure (e.g. local test) channel.
    if uri.scheme == 'https'
      ssl_creds = GRPC::Core::ChannelCredentials.new
      authentication = Google::Auth.get_application_default
      creds = GRPC::Core::CallCredentials.new(authentication.updater_proc)
      creds = ssl_creds.compose(creds)
    else
      creds = :this_channel_is_insecure
    end
    port = ":#{uri.port}" if uri.port
    user_agent = \
      "#{PLUGIN_NAME}/#{PLUGIN_VERSION} grpc-ruby/#{GRPC::VERSION} " \
      "#{Google::Apis::OS_VERSION}"
    channel_args = { 'grpc.primary_user_agent' => user_agent }
                   .merge!(compression_channel_args)
    @client = Google::Cloud::Logging::V2::LoggingService::Client.new do |config|
      config.credentials = GRPC::Core::Channel.new(
        "#{host}#{port}", channel_args, creds
      )
    end
  else
    # TODO: Use a non-default ClientOptions object.
    Google::Apis::ClientOptions.default.application_name = PLUGIN_NAME
    Google::Apis::ClientOptions.default.application_version = PLUGIN_VERSION
    @client = Google::Apis::LoggingV2::LoggingService.new
    @client.authorization = Google::Auth.get_application_default(
      Common::LOGGING_SCOPE
    )
  end
end
|
1852
|
+
|
1853
|
+
# Return the memoized API client, refreshing the REST access token first
# when it has expired.
def api_client
  # For gRPC side, the Channel will take care of tokens and their renewal
  # (https://grpc.io/docs/guides/auth.html#authentication-api).
  if !@use_grpc && @client.authorization.expired?
    begin
      @client.authorization.fetch_access_token!
    rescue MultiJson::ParseError
      # Workaround an issue in the API client; just re-raise a more
      # descriptive error for the user (which will still cause a retry).
      raise Google::APIClient::ClientError,
            'Unable to fetch access token (no scopes configured?)'
    end
  end
  @client
end
|
1868
|
+
|
1869
|
+
# Encode input as UTF-8. If 'coerce_to_utf8' is set to true in the config,
# invalid or unmappable bytes are replaced with
# 'non_utf8_replacement_string'; if set to false, a non-UTF-8 input logs
# an explanatory error and re-raises the EncodingError.
def convert_to_utf8(input)
  unless @coerce_to_utf8
    begin
      return input.encode('utf-8')
    rescue EncodingError
      @log.error 'Encountered encoding issues potentially due to non ' \
                 'UTF-8 characters. To allow non-UTF-8 characters and ' \
                 'replace them with spaces, please set "coerce_to_utf8" ' \
                 'to true.'
      raise
    end
  end

  replacement_opts = {
    invalid: :replace,
    undef: :replace,
    replace: @non_utf8_replacement_string
  }
  input.encode('utf-8', **replacement_opts)
end
|
1893
|
+
|
1894
|
+
# Extract a map of error details from a potentially partially successful
# REST request.
#
# The keys in this map are [error_code, error_message] pairs, and the
# values are lists of stringified indexes of the log entries that failed
# with that error. The root-level "code"/"message"/"status" in the error
# body merely echo the first failed entry, so they are ignored; instead
# we locate the detail whose '@type' is PARTIAL_ERROR_FIELD
# (WriteLogEntriesPartialErrors), whose 'logEntryErrors' hash maps each
# failed entry index to a {code, message} pair. For example, entries 1
# and 3 both failing with code 3 / "Log name contains illegal character :"
# produce:
#   { [3, 'Log name contains illegal character :'] => ['1', '3'] }
# Any shape violation raises JSON::ParserError internally, which is
# logged as a warning and turned into an empty map.
def construct_error_details_map(error)
  # Default each key to a fresh list so indexes can be appended directly.
  error_details_map = Hash.new { |h, k| h[k] = [] }

  error_details = ensure_array(
    ensure_hash(ensure_hash(JSON.parse(error.body))['error'])['details']
  )
  partial_errors = error_details.detect(
    -> { raise JSON::ParserError, "No type #{PARTIAL_ERROR_FIELD}." }
  ) do |error_detail|
    ensure_hash(error_detail)['@type'] == PARTIAL_ERROR_FIELD
  end
  log_entry_errors = ensure_hash(
    ensure_hash(partial_errors)['logEntryErrors']
  )
  log_entry_errors.each do |index, log_entry_error|
    error_hash = ensure_hash(log_entry_error)
    unless error_hash['code'] && error_hash['message']
      raise JSON::ParserError,
            "Entry #{index} is missing 'code' or 'message'."
    end
    error_key = [error_hash['code'], error_hash['message']].freeze
    # TODO(qingling128): Convert indexes to integers.
    error_details_map[error_key] << index
  end
  error_details_map
rescue JSON::ParserError => e
  @log.warn 'Failed to extract log entry errors from the error details:' \
            " #{error.body}.", error: e
  {}
end
|
1999
|
+
|
2000
|
+
# Extract a map of error details from a potentially partially successful
# gRPC request.
#
# The keys in this map are [error_code, error_message] pairs, and the
# values are lists of (integer) indexes of the log entries that failed
# with that error. The gax error's status_details must begin with a
# WriteLogEntriesPartialErrors message whose log_entry_errors hash maps
# each failed entry index to a Google::Rpc::Status. For example, entries
# 1 and 3 both failing with code 3 / "Log name contains illegal
# character :" produce:
#   { [3, 'Log name contains illegal character :'] => [1, 3] }
# Any shape violation raises JSON::ParserError internally, which is
# logged as a warning and turned into an empty map.
def construct_error_details_map_grpc(gax_error)
  @log.error "construct_error_details_map_grpc: #{gax_error}"
  # Default each key to a fresh list so indexes can be appended directly.
  error_details_map = Hash.new { |h, k| h[k] = [] }
  error_details = ensure_array(gax_error.status_details)
  raise JSON::ParserError, 'The error details are empty.' if
    error_details.empty?
  raise JSON::ParserError, 'No partial error info in error details.' unless
    error_details[0].is_a?(
      Google::Cloud::Logging::V2::WriteLogEntriesPartialErrors
    )

  log_entry_errors = ensure_hash(error_details[0].log_entry_errors)
  log_entry_errors.each do |index, log_entry_error|
    error_key = [log_entry_error[:code], log_entry_error[:message]].freeze
    error_details_map[error_key] << index
  end
  error_details_map
rescue JSON::ParserError => e
  @log.warn 'Failed to extract log entry errors from the error details:' \
            " #{gax_error.details.inspect}.", error: e
  {}
end
|
2061
|
+
|
2062
|
+
# Take a locally unique resource id (e.g. "k8s_container.<ns>.<pod>.<c>",
# "k8s_pod.<ns>.<pod>", "k8s_node.<node>") and convert it to the globally
# unique monitored resource, filling in cluster name/location from config
# or, failing that, from GCE instance metadata. Returns nil when the id
# does not match any known pattern or when the cluster name/location
# cannot be determined (in which case the caller falls back to the GKE or
# GCE resource type).
def monitored_resource_from_local_resource_id(local_resource_id)
  # The regex literals are on the left of =~, so the named captures
  # (resource_type, namespace_name, pod_name, container_name, node_name)
  # become local variables below.
  return unless
    /^
    (?<resource_type>k8s_container)
    \.(?<namespace_name>[0-9a-z-]+)
    \.(?<pod_name>[.0-9a-z-]+)
    \.(?<container_name>[0-9a-z-]+)$/x =~ local_resource_id ||
    /^
    (?<resource_type>k8s_pod)
    \.(?<namespace_name>[0-9a-z-]+)
    \.(?<pod_name>[.0-9a-z-]+)$/x =~ local_resource_id ||
    /^
    (?<resource_type>k8s_node)
    \.(?<node_name>[0-9a-z-]+)$/x =~ local_resource_id

  # Clear name and location if they're explicitly set to empty.
  @k8s_cluster_name = nil if @k8s_cluster_name == ''
  @k8s_cluster_location = nil if @k8s_cluster_location == ''

  begin
    @k8s_cluster_name ||= @utils.fetch_gce_metadata(
      @platform, 'instance/attributes/cluster-name'
    )
    @k8s_cluster_location ||= @utils.fetch_gce_metadata(
      @platform, 'instance/attributes/cluster-location'
    )
  rescue StandardError => e
    @log.error 'Failed to retrieve k8s cluster name and location.', \
               error: e
  end
  case resource_type
  when K8S_CONTAINER_CONSTANTS[:resource_type]
    labels = {
      'namespace_name' => namespace_name,
      'pod_name' => pod_name,
      'container_name' => container_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = GKE_CONSTANTS[:resource_type]
  when K8S_POD_CONSTANTS[:resource_type]
    labels = {
      'namespace_name' => namespace_name,
      'pod_name' => pod_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = GKE_CONSTANTS[:resource_type]
  when K8S_NODE_CONSTANTS[:resource_type]
    labels = {
      'node_name' => node_name,
      'cluster_name' => @k8s_cluster_name,
      'location' => @k8s_cluster_location
    }
    fallback_resource = COMPUTE_CONSTANTS[:resource_type]
  end
  unless @k8s_cluster_name && @k8s_cluster_location
    # NOTE(review): 'e' is nil here unless the metadata fetch above
    # actually raised — confirm the logger tolerates error: nil.
    @log.error "Failed to construct #{resource_type} resource locally." \
               ' Falling back to writing logs against' \
               " #{fallback_resource} resource.", error: e
    return
  end
  constructed_resource = Google::Apis::LoggingV2::MonitoredResource.new(
    type: resource_type,
    labels: labels
  )
  @log.debug("Constructed #{resource_type} resource locally: " \
             "#{constructed_resource.inspect}")
  constructed_resource
end
|
2134
|
+
|
2135
|
+
# Coerce the value to a Ruby Array; raise JSON::ParserError naming the
# actual class otherwise (used while walking parsed error responses).
def ensure_array(value)
  converted = Array.try_convert(value)
  raise JSON::ParserError, value.class.to_s if converted.nil?

  converted
end
|
2139
|
+
|
2140
|
+
# Coerce the value to a Ruby Hash; raise JSON::ParserError naming the
# actual class otherwise (used while walking parsed error responses).
def ensure_hash(value)
  converted = Hash.try_convert(value)
  raise JSON::ParserError, value.class.to_s if converted.nil?

  converted
end
|
2144
|
+
|
2145
|
+
# Bump the counter of successful API requests, labeled by transport and
# the OK status code. No-op when monitoring is disabled.
def increment_successful_requests_count
  counter = @successful_requests_count
  return unless counter

  counter.increment(labels: { grpc: @use_grpc, code: @ok_code })
end
|
2153
|
+
|
2154
|
+
# Bump the counter of failed API requests, labeled by transport and the
# provided status code. No-op when monitoring is disabled.
def increment_failed_requests_count(code)
  counter = @failed_requests_count
  return unless counter

  counter.increment(labels: { grpc: @use_grpc, code: code })
end
|
2163
|
+
|
2164
|
+
# Bump the counter of log entries successfully ingested by the
# Stackdriver Logging API by 'count'. No-op when monitoring is disabled.
def increment_ingested_entries_count(count)
  counter = @ingested_entries_count
  return unless counter

  counter.increment(labels: { grpc: @use_grpc, code: @ok_code }, by: count)
end
|
2173
|
+
|
2174
|
+
# Bump the counter of log entries dropped (not ingested by the
# Stackdriver Logging API) by 'count', labeled by the failure status
# code. No-op when monitoring is disabled.
def increment_dropped_entries_count(count, code)
  counter = @dropped_entries_count
  return unless counter

  counter.increment(labels: { grpc: @use_grpc, code: code }, by: count)
end
|
2183
|
+
|
2184
|
+
# Bump the counter of log entries that will be retried after a transient
# failure, by 'count', labeled by the failure status code. No-op when
# monitoring is disabled. (The code clearly increments the retried
# counter; the previous comment was a copy-paste of the dropped-entries
# one.)
def increment_retried_entries_count(count, code)
  counter = @retried_entries_count
  return unless counter

  counter.increment(labels: { grpc: @use_grpc, code: code }, by: count)
end
|
2193
|
+
end
|
2194
|
+
end
|
2195
|
+
|
2196
|
+
module Google
  module Apis
    module LoggingV2
      # Reopened so that MonitoredResource#dup also duplicates the labels
      # hash: mutating a copy's labels must not leak into the original.
      class MonitoredResource
        def dup
          copy = super
          copy.labels = labels.dup
          copy
        end
      end
    end
  end
end
|