fluent-plugin-google-cloud 0.8.4 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +49 -35
- data/fluent-plugin-google-cloud.gemspec +3 -3
- data/lib/fluent/plugin/common.rb +386 -0
- data/lib/fluent/plugin/filter_analyze_config.rb +344 -0
- data/lib/fluent/plugin/out_google_cloud.rb +81 -360
- data/test/plugin/asserts.rb +77 -0
- data/test/plugin/base_test.rb +71 -186
- data/test/plugin/constants.rb +67 -1
- data/test/plugin/data/google-fluentd-baseline.conf +24 -0
- data/test/plugin/data/google-fluentd-custom.conf +40 -0
- data/test/plugin/test_driver.rb +1 -14
- data/test/plugin/test_filter_analyze_config.rb +200 -0
- data/test/plugin/utils.rb +147 -0
- metadata +19 -7
@@ -0,0 +1,344 @@
|
|
1
|
+
# Copyright 2020 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
require 'fileutils'
|
16
|
+
require 'fluent/config'
|
17
|
+
require 'fluent/config/v1_parser'
|
18
|
+
require 'set'
|
19
|
+
|
20
|
+
require_relative 'common'
|
21
|
+
require_relative 'monitoring'
|
22
|
+
|
23
|
+
module Fluent
|
24
|
+
# Fluentd filter plugin to analyze configuration usage.
|
25
|
+
#
|
26
|
+
# For documentation on inspecting parsed configuration elements, see
|
27
|
+
# https://www.rubydoc.info/github/fluent/fluentd/Fluent/Config/Element
|
28
|
+
class AnalyzeConfigFilter < Filter
|
29
|
+
include Fluent::Config
|
30
|
+
Fluent::Plugin.register_filter('analyze_config', self)
|
31
|
+
|
32
|
+
# Lookup tables used when turning the parsed configuration into metrics.
# The outer hashes and the nested collections are all frozen so that
# these shared constants cannot be modified by accident.
module Constants
  # Built-in plugins that are ok to reference in metrics, keyed by the
  # name of the top-level config element they appear in.
  KNOWN_PLUGINS = {
    'filter' => Set.new(%w(
      geoip
      grep
      parser
      record_transformer
      stdout
    )).freeze,
    'match' => Set.new(%w(
      copy
      elasticsearch
      exec
      exec_filter
      file
      forward
      http
      kafka
      mongo
      mongo_replset
      null
      relabel
      rewrite_tag_filter
      roundrobin
      s3
      secondary_file
      stdout
      webhdfs
    )).freeze,
    'source' => Set.new(%w(
      dummy
      exec
      forward
      http
      monitor_agent
      syslog
      tail
      tcp
      udp
      unix
      windows_eventlog
    )).freeze
  }.freeze

  # For Google plugins, we collect metrics on the params listed here.
  # Keys are plugin '@type' values; values are the parameter names.
  GOOGLE_PLUGIN_PARAMS = {
    'google_cloud' => %w(
      adjust_invalid_timestamps
      auth_method
      autoformat_stackdriver_trace
      coerce_to_utf8
      detect_json
      enable_monitoring
      gcm_service_address
      grpc_compression_algorithm
      http_request_key
      insert_id_key
      label_map
      labels
      labels_key
      logging_api_url
      monitoring_type
      non_utf8_replacement_string
      operation_key
      private_key_email
      private_key_passphrase
      private_key_path
      project_id
      source_location_key
      span_id_key
      statusz_port
      trace_key
      trace_sampled_key
      use_grpc
      use_metadata_service
      vm_id
      vm_name
      zone
    ).freeze,
    'detect_exceptions' => %w(
      languages
      max_bytes
      max_lines
      message
      multiline_flush_interval
      remove_tag_prefix
      stream
    ).freeze
  }.freeze
end
|
123
|
+
|
124
|
+
include self::Constants
|
125
|
+
|
126
|
+
# Disable this warning to conform to fluentd config_param conventions.
# rubocop:disable Style/HashSyntax

# The root configuration file of google-fluentd package.
# This only applies to Linux.
config_param :google_fluentd_config_path,
             :string,
             :default => '/etc/google-fluentd/google-fluentd.conf'
# Baseline configuration for comparing with local
# customizations.
config_param :google_fluentd_baseline_config_path,
             :string,
             :default => '/etc/google-fluentd/baseline/google-fluentd.conf'

# What system to use when collecting metrics. The value is handed to
# Monitoring::MonitoringRegistryFactory, the same factory the
# google_cloud output plugin uses. Possible values are:
# - 'prometheus', in this case default registry in the Prometheus
#   client library is used, without actually exposing the endpoint
#   to serve metrics in the Prometheus format.
# - 'opencensus', in this case the OpenCensus implementation is
#   used to send metrics directly to Google Cloud Monitoring.
# - any other value will result in the absence of metrics.
config_param :monitoring_type, :string,
             :default => Monitoring::PrometheusMonitoringRegistry.name

# Override for the Google Cloud Monitoring service hostname, or
# `nil` to leave as the default.
config_param :gcm_service_address, :string, :default => nil

# rubocop:enable Style/HashSyntax
|
153
|
+
|
154
|
+
# Starts the plugin: defers to the parent class, then switches the
# instance logger to fluentd's global logger and announces start-up.
def start
  super
  @log = $log # rubocop:disable Style/GlobalVars
  @log.info('Started the analyze_config plugin to analyze configuration.')
end
|
161
|
+
|
162
|
+
# Parses the fluentd configuration file at +path+ with the V1 parser
# and returns the parser's result.
#
# Embedded Ruby in the file is never executed: the evaluation context
# handed to the parser has its instance_eval replaced with one that
# simply hands the source string back. See
# https://github.com/fluent/fluentd/blob/master/lib/fluent/config/literal_parser.rb
def parse_config(path)
  contents = File.read(path)
  file_name = File.basename(path)
  directory = File.dirname(path)

  inert_context = Kernel.binding
  def inert_context.instance_eval(code)
    code
  end

  Fluent::Config::V1Parser.parse(
    contents, file_name, directory, inert_context)
end
|
176
|
+
|
177
|
+
# Returns a name for identifying plugins we ship by default.
#
# The name is built from the element name and its '@type', plus one
# distinguishing detail for the types that commonly appear more than once:
# - syslog: the 'protocol_type' value.
# - tail: the base name of 'pos_file' without its '.pos' extension.
#
# e is a parsed config element; only e.name and e[key] are used.
def default_plugin_name(e)
  type = e['@type']
  case type
  when 'syslog'
    "#{e.name}/syslog/#{e['protocol_type']}"
  when 'tail'
    pos_file = e['pos_file']
    # A tail source without pos_file would make File.basename raise
    # TypeError; fall back to the plain element/type name instead.
    return "#{e.name}/tail" if pos_file.nil?
    "#{e.name}/tail/#{File.basename(pos_file, '.pos')}"
  else
    "#{e.name}/#{type}"
  end
end
|
188
|
+
|
189
|
+
# Returns a name for identifying plugins not in our default
# config. This should not contain arbitrary user-supplied data, so the
# '@type' is only included when it is listed in KNOWN_PLUGINS for this
# kind of element; otherwise just the element name is returned.
def custom_plugin_name(e)
  element_name = e.name
  recognized = KNOWN_PLUGINS.key?(element_name) &&
               KNOWN_PLUGINS[element_name].include?(e['@type'])
  return element_name.to_s unless recognized

  "#{element_name}/#{e['@type']}"
end
|
199
|
+
|
200
|
+
# Reports whether the element, or any element nested inside it, contains
# the start of a Ruby interpolation ('#{') in its argument or in any of
# its parameter values.
def embedded_ruby?(e)
  marker = '#{'
  return true if e.arg.include?(marker)
  return true if e.any? { |_, value| value.include?(marker) }

  e.elements.any? { |child| embedded_ruby?(child) }
end
|
205
|
+
|
206
|
+
# Configures the plugin and, when both the active and the baseline
# google-fluentd configuration files exist, records metrics describing
# how the active configuration differs from the baseline:
# - one "enabled plugins" count per top-level config element;
# - for plugins listed in GOOGLE_PLUGIN_PARAMS, one "config usage" count
#   per tracked parameter, plus a "bool values" count for parameters
#   whose value is the literal string 'true' or 'false'.
#
# conf is the configuration element forwarded to the parent class.
#
# The analysis is strictly best effort: any StandardError raised in this
# method (including by the call to super) is logged as a warning and
# swallowed so that the agent keeps running.
def configure(conf)
  super
  if File.file?(@google_fluentd_config_path) &&
     File.file?(@google_fluentd_baseline_config_path)
    @log.info(
      'google-fluentd configuration file found at' \
      " #{@google_fluentd_config_path}. " \
      'google-fluentd baseline configuration file found at' \
      " #{@google_fluentd_baseline_config_path}. " \
      'google-fluentd Analyzing configuration.')

    utils = Common::Utils.new(@log)
    platform = utils.detect_platform(true)
    project_id = utils.get_project_id(platform, nil)
    vm_id = utils.get_vm_id(platform, nil)
    zone = utils.get_location(platform, nil, true)

    # All metadata parameters must now be set.
    utils.check_required_metadata_variables(
      platform, project_id, zone, vm_id)

    # Build the agent-level monitored resource via the legacy path.
    resource = utils.determine_agent_level_monitored_resource_via_legacy(
      platform, nil, false, vm_id, zone)

    unless Monitoring::MonitoringRegistryFactory.supports_monitoring_type(
      @monitoring_type)
      @log.warn "monitoring_type '#{@monitoring_type}' is unknown; "\
                'there will be no metrics'
    end
    registry = Monitoring::MonitoringRegistryFactory.create(
      @monitoring_type, project_id, resource, @gcm_service_address)

    # The declared labels must list every label passed to increment
    # below, including has_ruby_snippet.
    plugin_usage = registry.counter(
      :stackdriver_enabled_plugins,
      [:plugin_name, :is_default_plugin, :has_default_value,
       :has_ruby_snippet],
      'Enabled plugins')
    config_usage = registry.counter(
      :stackdriver_config_usage,
      [:plugin_name, :param, :is_present, :has_default_value],
      'Parameter usage for Google Cloud plugins')
    config_bool_values = registry.counter(
      :stackdriver_config_bool_values,
      [:plugin_name, :param, :value],
      'Values for bool parameters in Google Cloud plugins')

    config = parse_config(@google_fluentd_config_path)
    baseline_config = parse_config(@google_fluentd_baseline_config_path)

    # Create hash of all baseline elements by their plugin names.
    baseline_elements = baseline_config.elements.map do |element|
      [default_plugin_name(element), element]
    end.to_h
    # May be nil when the baseline has no top-level google_cloud element.
    baseline_google_element = baseline_config.elements.find do |element|
      element['@type'] == 'google_cloud'
    end

    # Look at each top-level config element and see whether it
    # matches the baseline value.
    #
    # Note on custom configurations: If the plugin has a custom
    # value (e.g. if a tail plugin has pos_file
    # /var/lib/google-fluentd/pos/my-custom-value.pos), then the
    # default_plugin_name (e.g. source/tail/my-custom-value) won't
    # be a key in baseline_elements below, so it won't be
    # used.  Instead it will use the custom_plugin_name
    # (e.g. source/tail).
    config.elements.each do |element|
      plugin_name = default_plugin_name(element)
      if baseline_elements.key?(plugin_name)
        is_default_plugin = true
        has_default_value = (baseline_elements[plugin_name] == element)
      else
        plugin_name = custom_plugin_name(element)
        is_default_plugin = false
        has_default_value = false
      end
      plugin_usage.increment(
        labels: {
          plugin_name: plugin_name,
          is_default_plugin: is_default_plugin,
          has_default_value: has_default_value,
          has_ruby_snippet: embedded_ruby?(element)
        },
        by: 1)

      # Additional metric for Google plugins (google_cloud and
      # detect_exceptions).
      plugin_type = element['@type']
      next unless GOOGLE_PLUGIN_PARAMS.key?(plugin_type)
      GOOGLE_PLUGIN_PARAMS[plugin_type].each do |param|
        # NOTE(review): detect_exceptions parameters are also compared
        # against the baseline google_cloud element - confirm intended.
        matches_baseline = element.key?(param) &&
                           !baseline_google_element.nil? &&
                           baseline_google_element.key?(param) &&
                           element[param] == baseline_google_element[param]
        config_usage.increment(
          labels: {
            plugin_name: plugin_type,
            param: param,
            is_present: element.key?(param),
            has_default_value: matches_baseline
          },
          by: 1)
        next unless element.key?(param) &&
                    %w(true false).include?(element[param])
        config_bool_values.increment(
          labels: {
            plugin_name: plugin_type,
            param: param,
            value: element[param] == 'true'
          },
          by: 1)
      end
    end
  else
    @log.info(
      'google-fluentd configuration file does not exist at' \
      " #{@google_fluentd_config_path} or " \
      'google-fluentd baseline configuration file does not exist at' \
      " #{@google_fluentd_baseline_config_path}. " \
      'Skipping configuration analysis.')
  end
rescue => error
  # Do not crash the agent due to configuration analysis failures.
  @log.warn(
    'Failed to optionally analyze the google-fluentd configuration' \
    " file. Proceeding anyway. Error: #{error}")
end
|
332
|
+
|
333
|
+
# Shuts the plugin down. This filter does no clean-up of its own and
# only defers to the parent class.
def shutdown
  super
end
|
336
|
+
|
337
|
+
# rubocop:disable Lint/UnusedMethodArgument

# Pass-through filter: hands every record back untouched. The tag and
# time arguments are accepted but not used.
def filter(tag, time, record)
  record
end

# rubocop:enable Lint/UnusedMethodArgument
|
343
|
+
end
|
344
|
+
end
|
@@ -28,6 +28,7 @@ require 'google/logging/v2/logging_services_pb'
|
|
28
28
|
require 'google/logging/v2/log_entry_pb'
|
29
29
|
require 'googleauth'
|
30
30
|
|
31
|
+
require_relative 'common'
|
31
32
|
require_relative 'monitoring'
|
32
33
|
require_relative 'statusz'
|
33
34
|
|
@@ -90,73 +91,6 @@ end
|
|
90
91
|
module Fluent
|
91
92
|
# fluentd output plugin for the Stackdriver Logging API
|
92
93
|
class GoogleCloudOutput < BufferedOutput
|
93
|
-
# Constants for service names, resource types and etc.
|
94
|
-
module ServiceConstants
|
95
|
-
APPENGINE_CONSTANTS = {
|
96
|
-
service: 'appengine.googleapis.com',
|
97
|
-
resource_type: 'gae_app',
|
98
|
-
metadata_attributes: %w(gae_backend_name gae_backend_version)
|
99
|
-
}.freeze
|
100
|
-
COMPUTE_CONSTANTS = {
|
101
|
-
service: 'compute.googleapis.com',
|
102
|
-
resource_type: 'gce_instance'
|
103
|
-
}.freeze
|
104
|
-
GKE_CONSTANTS = {
|
105
|
-
service: 'container.googleapis.com',
|
106
|
-
resource_type: 'container',
|
107
|
-
extra_resource_labels: %w(namespace_id pod_id container_name),
|
108
|
-
extra_common_labels: %w(namespace_name pod_name),
|
109
|
-
metadata_attributes: %w(cluster-name cluster-location),
|
110
|
-
stream_severity_map: {
|
111
|
-
'stdout' => 'INFO',
|
112
|
-
'stderr' => 'ERROR'
|
113
|
-
}
|
114
|
-
}.freeze
|
115
|
-
K8S_CONTAINER_CONSTANTS = {
|
116
|
-
resource_type: 'k8s_container'
|
117
|
-
}.freeze
|
118
|
-
K8S_POD_CONSTANTS = {
|
119
|
-
resource_type: 'k8s_pod'
|
120
|
-
}.freeze
|
121
|
-
K8S_NODE_CONSTANTS = {
|
122
|
-
resource_type: 'k8s_node'
|
123
|
-
}.freeze
|
124
|
-
DATAFLOW_CONSTANTS = {
|
125
|
-
service: 'dataflow.googleapis.com',
|
126
|
-
resource_type: 'dataflow_step',
|
127
|
-
extra_resource_labels: %w(region job_name job_id step_id)
|
128
|
-
}.freeze
|
129
|
-
DATAPROC_CONSTANTS = {
|
130
|
-
service: 'cluster.dataproc.googleapis.com',
|
131
|
-
resource_type: 'cloud_dataproc_cluster',
|
132
|
-
metadata_attributes: %w(dataproc-cluster-uuid dataproc-cluster-name)
|
133
|
-
}.freeze
|
134
|
-
EC2_CONSTANTS = {
|
135
|
-
service: 'ec2.amazonaws.com',
|
136
|
-
resource_type: 'aws_ec2_instance'
|
137
|
-
}.freeze
|
138
|
-
ML_CONSTANTS = {
|
139
|
-
service: 'ml.googleapis.com',
|
140
|
-
resource_type: 'ml_job',
|
141
|
-
extra_resource_labels: %w(job_id task_name)
|
142
|
-
}.freeze
|
143
|
-
|
144
|
-
# The map between a subservice name and a resource type.
|
145
|
-
SUBSERVICE_MAP =
|
146
|
-
[APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAFLOW_CONSTANTS,
|
147
|
-
DATAPROC_CONSTANTS, ML_CONSTANTS]
|
148
|
-
.map { |consts| [consts[:service], consts[:resource_type]] }.to_h
|
149
|
-
# Default back to GCE if invalid value is detected.
|
150
|
-
SUBSERVICE_MAP.default = COMPUTE_CONSTANTS[:resource_type]
|
151
|
-
SUBSERVICE_MAP.freeze
|
152
|
-
|
153
|
-
# The map between a resource type and expected subservice attributes.
|
154
|
-
SUBSERVICE_METADATA_ATTRIBUTES =
|
155
|
-
[APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAPROC_CONSTANTS].map do |consts|
|
156
|
-
[consts[:resource_type], consts[:metadata_attributes].to_set]
|
157
|
-
end.to_h.freeze
|
158
|
-
end
|
159
|
-
|
160
94
|
# Constants for configuration.
|
161
95
|
module ConfigConstants
|
162
96
|
# Default values for JSON payload keys to set the "httpRequest",
|
@@ -247,7 +181,7 @@ module Fluent
|
|
247
181
|
.freeze
|
248
182
|
end
|
249
183
|
|
250
|
-
include
|
184
|
+
include Common::ServiceConstants
|
251
185
|
include self::ConfigConstants
|
252
186
|
include self::InternalConstants
|
253
187
|
|
@@ -278,12 +212,6 @@ module Fluent
|
|
278
212
|
end
|
279
213
|
end.freeze
|
280
214
|
|
281
|
-
# Name of the the Google cloud logging write scope.
|
282
|
-
LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'.freeze
|
283
|
-
|
284
|
-
# Address of the metadata service.
|
285
|
-
METADATA_SERVICE_ADDR = '169.254.169.254'.freeze
|
286
|
-
|
287
215
|
# Disable this warning to conform to fluentd config_param conventions.
|
288
216
|
# rubocop:disable Style/HashSyntax
|
289
217
|
|
@@ -442,10 +370,20 @@ module Fluent
|
|
442
370
|
# - 'prometheus', in this case default registry in the Prometheus
|
443
371
|
# client library is used, without actually exposing the endpoint
|
444
372
|
# to serve metrics in the Prometheus format.
|
445
|
-
#
|
373
|
+
# - 'opencensus', in this case the OpenCensus implementation is
|
374
|
+
# used to send metrics directly to Google Cloud Monitoring.
|
375
|
+
# - any other value will result in the absence of metrics.
|
446
376
|
config_param :monitoring_type, :string,
|
447
377
|
:default => Monitoring::PrometheusMonitoringRegistry.name
|
448
378
|
|
379
|
+
# The monitored resource to use for OpenCensus metrics. Only valid
|
380
|
+
# when monitoring_type is set to 'opencensus'. This value is a hash in
|
381
|
+
# the form:
|
382
|
+
# {"type":"gce_instance","labels":{"instance_id":"aaa","zone":"bbb"}} (JSON)
|
383
|
+
# or type:gce_instance,labels.instance_id:aaa,labels.zone:bbb (Hash)
|
384
|
+
config_param :metrics_resource, :hash,
|
385
|
+
:symbolize_keys => true, :default => nil
|
386
|
+
|
449
387
|
# Whether to call metadata agent to retrieve monitored resource. This flag
|
450
388
|
# is kept for backwards compatibility, and is no longer used.
|
451
389
|
# TODO: Breaking change. Remove this flag in Logging Agent 2.0.0 release.
|
@@ -495,6 +433,7 @@ module Fluent
|
|
495
433
|
attr_reader :vm_id
|
496
434
|
attr_reader :resource
|
497
435
|
attr_reader :common_labels
|
436
|
+
attr_reader :monitoring_resource
|
498
437
|
|
499
438
|
def initialize
|
500
439
|
super
|
@@ -539,7 +478,9 @@ module Fluent
|
|
539
478
|
|
540
479
|
set_regexp_patterns
|
541
480
|
|
542
|
-
@
|
481
|
+
@utils = Common::Utils.new(@log)
|
482
|
+
|
483
|
+
@platform = @utils.detect_platform(@use_metadata_service)
|
543
484
|
|
544
485
|
# Treat an empty setting of the credentials file path environment variable
|
545
486
|
# as unset. This way the googleauth lib could fetch the credentials
|
@@ -548,12 +489,57 @@ module Fluent
|
|
548
489
|
ENV[CREDENTIALS_PATH_ENV_VAR] == ''
|
549
490
|
|
550
491
|
# Set required variables: @project_id, @vm_id, @vm_name and @zone.
|
551
|
-
|
492
|
+
@project_id = @utils.get_project_id(@platform, @project_id)
|
493
|
+
@vm_id = @utils.get_vm_id(@platform, @vm_id)
|
494
|
+
@vm_name = @utils.get_vm_name(@vm_name)
|
495
|
+
@zone = @utils.get_location(@platform, @zone, @use_aws_availability_zone)
|
496
|
+
|
497
|
+
# All metadata parameters must now be set.
|
498
|
+
@utils.check_required_metadata_variables(
|
499
|
+
@platform, @project_id, @zone, @vm_id)
|
552
500
|
|
553
501
|
# Retrieve monitored resource.
|
554
502
|
# Fail over to retrieve monitored resource via the legacy path if we fail
|
555
503
|
# to get it from Metadata Agent.
|
556
|
-
@resource ||= determine_agent_level_monitored_resource_via_legacy
|
504
|
+
@resource ||= @utils.determine_agent_level_monitored_resource_via_legacy(
|
505
|
+
@platform, @subservice_name, @detect_subservice, @vm_id, @zone)
|
506
|
+
|
507
|
+
if @metrics_resource
|
508
|
+
unless @metrics_resource[:type].is_a?(String)
|
509
|
+
raise Fluent::ConfigError,
|
510
|
+
'metrics_resource.type must be a string:' \
|
511
|
+
" #{@metrics_resource}."
|
512
|
+
end
|
513
|
+
if @metrics_resource.key?(:labels)
|
514
|
+
unless @metrics_resource[:labels].is_a?(Hash)
|
515
|
+
raise Fluent::ConfigError,
|
516
|
+
'metrics_resource.labels must be a hash:' \
|
517
|
+
" #{@metrics_resource}."
|
518
|
+
end
|
519
|
+
extra_keys = @metrics_resource.reject do |k, _|
|
520
|
+
k == :type || k == :labels
|
521
|
+
end
|
522
|
+
unless extra_keys.empty?
|
523
|
+
raise Fluent::ConfigError,
|
524
|
+
"metrics_resource has unrecognized keys: #{extra_keys.keys}."
|
525
|
+
end
|
526
|
+
else
|
527
|
+
extra_keys = @metrics_resource.reject do |k, _|
|
528
|
+
k == :type || k.to_s.start_with?('labels.')
|
529
|
+
end
|
530
|
+
unless extra_keys.empty?
|
531
|
+
raise Fluent::ConfigError,
|
532
|
+
"metrics_resource has unrecognized keys: #{extra_keys.keys}."
|
533
|
+
end
|
534
|
+
# Transform the Hash form of the metrics_resource config if necessary.
|
535
|
+
resource_type = @metrics_resource[:type]
|
536
|
+
resource_labels = @metrics_resource.each_with_object({}) \
|
537
|
+
do |(k, v), h|
|
538
|
+
h[k.to_s.sub('labels.', '')] = v if k.to_s.start_with? 'labels.'
|
539
|
+
end
|
540
|
+
@metrics_resource = { type: resource_type, labels: resource_labels }
|
541
|
+
end
|
542
|
+
end
|
557
543
|
|
558
544
|
# If monitoring is enabled, register metrics in the default registry
|
559
545
|
# and store metric objects for future use.
|
@@ -563,9 +549,15 @@ module Fluent
|
|
563
549
|
@log.warn "monitoring_type '#{@monitoring_type}' is unknown; "\
|
564
550
|
'there will be no metrics'
|
565
551
|
end
|
552
|
+
if @metrics_resource
|
553
|
+
@monitoring_resource = @utils.create_monitored_resource(
|
554
|
+
@metrics_resource[:type], @metrics_resource[:labels])
|
555
|
+
else
|
556
|
+
@monitoring_resource = @resource
|
557
|
+
end
|
566
558
|
@registry = Monitoring::MonitoringRegistryFactory
|
567
|
-
.create(@monitoring_type, @project_id,
|
568
|
-
@gcm_service_address)
|
559
|
+
.create(@monitoring_type, @project_id,
|
560
|
+
@monitoring_resource, @gcm_service_address)
|
569
561
|
# Export metrics every 60 seconds.
|
570
562
|
timer_execute(:export_metrics, 60) { @registry.export }
|
571
563
|
# Uptime should be a gauge, but the metric definition is a counter and
|
@@ -611,7 +603,7 @@ module Fluent
|
|
611
603
|
|
612
604
|
# Determine the common labels that should be added to all log entries
|
613
605
|
# processed by this logging agent.
|
614
|
-
@common_labels = determine_agent_level_common_labels
|
606
|
+
@common_labels = determine_agent_level_common_labels(@resource)
|
615
607
|
|
616
608
|
# The resource and labels are now set up; ensure they can't be modified
|
617
609
|
# without first duping them.
|
@@ -627,7 +619,7 @@ module Fluent
|
|
627
619
|
@write_request = method(:write_request_via_rest)
|
628
620
|
end
|
629
621
|
|
630
|
-
if [Platform::GCE, Platform::EC2].include?(@platform)
|
622
|
+
if [Common::Platform::GCE, Common::Platform::EC2].include?(@platform)
|
631
623
|
# Log an informational message containing the Logs viewer URL
|
632
624
|
@log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
|
633
625
|
"viewer?project=#{@project_id}&resource=#{@resource.type}/",
|
@@ -1094,66 +1086,6 @@ module Fluent
|
|
1094
1086
|
nil
|
1095
1087
|
end
|
1096
1088
|
|
1097
|
-
# "enum" of Platform values
|
1098
|
-
module Platform
|
1099
|
-
OTHER = 0 # Other/unkown platform
|
1100
|
-
GCE = 1 # Google Compute Engine
|
1101
|
-
EC2 = 2 # Amazon EC2
|
1102
|
-
end
|
1103
|
-
|
1104
|
-
# Determine what platform we are running on by consulting the metadata
|
1105
|
-
# service (unless the user has explicitly disabled using that).
|
1106
|
-
def detect_platform
|
1107
|
-
unless @use_metadata_service
|
1108
|
-
@log.info 'use_metadata_service is false; not detecting platform'
|
1109
|
-
return Platform::OTHER
|
1110
|
-
end
|
1111
|
-
|
1112
|
-
begin
|
1113
|
-
open('http://' + METADATA_SERVICE_ADDR, proxy: false) do |f|
|
1114
|
-
if f.meta['metadata-flavor'] == 'Google'
|
1115
|
-
@log.info 'Detected GCE platform'
|
1116
|
-
return Platform::GCE
|
1117
|
-
end
|
1118
|
-
if f.meta['server'] == 'EC2ws'
|
1119
|
-
@log.info 'Detected EC2 platform'
|
1120
|
-
return Platform::EC2
|
1121
|
-
end
|
1122
|
-
end
|
1123
|
-
rescue StandardError => e
|
1124
|
-
@log.error 'Failed to access metadata service: ', error: e
|
1125
|
-
end
|
1126
|
-
|
1127
|
-
@log.info 'Unable to determine platform'
|
1128
|
-
Platform::OTHER
|
1129
|
-
end
|
1130
|
-
|
1131
|
-
def fetch_gce_metadata(metadata_path)
|
1132
|
-
raise "Called fetch_gce_metadata with platform=#{@platform}" unless
|
1133
|
-
@platform == Platform::GCE
|
1134
|
-
# See https://cloud.google.com/compute/docs/metadata
|
1135
|
-
open('http://' + METADATA_SERVICE_ADDR + '/computeMetadata/v1/' +
|
1136
|
-
metadata_path, 'Metadata-Flavor' => 'Google', :proxy => false,
|
1137
|
-
&:read)
|
1138
|
-
end
|
1139
|
-
|
1140
|
-
# EC2 Metadata server returns everything in one call. Store it after the
|
1141
|
-
# first fetch to avoid making multiple calls.
|
1142
|
-
def ec2_metadata
|
1143
|
-
raise "Called ec2_metadata with platform=#{@platform}" unless
|
1144
|
-
@platform == Platform::EC2
|
1145
|
-
unless @ec2_metadata
|
1146
|
-
# See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
|
1147
|
-
open('http://' + METADATA_SERVICE_ADDR +
|
1148
|
-
'/latest/dynamic/instance-identity/document', proxy: false) do |f|
|
1149
|
-
contents = f.read
|
1150
|
-
@ec2_metadata = JSON.parse(contents)
|
1151
|
-
end
|
1152
|
-
end
|
1153
|
-
|
1154
|
-
@ec2_metadata
|
1155
|
-
end
|
1156
|
-
|
1157
1089
|
# Set regexp patterns to parse tags and logs.
|
1158
1090
|
def set_regexp_patterns
|
1159
1091
|
@compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp) if
|
@@ -1163,187 +1095,14 @@ module Fluent
|
|
1163
1095
|
/^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
|
1164
1096
|
end
|
1165
1097
|
|
1166
|
-
# Set required variables like @project_id, @vm_id, @vm_name and @zone.
|
1167
|
-
def set_required_metadata_variables
|
1168
|
-
set_project_id
|
1169
|
-
set_vm_id
|
1170
|
-
set_vm_name
|
1171
|
-
set_location
|
1172
|
-
|
1173
|
-
# All metadata parameters must now be set.
|
1174
|
-
missing = []
|
1175
|
-
missing << 'project_id' unless @project_id
|
1176
|
-
if @platform != Platform::OTHER
|
1177
|
-
missing << 'zone' unless @zone
|
1178
|
-
missing << 'vm_id' unless @vm_id
|
1179
|
-
end
|
1180
|
-
return if missing.empty?
|
1181
|
-
raise Fluent::ConfigError,
|
1182
|
-
"Unable to obtain metadata parameters: #{missing.join(' ')}"
|
1183
|
-
end
|
1184
|
-
|
1185
|
-
# 1. Return the value if it is explicitly set in the config already.
|
1186
|
-
# 2. If not, try to retrieve it by calling metadata server directly.
|
1187
|
-
# 3. If still not set, try to obtain it from the credentials.
|
1188
|
-
def set_project_id
|
1189
|
-
@project_id ||= CredentialsInfo.project_id
|
1190
|
-
@project_id ||= fetch_gce_metadata('project/project-id') if
|
1191
|
-
@platform == Platform::GCE
|
1192
|
-
end
|
1193
|
-
|
1194
|
-
# 1. Return the value if it is explicitly set in the config already.
|
1195
|
-
# 2. If not, try to retrieve it by calling metadata servers directly.
|
1196
|
-
def set_vm_id
|
1197
|
-
@vm_id ||= fetch_gce_metadata('instance/id') if @platform == Platform::GCE
|
1198
|
-
@vm_id ||= ec2_metadata['instanceId'] if @platform == Platform::EC2
|
1199
|
-
rescue StandardError => e
|
1200
|
-
@log.error 'Failed to obtain vm_id: ', error: e
|
1201
|
-
end
|
1202
|
-
|
1203
|
-
# 1. Return the value if it is explicitly set in the config already.
|
1204
|
-
# 2. If not, try to retrieve it locally.
|
1205
|
-
def set_vm_name
|
1206
|
-
@vm_name ||= Socket.gethostname
|
1207
|
-
rescue StandardError => e
|
1208
|
-
@log.error 'Failed to obtain vm name: ', error: e
|
1209
|
-
end
|
1210
|
-
|
1211
|
-
# 1. Return the value if it is explicitly set in the config already.
|
1212
|
-
# 2. If not, try to retrieve it locally.
|
1213
|
-
def set_location
|
1214
|
-
# Response format: "projects/<number>/zones/<zone>"
|
1215
|
-
@zone ||= fetch_gce_metadata('instance/zone').rpartition('/')[2] if
|
1216
|
-
@platform == Platform::GCE
|
1217
|
-
aws_location_key = if @use_aws_availability_zone
|
1218
|
-
'availabilityZone'
|
1219
|
-
else
|
1220
|
-
'region'
|
1221
|
-
end
|
1222
|
-
@zone ||= 'aws:' + ec2_metadata[aws_location_key] if
|
1223
|
-
@platform == Platform::EC2 && ec2_metadata.key?(aws_location_key)
|
1224
|
-
rescue StandardError => e
|
1225
|
-
@log.error 'Failed to obtain location: ', error: e
|
1226
|
-
end
|
1227
|
-
|
1228
|
-
# Retrieve monitored resource via the legacy way.
|
1229
|
-
#
|
1230
|
-
# Note: This is just a failover plan if we fail to get metadata from
|
1231
|
-
# Metadata Agent. Thus it should be equivalent to what Metadata Agent
|
1232
|
-
# returns.
|
1233
|
-
def determine_agent_level_monitored_resource_via_legacy
|
1234
|
-
resource = Google::Apis::LoggingV2::MonitoredResource.new(
|
1235
|
-
labels: {})
|
1236
|
-
resource.type = determine_agent_level_monitored_resource_type
|
1237
|
-
resource.labels = determine_agent_level_monitored_resource_labels(
|
1238
|
-
resource.type)
|
1239
|
-
resource
|
1240
|
-
end
|
1241
|
-
|
1242
|
-
# Determine agent level monitored resource type.
|
1243
|
-
def determine_agent_level_monitored_resource_type
|
1244
|
-
case @platform
|
1245
|
-
when Platform::OTHER
|
1246
|
-
# Unknown platform will be defaulted to GCE instance.
|
1247
|
-
return COMPUTE_CONSTANTS[:resource_type]
|
1248
|
-
|
1249
|
-
when Platform::EC2
|
1250
|
-
return EC2_CONSTANTS[:resource_type]
|
1251
|
-
|
1252
|
-
when Platform::GCE
|
1253
|
-
# Resource types determined by @subservice_name config.
|
1254
|
-
return SUBSERVICE_MAP[@subservice_name] if @subservice_name
|
1255
|
-
|
1256
|
-
# Resource types determined by @detect_subservice config.
|
1257
|
-
if @detect_subservice
|
1258
|
-
begin
|
1259
|
-
attributes = fetch_gce_metadata('instance/attributes/').split.to_set
|
1260
|
-
SUBSERVICE_METADATA_ATTRIBUTES.each do |resource_type, expected|
|
1261
|
-
return resource_type if attributes.superset?(expected)
|
1262
|
-
end
|
1263
|
-
rescue StandardError => e
|
1264
|
-
@log.error 'Failed to detect subservice: ', error: e
|
1265
|
-
end
|
1266
|
-
end
|
1267
|
-
|
1268
|
-
# GCE instance.
|
1269
|
-
return COMPUTE_CONSTANTS[:resource_type]
|
1270
|
-
end
|
1271
|
-
end
|
1272
|
-
|
1273
|
-
# Determine agent level monitored resource labels based on the resource
|
1274
|
-
# type. Each resource type has its own labels that need to be filled in.
|
1275
|
-
# Builds the agent level monitored resource labels for the given resource
# type. Every resource type has its own set of labels to fill in.
#
# type - the monitored resource type the labels are built for.
#
# Returns a hash of label name to value. An empty hash is returned for a
# type that is not handled here, and also when building the labels raises
# a StandardError (that failure is only logged on the GCE and EC2
# platforms).
def determine_agent_level_monitored_resource_labels(type)
  # Shared precondition of the GCE, GKE and EC2 branches.
  require_vm_id_and_zone = lambda do
    next if @vm_id && @zone

    raise "Cannot construct a #{type} resource without vm_id and zone"
  end

  case type
  when APPENGINE_CONSTANTS[:resource_type]
    # GAE app.
    {
      'module_id' =>
        fetch_gce_metadata('instance/attributes/gae_backend_name'),
      'version_id' =>
        fetch_gce_metadata('instance/attributes/gae_backend_version')
    }
  when COMPUTE_CONSTANTS[:resource_type]
    # GCE.
    require_vm_id_and_zone.call
    { 'instance_id' => @vm_id, 'zone' => @zone }
  when GKE_CONSTANTS[:resource_type]
    # GKE container.
    require_vm_id_and_zone.call
    {
      'instance_id' => @vm_id,
      'zone' => @zone,
      'cluster_name' =>
        fetch_gce_metadata('instance/attributes/cluster-name')
    }
  when DATAPROC_CONSTANTS[:resource_type]
    # Cloud Dataproc.
    {
      'cluster_uuid' =>
        fetch_gce_metadata('instance/attributes/dataproc-cluster-uuid'),
      'cluster_name' =>
        fetch_gce_metadata('instance/attributes/dataproc-cluster-name'),
      'region' =>
        fetch_gce_metadata('instance/attributes/dataproc-region')
    }
  when EC2_CONSTANTS[:resource_type]
    # EC2.
    require_vm_id_and_zone.call
    ec2_labels = { 'instance_id' => @vm_id, 'region' => @zone }
    # The account id is only attached when the EC2 metadata carries one.
    if ec2_metadata.key?('accountId')
      ec2_labels['aws_account'] = ec2_metadata['accountId']
    end
    ec2_labels
  else
    {}
  end
rescue StandardError => e
  logged_platforms = [Platform::GCE, Platform::EC2]
  if logged_platforms.include?(@platform)
    @log.error "Failed to set monitored resource labels for #{type}: ",
               error: e
  end
  {}
end
|
1338
|
-
|
1339
1098
|
# Determine the common labels that should be added to all log entries
|
1340
1099
|
# processed by this logging agent.
|
1341
|
-
def determine_agent_level_common_labels
|
1100
|
+
def determine_agent_level_common_labels(resource)
|
1342
1101
|
labels = {}
|
1343
1102
|
# User can specify labels via config. We want to capture those as well.
|
1344
1103
|
labels.merge!(@labels) if @labels
|
1345
1104
|
|
1346
|
-
case
|
1105
|
+
case resource.type
|
1347
1106
|
# GAE, Cloud Dataflow, Cloud Dataproc and Cloud ML.
|
1348
1107
|
when APPENGINE_CONSTANTS[:resource_type],
|
1349
1108
|
DATAFLOW_CONSTANTS[:resource_type],
|
@@ -1548,44 +1307,6 @@ module Fluent
|
|
1548
1307
|
[resource, common_labels]
|
1549
1308
|
end
|
1550
1309
|
|
1551
|
-
# TODO: This functionality should eventually be available in another
|
1552
|
-
# library, but implement it ourselves for now.
|
1553
|
-
module CredentialsInfo
  # Determine the project ID from the application default credentials, if
  # possible.
  #
  # The sources are tried in order: the credentials' own project_id (only
  # when the credentials object responds to it), then the issuer, then the
  # client_id. The latter two are parsed with extract_project_id.
  #
  # Returns the project ID (as a string) on success, or nil on failure.
  def self.project_id
    creds = Google::Auth.get_application_default(LOGGING_SCOPE)
    if creds.respond_to?(:project_id)
      direct = creds.project_id
      return direct if direct
    end
    # Read each attribute once and lazily: client_id is only consulted
    # when the issuer did not yield a project ID.
    %i[issuer client_id].each do |source|
      value = creds.public_send(source)
      next unless value

      id = extract_project_id(value)
      return id unless id.nil?
    end
    nil
  end

  # Extracts the project id (either name or number) from str and returns
  # it (as a string) on success, or nil on failure.
  #
  # Recognizes IAM format (account@project-name.iam.gserviceaccount.com)
  # as well as the legacy format with a project number at the front of the
  # string, terminated by a dash (-) which is not part of the ID, i.e.:
  # <PROJECT_ID>-<OTHER_PARTS>.apps.googleusercontent.com
  #
  # Both patterns are anchored with \A so they only match at the very
  # start of the string; ^ would also match after an embedded newline and
  # could pick an ID out of a later line.
  def self.extract_project_id(str)
    [/\A.*@(?<project_id>.+)\.iam\.gserviceaccount\.com/,
     /\A(?<project_id>\d+)-/].each do |pattern|
      found = pattern.match(str)
      return found['project_id'] unless found.nil?
    end
    nil
  end
end
|
1588
|
-
|
1589
1310
|
def time_or_nil(ts_secs, ts_nanos)
|
1590
1311
|
Time.at((Integer ts_secs), (Integer ts_nanos) / 1_000.0)
|
1591
1312
|
rescue ArgumentError, TypeError
|
@@ -2081,7 +1802,7 @@ module Fluent
|
|
2081
1802
|
Google::Apis::ClientOptions.default.application_version = PLUGIN_VERSION
|
2082
1803
|
@client = Google::Apis::LoggingV2::LoggingService.new
|
2083
1804
|
@client.authorization = Google::Auth.get_application_default(
|
2084
|
-
LOGGING_SCOPE)
|
1805
|
+
Common::LOGGING_SCOPE)
|
2085
1806
|
end
|
2086
1807
|
end
|
2087
1808
|
|
@@ -2310,10 +2031,10 @@ module Fluent
|
|
2310
2031
|
@k8s_cluster_location = nil if @k8s_cluster_location == ''
|
2311
2032
|
|
2312
2033
|
begin
|
2313
|
-
@k8s_cluster_name ||= fetch_gce_metadata(
|
2314
|
-
'instance/attributes/cluster-name')
|
2315
|
-
@k8s_cluster_location ||= fetch_gce_metadata(
|
2316
|
-
'instance/attributes/cluster-location')
|
2034
|
+
@k8s_cluster_name ||= @utils.fetch_gce_metadata(
|
2035
|
+
@platform, 'instance/attributes/cluster-name')
|
2036
|
+
@k8s_cluster_location ||= @utils.fetch_gce_metadata(
|
2037
|
+
@platform, 'instance/attributes/cluster-location')
|
2317
2038
|
rescue StandardError => e
|
2318
2039
|
@log.error 'Failed to retrieve k8s cluster name and location.', \
|
2319
2040
|
error: e
|