fluent-plugin-google-cloud 0.6.4.pre.3 → 0.6.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +159 -0
- data/fluent-plugin-google-cloud.gemspec +2 -2
- data/lib/fluent/plugin/monitoring.rb +55 -0
- data/lib/fluent/plugin/out_google_cloud.rb +377 -769
- data/test/plugin/base_test.rb +86 -282
- data/test/plugin/constants.rb +3 -118
- data/test/plugin/test_out_google_cloud.rb +48 -0
- data/test/plugin/test_out_google_cloud_grpc.rb +47 -0
- metadata +21 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab431f0283f1c6bc88669159be387f21bb1e908f
|
4
|
+
data.tar.gz: 77d320fd8b6e4c5370542142fd296581fcd9186e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15fc471919366c52a612cb2b1941d0e9ffdf5e1d5c94ad6f3395c1860ca2f03edd888959a0ed267375bf80133458251bc1555c7416ef81f8be4f4aa83fd5c405
|
7
|
+
data.tar.gz: 4aed5e4e2686eb4776d32f3b44f61fc86849f4f02a64bd12399d0e0ac4a6cc1a8752489513fa5e482fdb7cac432d517ed5b7b26e71365e344e2dcf2ca8e96832
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fluent-plugin-google-cloud (0.6.4)
|
5
|
+
fluentd (~> 0.10)
|
6
|
+
google-api-client (~> 0.9.0)
|
7
|
+
google-cloud-logging (~> 0.23.2)
|
8
|
+
googleapis-common-protos (~> 1.3)
|
9
|
+
googleauth (~> 0.4)
|
10
|
+
grpc (~> 1.0, < 1.3)
|
11
|
+
json (~> 1.8)
|
12
|
+
|
13
|
+
GEM
|
14
|
+
remote: https://rubygems.org/
|
15
|
+
specs:
|
16
|
+
addressable (2.5.1)
|
17
|
+
public_suffix (~> 2.0, >= 2.0.2)
|
18
|
+
ast (2.3.0)
|
19
|
+
astrolabe (1.3.1)
|
20
|
+
parser (~> 2.2)
|
21
|
+
cool.io (1.5.0)
|
22
|
+
crack (0.4.3)
|
23
|
+
safe_yaml (~> 1.0.0)
|
24
|
+
faraday (0.12.1)
|
25
|
+
multipart-post (>= 1.2, < 3)
|
26
|
+
fluentd (0.14.18)
|
27
|
+
cool.io (>= 1.4.5, < 2.0.0)
|
28
|
+
http_parser.rb (>= 0.5.1, < 0.7.0)
|
29
|
+
msgpack (>= 0.7.0, < 2.0.0)
|
30
|
+
serverengine (>= 2.0.4, < 3.0.0)
|
31
|
+
sigdump (~> 0.2.2)
|
32
|
+
strptime (~> 0.1.7)
|
33
|
+
tzinfo (~> 1.0)
|
34
|
+
tzinfo-data (~> 1.0)
|
35
|
+
yajl-ruby (~> 1.0)
|
36
|
+
google-api-client (0.9.28)
|
37
|
+
addressable (~> 2.3)
|
38
|
+
googleauth (~> 0.5)
|
39
|
+
httpclient (~> 2.7)
|
40
|
+
hurley (~> 0.1)
|
41
|
+
memoist (~> 0.11)
|
42
|
+
mime-types (>= 1.6)
|
43
|
+
representable (~> 2.3.0)
|
44
|
+
retriable (~> 2.0)
|
45
|
+
google-cloud-core (0.21.1)
|
46
|
+
googleauth (~> 0.5.1)
|
47
|
+
google-cloud-logging (0.23.2)
|
48
|
+
google-cloud-core (~> 0.21.1)
|
49
|
+
google-gax (~> 0.6.0)
|
50
|
+
google-protobuf (~> 3.0)
|
51
|
+
googleapis-common-protos (~> 1.3)
|
52
|
+
grpc (~> 1.0)
|
53
|
+
orderedhash (= 0.0.6)
|
54
|
+
stackdriver-core (~> 0.21.0)
|
55
|
+
google-gax (0.6.0)
|
56
|
+
googleapis-common-protos (~> 1.3.1)
|
57
|
+
googleauth (~> 0.5.1)
|
58
|
+
grpc (~> 1.0)
|
59
|
+
rly (~> 0.2.3)
|
60
|
+
google-protobuf (3.3.0)
|
61
|
+
googleapis-common-protos (1.3.5)
|
62
|
+
google-protobuf (~> 3.2)
|
63
|
+
grpc (~> 1.0)
|
64
|
+
googleauth (0.5.1)
|
65
|
+
faraday (~> 0.9)
|
66
|
+
jwt (~> 1.4)
|
67
|
+
logging (~> 2.0)
|
68
|
+
memoist (~> 0.12)
|
69
|
+
multi_json (~> 1.11)
|
70
|
+
os (~> 0.9)
|
71
|
+
signet (~> 0.7)
|
72
|
+
grpc (1.2.5)
|
73
|
+
google-protobuf (~> 3.1)
|
74
|
+
googleauth (~> 0.5.1)
|
75
|
+
hashdiff (0.3.4)
|
76
|
+
http_parser.rb (0.6.0)
|
77
|
+
httpclient (2.8.3)
|
78
|
+
hurley (0.2)
|
79
|
+
json (1.8.6)
|
80
|
+
jwt (1.5.6)
|
81
|
+
little-plugger (1.1.4)
|
82
|
+
logging (2.2.2)
|
83
|
+
little-plugger (~> 1.1)
|
84
|
+
multi_json (~> 1.10)
|
85
|
+
memoist (0.16.0)
|
86
|
+
metaclass (0.0.4)
|
87
|
+
mime-types (3.1)
|
88
|
+
mime-types-data (~> 3.2015)
|
89
|
+
mime-types-data (3.2016.0521)
|
90
|
+
mocha (1.2.1)
|
91
|
+
metaclass (~> 0.0.1)
|
92
|
+
msgpack (1.1.0)
|
93
|
+
multi_json (1.12.1)
|
94
|
+
multipart-post (2.0.0)
|
95
|
+
orderedhash (0.0.6)
|
96
|
+
os (0.9.6)
|
97
|
+
parser (2.4.0.0)
|
98
|
+
ast (~> 2.2)
|
99
|
+
power_assert (1.0.2)
|
100
|
+
powerpack (0.1.1)
|
101
|
+
prometheus-client (0.7.1)
|
102
|
+
quantile (~> 0.2.0)
|
103
|
+
public_suffix (2.0.5)
|
104
|
+
quantile (0.2.0)
|
105
|
+
rainbow (2.2.2)
|
106
|
+
rake
|
107
|
+
rake (10.5.0)
|
108
|
+
representable (2.3.0)
|
109
|
+
uber (~> 0.0.7)
|
110
|
+
retriable (2.1.0)
|
111
|
+
rly (0.2.3)
|
112
|
+
rubocop (0.35.1)
|
113
|
+
astrolabe (~> 1.3)
|
114
|
+
parser (>= 2.2.3.0, < 3.0)
|
115
|
+
powerpack (~> 0.1)
|
116
|
+
rainbow (>= 1.99.1, < 3.0)
|
117
|
+
ruby-progressbar (~> 1.7)
|
118
|
+
tins (<= 1.6.0)
|
119
|
+
ruby-progressbar (1.8.1)
|
120
|
+
safe_yaml (1.0.4)
|
121
|
+
serverengine (2.0.5)
|
122
|
+
sigdump (~> 0.2.2)
|
123
|
+
sigdump (0.2.4)
|
124
|
+
signet (0.7.3)
|
125
|
+
addressable (~> 2.3)
|
126
|
+
faraday (~> 0.9)
|
127
|
+
jwt (~> 1.5)
|
128
|
+
multi_json (~> 1.10)
|
129
|
+
stackdriver-core (0.21.0)
|
130
|
+
strptime (0.1.9)
|
131
|
+
test-unit (3.2.5)
|
132
|
+
power_assert
|
133
|
+
thread_safe (0.3.6)
|
134
|
+
tins (1.6.0)
|
135
|
+
tzinfo (1.2.3)
|
136
|
+
thread_safe (~> 0.1)
|
137
|
+
tzinfo-data (1.2017.2)
|
138
|
+
tzinfo (>= 1.0.0)
|
139
|
+
uber (0.0.15)
|
140
|
+
webmock (1.24.6)
|
141
|
+
addressable (>= 2.3.6)
|
142
|
+
crack (>= 0.3.2)
|
143
|
+
hashdiff
|
144
|
+
yajl-ruby (1.3.0)
|
145
|
+
|
146
|
+
PLATFORMS
|
147
|
+
ruby
|
148
|
+
|
149
|
+
DEPENDENCIES
|
150
|
+
fluent-plugin-google-cloud!
|
151
|
+
mocha (~> 1.1)
|
152
|
+
prometheus-client (~> 0.7.1)
|
153
|
+
rake (~> 10.3)
|
154
|
+
rubocop (~> 0.35.0)
|
155
|
+
test-unit (~> 3.0)
|
156
|
+
webmock (~> 1.17)
|
157
|
+
|
158
|
+
BUNDLED WITH
|
159
|
+
1.15.0
|
data/fluent-plugin-google-cloud.gemspec
CHANGED
@@ -10,7 +10,7 @@ eos
|
|
10
10
|
gem.homepage = \
|
11
11
|
'https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud'
|
12
12
|
gem.license = 'Apache-2.0'
|
13
|
-
gem.version = '0.6.4
|
13
|
+
gem.version = '0.6.4'
|
14
14
|
gem.authors = ['Todd Derr', 'Alex Robinson']
|
15
15
|
gem.email = ['salty@google.com']
|
16
16
|
gem.required_ruby_version = Gem::Requirement.new('>= 2.0')
|
@@ -19,7 +19,6 @@ eos
|
|
19
19
|
gem.test_files = gem.files.grep(/^(test)/)
|
20
20
|
gem.require_paths = ['lib']
|
21
21
|
|
22
|
-
gem.add_runtime_dependency 'excon', '~> 0.57.1'
|
23
22
|
gem.add_runtime_dependency 'fluentd', '~> 0.10'
|
24
23
|
gem.add_runtime_dependency 'googleapis-common-protos', '~> 1.3'
|
25
24
|
gem.add_runtime_dependency 'google-api-client', '~> 0.9.0'
|
@@ -33,4 +32,5 @@ eos
|
|
33
32
|
gem.add_development_dependency 'rubocop', '~> 0.35.0'
|
34
33
|
gem.add_development_dependency 'webmock', '~> 1.17'
|
35
34
|
gem.add_development_dependency 'test-unit', '~> 3.0'
|
35
|
+
gem.add_development_dependency 'prometheus-client', '~> 0.7.1'
|
36
36
|
end
|
data/lib/fluent/plugin/monitoring.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Copyright 2017 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Monitoring
|
16
|
+
# Base class for the monitoring registry.
|
17
|
+
class BaseMonitoringRegistry
|
18
|
+
def counter(_name, _desc)
|
19
|
+
_undefined
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Prometheus implementation of the monitoring registry, that uses the default
|
24
|
+
# registry in the official Prometheus client library.
|
25
|
+
class PrometheusMonitoringRegistry < BaseMonitoringRegistry
|
26
|
+
def self.name
|
27
|
+
'prometheus'
|
28
|
+
end
|
29
|
+
|
30
|
+
def initialize
|
31
|
+
require 'prometheus/client'
|
32
|
+
@registry = Prometheus::Client.registry
|
33
|
+
end
|
34
|
+
|
35
|
+
# Exception-driven behavior to avoid synchronization errors.
|
36
|
+
def counter(name, desc)
|
37
|
+
return @registry.counter(name, desc)
|
38
|
+
rescue Prometheus::Client::Registry::AlreadyRegisteredError
|
39
|
+
return @registry.get(name)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Factory that is used to create a monitoring registry based on
|
44
|
+
# the monitoring solution name.
|
45
|
+
class MonitoringRegistryFactory
|
46
|
+
@known_registry_types = {
|
47
|
+
PrometheusMonitoringRegistry.name =>
|
48
|
+
PrometheusMonitoringRegistry
|
49
|
+
}
|
50
|
+
|
51
|
+
def self.create(name)
|
52
|
+
(@known_registry_types[name] || BaseMonitoringRegistry).new
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/fluent/plugin/out_google_cloud.rb
CHANGED
@@ -11,11 +11,9 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
-
require 'excon'
|
15
14
|
require 'grpc'
|
16
15
|
require 'json'
|
17
16
|
require 'open-uri'
|
18
|
-
require 'rubygems'
|
19
17
|
require 'socket'
|
20
18
|
require 'time'
|
21
19
|
require 'yaml'
|
@@ -26,6 +24,8 @@ require 'google/logging/v2/logging_services_pb'
|
|
26
24
|
require 'google/logging/v2/log_entry_pb'
|
27
25
|
require 'googleauth'
|
28
26
|
|
27
|
+
require_relative 'monitoring'
|
28
|
+
|
29
29
|
module Google
|
30
30
|
module Protobuf
|
31
31
|
# Alias the has_key? method to have the same interface as a regular map.
|
@@ -38,10 +38,8 @@ end
|
|
38
38
|
module Fluent
|
39
39
|
# fluentd output plugin for the Stackdriver Logging API
|
40
40
|
class GoogleCloudOutput < BufferedOutput
|
41
|
-
# Constants.
|
41
|
+
# Constants for service names and resource types.
|
42
42
|
module Constants
|
43
|
-
# Service names and resource types.
|
44
|
-
|
45
43
|
APPENGINE_CONSTANTS = {
|
46
44
|
service: 'appengine.googleapis.com',
|
47
45
|
resource_type: 'gae_app'
|
@@ -58,10 +56,6 @@ module Fluent
|
|
58
56
|
service: 'container.googleapis.com',
|
59
57
|
resource_type: 'container'
|
60
58
|
}
|
61
|
-
DOCKER_CONSTANTS = {
|
62
|
-
service: 'dockercontainer.googleapis.com',
|
63
|
-
resource_type: 'docker_container'
|
64
|
-
}
|
65
59
|
DATAFLOW_CONSTANTS = {
|
66
60
|
service: 'dataflow.googleapis.com',
|
67
61
|
resource_type: 'dataflow_step'
|
@@ -78,20 +72,6 @@ module Fluent
|
|
78
72
|
service: 'ml.googleapis.com',
|
79
73
|
resource_type: 'ml_job'
|
80
74
|
}
|
81
|
-
|
82
|
-
# Metadata Agent support.
|
83
|
-
|
84
|
-
# Use empty string as request path when locally-unique key of monitored
|
85
|
-
# resource can be implicitly inferred by Metadata Agent.
|
86
|
-
IMPLICIT_MONITORED_RESOURCE_UNIQUE_KEY = ''
|
87
|
-
|
88
|
-
# The label name of locally unique id in the json payload. When a record
|
89
|
-
# has this field in the payload, we will use the value to retrieve
|
90
|
-
# monitored resource from Stackdriver Metadata agent.
|
91
|
-
LOCALLY_UNIQUE_ID_LABEL_NAME = 'logging.googleapis.com/locally_unique_id'
|
92
|
-
|
93
|
-
# Docker container support.
|
94
|
-
DEFAULT_DOCKER_API_SOCKET_PATH = '/var/run/docker.sock'
|
95
75
|
end
|
96
76
|
|
97
77
|
include self::Constants
|
@@ -99,7 +79,7 @@ module Fluent
|
|
99
79
|
Fluent::Plugin.register_output('google_cloud', self)
|
100
80
|
|
101
81
|
PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'
|
102
|
-
PLUGIN_VERSION = '0.6.
|
82
|
+
PLUGIN_VERSION = '0.6.3'
|
103
83
|
|
104
84
|
# Name of the Google cloud logging write scope.
|
105
85
|
LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'
|
@@ -146,7 +126,7 @@ module Fluent
|
|
146
126
|
config_param :require_valid_tags, :bool, :default => false
|
147
127
|
|
148
128
|
# The regular expression to use on Kubernetes logs to extract some basic
|
149
|
-
# information about the log source. The
|
129
|
+
# information about the log source. The regex must contain capture groups
|
150
130
|
# for pod_name, namespace_name, and container_name.
|
151
131
|
config_param :kubernetes_tag_regexp, :string, :default =>
|
152
132
|
'\.(?<pod_name>[^_]+)_(?<namespace_name>[^_]+)_(?<container_name>.+)$'
|
@@ -208,17 +188,18 @@ module Fluent
|
|
208
188
|
:default => nil,
|
209
189
|
:secret => true
|
210
190
|
|
211
|
-
# Whether to
|
212
|
-
|
213
|
-
|
214
|
-
|
191
|
+
# Whether to collect metrics about the plugin usage. The mechanism for
|
192
|
+
# collecting and exposing metrics is controlled by the monitoring_type
|
193
|
+
# parameter.
|
194
|
+
config_param :enable_monitoring, :bool, :default => false
|
215
195
|
|
216
|
-
#
|
217
|
-
#
|
218
|
-
|
219
|
-
#
|
220
|
-
|
221
|
-
|
196
|
+
# What system to use when collecting metrics. Possible values are:
|
197
|
+
# - 'prometheus', in this case default registry in the Prometheus
|
198
|
+
# client library is used, without actually exposing the endpoint
|
199
|
+
# to serve metrics in the Prometheus format.
|
200
|
+
# - any other value will result in the absence of metrics.
|
201
|
+
config_param :monitoring_type, :string,
|
202
|
+
:default => Monitoring::PrometheusMonitoringRegistry.name
|
222
203
|
|
223
204
|
# rubocop:enable Style/HashSyntax
|
224
205
|
|
@@ -229,6 +210,9 @@ module Fluent
|
|
229
210
|
attr_reader :project_id
|
230
211
|
attr_reader :zone
|
231
212
|
attr_reader :vm_id
|
213
|
+
attr_reader :running_on_managed_vm
|
214
|
+
attr_reader :gae_backend_name
|
215
|
+
attr_reader :gae_backend_version
|
232
216
|
attr_reader :resource
|
233
217
|
attr_reader :common_labels
|
234
218
|
|
@@ -238,38 +222,28 @@ module Fluent
|
|
238
222
|
@log = $log # rubocop:disable Style/GlobalVars
|
239
223
|
end
|
240
224
|
|
241
|
-
# Set up regex patterns used to parse tags and logs.
|
242
|
-
def setup_regex_patterns
|
243
|
-
@compiled_kubernetes_tag_regexp = nil
|
244
|
-
if @kubernetes_tag_regexp
|
245
|
-
@compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp)
|
246
|
-
end
|
247
|
-
|
248
|
-
@cloudfunctions_tag_regexp =
|
249
|
-
/\.(?<encoded_function_name>.+)\.\d+-[^-]+_default_worker$/
|
250
|
-
@cloudfunctions_log_regexp = /^
|
251
|
-
(?:\[(?<severity>.)\])?
|
252
|
-
\[(?<timestamp>.{24})\]
|
253
|
-
(?:\[(?<execution_id>[^\]]+)\])?
|
254
|
-
[ ](?<text>.*)$/x
|
255
|
-
|
256
|
-
# Docker container tag format:
|
257
|
-
# "container.<container_id>.<container_name>".
|
258
|
-
@dockercontainer_tag_regexp =
|
259
|
-
/^container\.(?<container_id>[a-zA-Z0-9]+)\.
|
260
|
-
(?<container_name>[a-zA-Z0-9_.-]+)$/x
|
261
|
-
# Docker container with application tag format:
|
262
|
-
# "application-container.<container_name>.<additional_tag>".
|
263
|
-
@dockercontainer_tag_with_application_regexp =
|
264
|
-
/^application-container\.(?<container_name>[a-zA-Z0-9_.-]+)\.
|
265
|
-
(?<additional_tag>.+)$/x
|
266
|
-
|
267
|
-
@http_latency_regexp = /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
|
268
|
-
end
|
269
|
-
|
270
225
|
def configure(conf)
|
271
226
|
super
|
272
227
|
|
228
|
+
# If monitoring is enabled, register metrics in the default registry
|
229
|
+
# and store metric objects for future use.
|
230
|
+
if @enable_monitoring
|
231
|
+
registry = Monitoring::MonitoringRegistryFactory.create @monitoring_type
|
232
|
+
@successful_requests_count = registry.counter(
|
233
|
+
:stackdriver_successful_requests_count,
|
234
|
+
'A number of successful requests to the Stackdriver Logging API')
|
235
|
+
@failed_requests_count = registry.counter(
|
236
|
+
:stackdriver_failed_requests_count,
|
237
|
+
'A number of failed requests to the Stackdriver Logging API,'\
|
238
|
+
' broken down by the error code')
|
239
|
+
@ingested_entries_count = registry.counter(
|
240
|
+
:stackdriver_ingested_entries_count,
|
241
|
+
'A number of log entries ingested by Stackdriver Logging')
|
242
|
+
@dropped_entries_count = registry.counter(
|
243
|
+
:stackdriver_dropped_entries_count,
|
244
|
+
'A number of log entries dropped by the Stackdriver output plugin')
|
245
|
+
end
|
246
|
+
|
273
247
|
# Alert on old authentication configuration.
|
274
248
|
unless @auth_method.nil? && @private_key_email.nil? &&
|
275
249
|
@private_key_path.nil? && @private_key_passphrase.nil?
|
@@ -285,57 +259,160 @@ module Fluent
|
|
285
259
|
extra.join(' ')
|
286
260
|
end
|
287
261
|
|
288
|
-
|
262
|
+
# TODO: Send instance tags as labels as well?
|
263
|
+
@common_labels = {}
|
264
|
+
@common_labels.merge!(@labels) if @labels
|
265
|
+
|
266
|
+
# TODO: Construct Google::Api::MonitoredResource when @use_grpc is
|
267
|
+
# true after the protobuf map corruption issue is fixed.
|
268
|
+
@resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
269
|
+
labels: {})
|
270
|
+
|
271
|
+
@compiled_kubernetes_tag_regexp = nil
|
272
|
+
if @kubernetes_tag_regexp
|
273
|
+
@compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp)
|
274
|
+
end
|
289
275
|
|
276
|
+
@cloudfunctions_tag_regexp =
|
277
|
+
/\.(?<encoded_function_name>.+)\.\d+-[^-]+_default_worker$/
|
278
|
+
@cloudfunctions_log_regexp = /^
|
279
|
+
(?:\[(?<severity>.)\])?
|
280
|
+
\[(?<timestamp>.{24})\]
|
281
|
+
(?:\[(?<execution_id>[^\]]+)\])?
|
282
|
+
[ ](?<text>.*)$/x
|
283
|
+
|
284
|
+
@http_latency_regexp = /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
|
285
|
+
|
286
|
+
# set attributes from metadata (unless overridden by static config)
|
287
|
+
@vm_name = Socket.gethostname if @vm_name.nil?
|
290
288
|
@platform = detect_platform
|
289
|
+
case @platform
|
290
|
+
when Platform::GCE
|
291
|
+
if @project_id.nil?
|
292
|
+
@project_id = fetch_gce_metadata('project/project-id')
|
293
|
+
end
|
294
|
+
if @zone.nil?
|
295
|
+
# this returns "projects/<number>/zones/<zone>"; we only want
|
296
|
+
# the part after the final slash.
|
297
|
+
fully_qualified_zone = fetch_gce_metadata('instance/zone')
|
298
|
+
@zone = fully_qualified_zone.rpartition('/')[2]
|
299
|
+
end
|
300
|
+
@vm_id = fetch_gce_metadata('instance/id') if @vm_id.nil?
|
301
|
+
when Platform::EC2
|
302
|
+
metadata = fetch_ec2_metadata
|
303
|
+
if @zone.nil? && metadata.key?('availabilityZone')
|
304
|
+
@zone = 'aws:' + metadata['availabilityZone']
|
305
|
+
end
|
306
|
+
if @vm_id.nil? && metadata.key?('instanceId')
|
307
|
+
@vm_id = metadata['instanceId']
|
308
|
+
end
|
309
|
+
if metadata.key?('accountId')
|
310
|
+
@resource.labels['aws_account'] = metadata['accountId']
|
311
|
+
end
|
312
|
+
when Platform::OTHER
|
313
|
+
# do nothing
|
314
|
+
else
|
315
|
+
fail Fluent::ConfigError, 'Unknown platform ' + @platform
|
316
|
+
end
|
291
317
|
|
292
|
-
#
|
293
|
-
#
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
# explicitly send the key.
|
299
|
-
@resource = call_metadata_agent_for_monitored_resource(
|
300
|
-
IMPLICIT_MONITORED_RESOURCE_UNIQUE_KEY)
|
318
|
+
# If we still don't have a project ID, try to obtain it from the
|
319
|
+
# credentials.
|
320
|
+
if @project_id.nil?
|
321
|
+
@project_id = CredentialsInfo.project_id
|
322
|
+
@log.info 'Set Project ID from credentials: ', @project_id unless
|
323
|
+
@project_id.nil?
|
301
324
|
end
|
302
325
|
|
303
|
-
#
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
# Fail over to retrieve monitored resource via the legacy path if we fail
|
313
|
-
# to get it from Metadata Agent.
|
314
|
-
@resource ||= determine_agent_level_monitored_resource_via_legacy
|
315
|
-
|
316
|
-
# Set variables specific to Cloud Functions. This has to be called after
|
317
|
-
# we have determined the resource type. The purpose is to avoid repeated
|
318
|
-
# calls to metadata server.
|
319
|
-
@running_cloudfunctions = false
|
320
|
-
# We only support Cloud Functions logs for GKE right now.
|
321
|
-
if @resource.type == CONTAINER_CONSTANTS[:resource_type] &&
|
322
|
-
fetch_gce_metadata('instance/attributes/').split.include?('gcf_region')
|
323
|
-
# We are not setting resource type as Cloud Functions here because
|
324
|
-
# whether a log entry is truly coming from a Cloud Functions function
|
325
|
-
# depends on the log tag. Only when @running_cloudfunctions is true will
|
326
|
-
# we try to match log tags against Cloud Functions tag regexp when
|
327
|
-
# processing log entries.
|
328
|
-
@running_cloudfunctions = true
|
329
|
-
# Fetch this info and store it to avoid recurring metadata server calls.
|
330
|
-
@gcf_region = fetch_gce_metadata('instance/attributes/gcf_region')
|
326
|
+
# all metadata parameters must now be set
|
327
|
+
unless @project_id && @zone && @vm_id
|
328
|
+
missing = []
|
329
|
+
missing << 'project_id' unless @project_id
|
330
|
+
missing << 'zone' unless @zone
|
331
|
+
missing << 'vm_id' unless @vm_id
|
332
|
+
fail Fluent::ConfigError, 'Unable to obtain metadata parameters: ' +
|
333
|
+
missing.join(' ')
|
331
334
|
end
|
332
335
|
|
333
|
-
#
|
334
|
-
|
335
|
-
@common_labels = determine_agent_level_common_labels
|
336
|
+
# Default this to false; it is only overwritten if we detect Managed VM.
|
337
|
+
@running_on_managed_vm = false
|
336
338
|
|
337
|
-
#
|
338
|
-
#
|
339
|
+
# Default this to false; it is only overwritten if we detect Cloud
|
340
|
+
# Functions.
|
341
|
+
@running_cloudfunctions = false
|
342
|
+
|
343
|
+
# Set up the MonitoredResource, labels, etc. based on the config.
|
344
|
+
case @platform
|
345
|
+
when Platform::GCE
|
346
|
+
@resource.type = COMPUTE_CONSTANTS[:resource_type]
|
347
|
+
# TODO: introduce a new MonitoredResource-centric configuration and
|
348
|
+
# deprecate subservice-name; for now, translate known uses.
|
349
|
+
if @subservice_name
|
350
|
+
# TODO: what should we do if we encounter an unknown value?
|
351
|
+
if @subservice_name == DATAFLOW_CONSTANTS[:service]
|
352
|
+
@resource.type = DATAFLOW_CONSTANTS[:resource_type]
|
353
|
+
elsif @subservice_name == ML_CONSTANTS[:service]
|
354
|
+
@resource.type = ML_CONSTANTS[:resource_type]
|
355
|
+
end
|
356
|
+
elsif @detect_subservice
|
357
|
+
# Check for specialized GCE environments.
|
358
|
+
# TODO: Add config options for these to allow for running outside GCE?
|
359
|
+
attributes = fetch_gce_metadata('instance/attributes/').split
|
360
|
+
# Do nothing, just don't populate other service's labels.
|
361
|
+
if attributes.include?('gae_backend_name') &&
|
362
|
+
attributes.include?('gae_backend_version')
|
363
|
+
# Managed VM
|
364
|
+
@running_on_managed_vm = true
|
365
|
+
@gae_backend_name =
|
366
|
+
fetch_gce_metadata('instance/attributes/gae_backend_name')
|
367
|
+
@gae_backend_version =
|
368
|
+
fetch_gce_metadata('instance/attributes/gae_backend_version')
|
369
|
+
@resource.type = APPENGINE_CONSTANTS[:resource_type]
|
370
|
+
@resource.labels['module_id'] = @gae_backend_name
|
371
|
+
@resource.labels['version_id'] = @gae_backend_version
|
372
|
+
elsif attributes.include?('kube-env')
|
373
|
+
# Kubernetes/Container Engine
|
374
|
+
@resource.type = CONTAINER_CONSTANTS[:resource_type]
|
375
|
+
@raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
|
376
|
+
@kube_env = YAML.load(@raw_kube_env)
|
377
|
+
@resource.labels['cluster_name'] =
|
378
|
+
cluster_name_from_kube_env(@kube_env)
|
379
|
+
detect_cloudfunctions(attributes)
|
380
|
+
elsif attributes.include?('dataproc-cluster-uuid') &&
|
381
|
+
attributes.include?('dataproc-cluster-name')
|
382
|
+
# Dataproc
|
383
|
+
@resource.type = DATAPROC_CONSTANTS[:resource_type]
|
384
|
+
@resource.labels['cluster_uuid'] =
|
385
|
+
fetch_gce_metadata('instance/attributes/dataproc-cluster-uuid')
|
386
|
+
@resource.labels['cluster_name'] =
|
387
|
+
fetch_gce_metadata('instance/attributes/dataproc-cluster-name')
|
388
|
+
@resource.labels['region'] =
|
389
|
+
fetch_gce_metadata('instance/attributes/dataproc-region')
|
390
|
+
end
|
391
|
+
end
|
392
|
+
# Some services have the GCE instance_id and zone as MonitoredResource
|
393
|
+
# labels; for other services we send them as entry labels.
|
394
|
+
if @resource.type == COMPUTE_CONSTANTS[:resource_type] ||
|
395
|
+
@resource.type == CONTAINER_CONSTANTS[:resource_type]
|
396
|
+
@resource.labels['instance_id'] = @vm_id
|
397
|
+
@resource.labels['zone'] = @zone
|
398
|
+
else
|
399
|
+
common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] = @vm_id
|
400
|
+
common_labels["#{COMPUTE_CONSTANTS[:service]}/zone"] = @zone
|
401
|
+
end
|
402
|
+
common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
403
|
+
when Platform::EC2
|
404
|
+
@resource.type = EC2_CONSTANTS[:resource_type]
|
405
|
+
@resource.labels['instance_id'] = @vm_id
|
406
|
+
@resource.labels['region'] = @zone
|
407
|
+
# the aws_account label is populated above.
|
408
|
+
common_labels["#{EC2_CONSTANTS[:service]}/resource_name"] = @vm_name
|
409
|
+
when Platform::OTHER
|
410
|
+
# Use GCE as the default environment.
|
411
|
+
@resource.type = COMPUTE_CONSTANTS[:resource_type]
|
412
|
+
@resource.labels['instance_id'] = @vm_id
|
413
|
+
@resource.labels['zone'] = @zone
|
414
|
+
common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
415
|
+
end
|
339
416
|
@resource.labels.merge!(
|
340
417
|
extract_resource_labels(@resource.type, common_labels))
|
341
418
|
|
@@ -347,7 +424,7 @@ module Fluent
|
|
347
424
|
|
348
425
|
# Log an informational message containing the Logs viewer URL
|
349
426
|
@log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
|
350
|
-
"viewer?project=#{@project_id}&resource=#{@
|
427
|
+
"viewer?project=#{@project_id}&resource=#{@resource_type}/",
|
351
428
|
"instance_id/#{@vm_id}"
|
352
429
|
end
|
353
430
|
|
@@ -362,15 +439,135 @@ module Fluent
|
|
362
439
|
super
|
363
440
|
end
|
364
441
|
|
442
|
+
def format(tag, time, record)
|
443
|
+
[tag, time, record].to_msgpack
|
444
|
+
end
|
445
|
+
|
446
|
+
# Given a tag, returns the corresponding valid tag if possible, or nil if
|
447
|
+
# the tag should be rejected. If 'require_valid_tags' is false, non-string
|
448
|
+
# tags are converted to strings, and invalid characters are sanitized;
|
449
|
+
# otherwise such tags are rejected.
|
450
|
+
def sanitize_tag(tag)
|
451
|
+
if @require_valid_tags &&
|
452
|
+
(!tag.is_a?(String) || tag == '' || convert_to_utf8(tag) != tag)
|
453
|
+
return nil
|
454
|
+
end
|
455
|
+
tag = convert_to_utf8(tag.to_s)
|
456
|
+
tag = '_' if tag == ''
|
457
|
+
tag
|
458
|
+
end
|
459
|
+
|
460
|
+
# Compute the monitored resource and common labels shared by a collection of
|
461
|
+
# entries.
|
462
|
+
def compute_group_resource_and_labels(tag)
|
463
|
+
# Note that we assume that labels added to group_common_labels below are
|
464
|
+
# not 'service' labels (i.e. we do not call extract_resource_labels
|
465
|
+
# again).
|
466
|
+
group_resource = @resource.dup
|
467
|
+
group_common_labels = @common_labels.dup
|
468
|
+
|
469
|
+
if @running_cloudfunctions
|
470
|
+
# If the current group of entries is coming from a Cloud Functions
|
471
|
+
# function, the function name can be extracted from the tag.
|
472
|
+
match_data = @cloudfunctions_tag_regexp.match(tag)
|
473
|
+
if match_data
|
474
|
+
# Resource type is set to Cloud Functions only for logs actually
|
475
|
+
# coming from a function, otherwise we leave it as Container.
|
476
|
+
group_resource.type = CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
477
|
+
group_resource.labels['region'] = @gcf_region
|
478
|
+
group_resource.labels['function_name'] =
|
479
|
+
decode_cloudfunctions_function_name(
|
480
|
+
match_data['encoded_function_name'])
|
481
|
+
# Move GKE container labels from the MonitoredResource to the
|
482
|
+
# LogEntry.
|
483
|
+
instance_id = group_resource.labels.delete('instance_id')
|
484
|
+
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/cluster_name"] =
|
485
|
+
group_resource.labels.delete('cluster_name')
|
486
|
+
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/instance_id"] =
|
487
|
+
instance_id
|
488
|
+
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] =
|
489
|
+
instance_id
|
490
|
+
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/zone"] =
|
491
|
+
group_resource.labels.delete('zone')
|
492
|
+
end
|
493
|
+
end
|
494
|
+
if group_resource.type == CONTAINER_CONSTANTS[:resource_type] &&
|
495
|
+
@compiled_kubernetes_tag_regexp
|
496
|
+
# Container logs in Kubernetes are tagged based on where they came
|
497
|
+
# from, so we can extract useful metadata from the tag.
|
498
|
+
# Do this here to avoid having to repeat it for each record.
|
499
|
+
match_data = @compiled_kubernetes_tag_regexp.match(tag)
|
500
|
+
if match_data
|
501
|
+
group_resource.labels['container_name'] = match_data['container_name']
|
502
|
+
group_resource.labels['namespace_id'] = match_data['namespace_name']
|
503
|
+
group_resource.labels['pod_id'] = match_data['pod_name']
|
504
|
+
%w(namespace_name pod_name).each do |field|
|
505
|
+
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/#{field}"] =
|
506
|
+
match_data[field]
|
507
|
+
end
|
508
|
+
end
|
509
|
+
end
|
510
|
+
|
511
|
+
# Freeze the per-request state. Any further changes must be made on a
|
512
|
+
# per-entry basis.
|
513
|
+
group_resource.freeze
|
514
|
+
group_resource.labels.freeze
|
515
|
+
group_common_labels.freeze
|
516
|
+
|
517
|
+
[group_resource, group_common_labels]
|
518
|
+
end
|
519
|
+
|
520
|
+
# Extract entry resource and common labels that should be applied to
|
521
|
+
# individual entries from the group resource.
|
522
|
+
def extract_entry_labels(group_resource, record)
|
523
|
+
resource_labels = {}
|
524
|
+
common_labels = {}
|
525
|
+
|
526
|
+
if group_resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
|
527
|
+
record.key?('log')
|
528
|
+
@cloudfunctions_log_match =
|
529
|
+
@cloudfunctions_log_regexp.match(record['log'])
|
530
|
+
end
|
531
|
+
|
532
|
+
if group_resource.type == CONTAINER_CONSTANTS[:resource_type]
|
533
|
+
# Move the stdout/stderr annotation from the record into a label
|
534
|
+
common_labels.merge!(
|
535
|
+
fields_to_labels(
|
536
|
+
record, 'stream' => "#{CONTAINER_CONSTANTS[:service]}/stream"))
|
537
|
+
|
538
|
+
# If the record has been annotated by the kubernetes_metadata_filter
|
539
|
+
# plugin, then use that metadata. Otherwise, rely on commonLabels
|
540
|
+
# populated at the grouped_entries level from the group's tag.
|
541
|
+
if record.key?('kubernetes')
|
542
|
+
extracted_resource_labels, extracted_common_labels = \
|
543
|
+
extract_container_metadata(record)
|
544
|
+
resource_labels.merge!(extracted_resource_labels)
|
545
|
+
common_labels.merge!(extracted_common_labels)
|
546
|
+
end
|
547
|
+
end
|
548
|
+
|
549
|
+
# If a field is present in the label_map, send its value as a label
|
550
|
+
# (mapping the field name to label name as specified in the config)
|
551
|
+
# and do not send that field as part of the payload.
|
552
|
+
common_labels.merge!(fields_to_labels(record, @label_map))
|
553
|
+
|
554
|
+
if group_resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
|
555
|
+
@cloudfunctions_log_match &&
|
556
|
+
@cloudfunctions_log_match['execution_id']
|
557
|
+
common_labels['execution_id'] =
|
558
|
+
@cloudfunctions_log_match['execution_id']
|
559
|
+
end
|
560
|
+
resource_labels.merge!(
|
561
|
+
extract_resource_labels(group_resource.type, common_labels))
|
562
|
+
|
563
|
+
[resource_labels, common_labels]
|
564
|
+
end
|
565
|
+
|
365
566
|
def write(chunk)
|
366
567
|
# Group the entries since we have to make one call per tag.
|
367
568
|
grouped_entries = {}
|
368
569
|
chunk.msgpack_each do |tag, *arr|
|
369
570
|
sanitized_tag = sanitize_tag(tag)
|
370
|
-
@log.error("tag is: ")
|
371
|
-
@log.error(tag.inspect)
|
372
|
-
@log.error("arr is: ")
|
373
|
-
@log.error(arr.inspect)
|
374
571
|
if sanitized_tag.nil?
|
375
572
|
@log.warn "Dropping log entries with invalid tag: '#{tag}'. " \
|
376
573
|
'A tag should be a string with utf8 characters.'
|
@@ -382,22 +579,20 @@ module Fluent
|
|
382
579
|
|
383
580
|
grouped_entries.each do |tag, arr|
|
384
581
|
entries = []
|
385
|
-
group_resource, group_common_labels =
|
386
|
-
|
582
|
+
group_resource, group_common_labels = compute_group_resource_and_labels(
|
583
|
+
tag)
|
387
584
|
|
388
585
|
arr.each do |time, record|
|
389
586
|
next unless record.is_a?(Hash)
|
390
587
|
|
391
|
-
|
392
|
-
|
588
|
+
extracted_resource_labels, extracted_common_labels = \
|
589
|
+
extract_entry_labels(group_resource, record)
|
393
590
|
entry_resource = group_resource.dup
|
394
|
-
entry_resource.type = resource_type
|
395
591
|
entry_resource.labels.merge!(extracted_resource_labels)
|
396
592
|
entry_common_labels = \
|
397
593
|
group_common_labels.merge(extracted_common_labels)
|
398
594
|
|
399
|
-
if
|
400
|
-
DOCKER_CONSTANTS[:resource_type]].include?(entry_resource.type)
|
595
|
+
if entry_resource.type == CONTAINER_CONSTANTS[:resource_type]
|
401
596
|
# Save the timestamp if available, then clear it out to allow for
|
402
597
|
# determining whether we should parse the log or message field.
|
403
598
|
timestamp = record.key?('time') ? record['time'] : nil
|
@@ -491,6 +686,8 @@ module Fluent
|
|
491
686
|
)
|
492
687
|
|
493
688
|
client.write_log_entries(write_request)
|
689
|
+
increment_successful_requests_count
|
690
|
+
increment_ingested_entries_count(entries.length)
|
494
691
|
|
495
692
|
# Let the user explicitly know when the first call succeeded,
|
496
693
|
# to aid with verification and troubleshooting.
|
@@ -500,10 +697,12 @@ module Fluent
|
|
500
697
|
end
|
501
698
|
|
502
699
|
rescue GRPC::Cancelled => error
|
700
|
+
increment_failed_requests_count(GRPC::Core::StatusCodes::CANCELLED)
|
503
701
|
# RPC cancelled, so retry via re-raising the error.
|
504
702
|
raise error
|
505
703
|
|
506
704
|
rescue GRPC::BadStatus => error
|
705
|
+
increment_failed_requests_count(error.code)
|
507
706
|
case error.code
|
508
707
|
when GRPC::Core::StatusCodes::CANCELLED,
|
509
708
|
GRPC::Core::StatusCodes::UNAVAILABLE,
|
@@ -518,6 +717,7 @@ module Fluent
|
|
518
717
|
# Most client errors indicate a problem with the request itself
|
519
718
|
# and should not be retried.
|
520
719
|
dropped = entries.length
|
720
|
+
increment_dropped_entries_count(dropped)
|
521
721
|
@log.warn "Dropping #{dropped} log message(s)",
|
522
722
|
error: error.to_s, error_code: error.code.to_s
|
523
723
|
when GRPC::Core::StatusCodes::UNAUTHENTICATED
|
@@ -525,12 +725,14 @@ module Fluent
|
|
525
725
|
# These are usually solved via a `gcloud auth` call, or by
|
526
726
|
# modifying the permissions on the Google Cloud project.
|
527
727
|
dropped = entries.length
|
728
|
+
increment_dropped_entries_count(dropped)
|
528
729
|
@log.warn "Dropping #{dropped} log message(s)",
|
529
730
|
error: error.to_s, error_code: error.code.to_s
|
530
731
|
else
|
531
732
|
# Assume this is a problem with the request itself
|
532
733
|
# and don't retry.
|
533
734
|
dropped = entries.length
|
735
|
+
increment_dropped_entries_count(dropped)
|
534
736
|
@log.error "Unknown response code #{error.code} from the "\
|
535
737
|
"server, dropping #{dropped} log message(s)",
|
536
738
|
error: error.to_s, error_code: error.code.to_s
|
@@ -546,7 +748,14 @@ module Fluent
|
|
546
748
|
entries: entries)
|
547
749
|
|
548
750
|
# TODO: RequestOptions
|
549
|
-
|
751
|
+
begin
|
752
|
+
client.write_entry_log_entries(write_request)
|
753
|
+
rescue Google::Apis::Error => error
|
754
|
+
increment_failed_requests_count(error.status_code)
|
755
|
+
raise error
|
756
|
+
end
|
757
|
+
increment_successful_requests_count
|
758
|
+
increment_ingested_entries_count(entries.length)
|
550
759
|
|
551
760
|
# Let the user explicitly know when the first call succeeded,
|
552
761
|
# to aid with verification and troubleshooting.
|
@@ -564,6 +773,7 @@ module Fluent
|
|
564
773
|
# These are usually solved via a `gcloud auth` call, or by modifying
|
565
774
|
# the permissions on the Google Cloud project.
|
566
775
|
dropped = entries.length
|
776
|
+
increment_dropped_entries_count(dropped)
|
567
777
|
@log.warn "Dropping #{dropped} log message(s)",
|
568
778
|
error_class: error.class.to_s, error: error.to_s
|
569
779
|
|
@@ -571,6 +781,7 @@ module Fluent
|
|
571
781
|
# Most ClientErrors indicate a problem with the request itself and
|
572
782
|
# should not be retried.
|
573
783
|
dropped = entries.length
|
784
|
+
increment_dropped_entries_count(dropped)
|
574
785
|
@log.warn "Dropping #{dropped} log message(s)",
|
575
786
|
error_class: error.class.to_s, error: error.to_s
|
576
787
|
end
|
@@ -630,7 +841,7 @@ module Fluent
|
|
630
841
|
end
|
631
842
|
end
|
632
843
|
rescue StandardError => e
|
633
|
-
@log.
|
844
|
+
@log.debug 'Failed to access metadata service: ', error: e
|
634
845
|
end
|
635
846
|
|
636
847
|
@log.info 'Unable to determine platform'
|
@@ -645,600 +856,15 @@ module Fluent
|
|
645
856
|
metadata_path, 'Metadata-Flavor' => 'Google', &:read)
|
646
857
|
end
|
647
858
|
|
648
|
-
|
649
|
-
|
650
|
-
def ec2_metadata
|
651
|
-
fail "Called ec2_metadata with platform=#{@platform}" unless
|
859
|
+
def fetch_ec2_metadata
|
860
|
+
fail "Called fetch_ec2_metadata with platform=#{@platform}" unless
|
652
861
|
@platform == Platform::EC2
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
@ec2_metadata = JSON.parse(contents)
|
659
|
-
end
|
660
|
-
end
|
661
|
-
|
662
|
-
@ec2_metadata
|
663
|
-
end
|
664
|
-
|
665
|
-
# Set regexp patterns to parse tags and logs.
|
666
|
-
def set_regexp_patterns
|
667
|
-
@compiled_kubernetes_tag_regexp = nil
|
668
|
-
if @kubernetes_tag_regexp
|
669
|
-
@compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp)
|
862
|
+
# See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
|
863
|
+
open('http://' + METADATA_SERVICE_ADDR +
|
864
|
+
'/latest/dynamic/instance-identity/document') do |f|
|
865
|
+
contents = f.read
|
866
|
+
return JSON.parse(contents)
|
670
867
|
end
|
671
|
-
|
672
|
-
@cloudfunctions_tag_regexp =
|
673
|
-
/\.(?<encoded_function_name>.+)\.\d+-[^-]+_default_worker$/
|
674
|
-
@cloudfunctions_log_regexp = /^
|
675
|
-
(?:\[(?<severity>.)\])?
|
676
|
-
\[(?<timestamp>.{24})\]
|
677
|
-
(?:\[(?<execution_id>[^\]]+)\])?
|
678
|
-
[ ](?<text>.*)$/x
|
679
|
-
|
680
|
-
@http_latency_regexp = /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
|
681
|
-
end
|
682
|
-
|
683
|
-
# Set required variables like @project_id, @vm_id, @vm_name and @zone.
|
684
|
-
def set_required_metadata_variables
|
685
|
-
set_project_id
|
686
|
-
set_vm_id
|
687
|
-
set_vm_name
|
688
|
-
set_location
|
689
|
-
|
690
|
-
# All metadata parameters must now be set.
|
691
|
-
return if @project_id && @zone && @vm_id
|
692
|
-
missing = []
|
693
|
-
missing << 'project_id' unless @project_id
|
694
|
-
missing << 'zone' unless @zone
|
695
|
-
missing << 'vm_id' unless @vm_id
|
696
|
-
fail Fluent::ConfigError, 'Unable to obtain metadata parameters: ' +
|
697
|
-
missing.join(' ')
|
698
|
-
end
|
699
|
-
|
700
|
-
# 1. Return the value if it is explicitly set in the config already.
|
701
|
-
# 2. If not, try to retrieve it by calling metadata server directly.
|
702
|
-
# 3. If still not set, try to obtain it from the credentials.
|
703
|
-
def set_project_id
|
704
|
-
@project_id ||= fetch_gce_metadata('project/project-id') if
|
705
|
-
@platform == Platform::GCE
|
706
|
-
@project_id ||= CredentialsInfo.project_id
|
707
|
-
rescue StandardError => e
|
708
|
-
@log.error 'Failed to obtain project id: ', error: e
|
709
|
-
end
|
710
|
-
|
711
|
-
# 1. Return the value if it is explicitly set in the config already.
|
712
|
-
# 2. If not, check if the response from Metadata Agent includes this info.
|
713
|
-
# 3. If not, try to retrieve it by calling metadata servers directly.
|
714
|
-
def set_vm_id
|
715
|
-
@vm_id ||= @resource.labels['instance_id'] if
|
716
|
-
!@resource.nil? && @resource.labels.key?('instance_id')
|
717
|
-
@vm_id ||= fetch_gce_metadata('instance/id') if @platform == Platform::GCE
|
718
|
-
@vm_id ||= ec2_metadata['instanceId'] if @platform == Platform::EC2
|
719
|
-
rescue StandardError => e
|
720
|
-
@log.error 'Failed to obtain vm_id: ', error: e
|
721
|
-
end
|
722
|
-
|
723
|
-
# 1. Return the value if it is explicitly set in the config already.
|
724
|
-
# 2. If not, check if the response from Metadata Agent includes this info.
|
725
|
-
# 3. If not, try to retrieve it locally.
|
726
|
-
def set_vm_name
|
727
|
-
@vm_name ||= @resource.labels['instance_name'] if
|
728
|
-
!@resource.nil? && @resource.labels.key?('instance_name')
|
729
|
-
@vm_name ||= Socket.gethostname
|
730
|
-
rescue StandardError => e
|
731
|
-
@log.error 'Failed to obtain vm name: ', error: e
|
732
|
-
end
|
733
|
-
|
734
|
-
# 1. Return the value if it is explicitly set in the config already.
|
735
|
-
# 2. If not, check if the response from Metadata Agent includes this info.
|
736
|
-
# 3. If not, try to retrieve it locally.
|
737
|
-
def set_location
|
738
|
-
unless @resource.nil?
|
739
|
-
@zone ||= @resource.labels['location'] if
|
740
|
-
@resource.type == DOCKER_CONSTANTS[:resource_type] &&
|
741
|
-
@resource.labels.key?('location')
|
742
|
-
@zone ||= @resource.labels['zone'] if
|
743
|
-
@platform == Platform::GCE && @resource.labels.key?('zone')
|
744
|
-
@zone ||= @resource.labels['region'] if
|
745
|
-
@platform == Platform::EC2 && @resource.labels.key?('region')
|
746
|
-
end
|
747
|
-
# Response format: "projects/<number>/zones/<zone>"
|
748
|
-
@zone ||= fetch_gce_metadata('instance/zone').rpartition('/')[2] if
|
749
|
-
@platform == Platform::GCE
|
750
|
-
@zone ||= 'aws:' + ec2_metadata['availabilityZone'] if
|
751
|
-
@platform == Platform::EC2 && ec2_metadata.key?('availabilityZone')
|
752
|
-
rescue StandardError => e
|
753
|
-
@log.error 'Failed to obtain location: ', error: e
|
754
|
-
end
|
755
|
-
|
756
|
-
# Retrieve monitored resource via the legacy way.
|
757
|
-
#
|
758
|
-
# Note: This is just a failover plan if we fail to get metadata from
|
759
|
-
# Metadata Agent. Thus it should be equivalent to what Metadata Agent
|
760
|
-
# returns.
|
761
|
-
def determine_agent_level_monitored_resource_via_legacy
|
762
|
-
resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
763
|
-
labels: {})
|
764
|
-
resource.type = determine_agent_level_monitored_resource_type
|
765
|
-
resource.labels = determine_agent_level_monitored_resource_labels(
|
766
|
-
resource.type)
|
767
|
-
resource
|
768
|
-
end
|
769
|
-
|
770
|
-
# Determine agent level monitored resource type.
|
771
|
-
def determine_agent_level_monitored_resource_type
|
772
|
-
# EC2 instance.
|
773
|
-
return EC2_CONSTANTS[:resource_type] if
|
774
|
-
@platform == Platform::EC2
|
775
|
-
|
776
|
-
# Unknown platform will be defaulted to GCE instance..
|
777
|
-
return COMPUTE_CONSTANTS[:resource_type] if
|
778
|
-
@platform == Platform::OTHER
|
779
|
-
|
780
|
-
# Resource types determined by @subservice_name config.
|
781
|
-
# Cloud Dataflow.
|
782
|
-
return DATAFLOW_CONSTANTS[:resource_type] if
|
783
|
-
@subservice_name == DATAFLOW_CONSTANTS[:service]
|
784
|
-
# Cloud ML.
|
785
|
-
return ML_CONSTANTS[:resource_type] if
|
786
|
-
@subservice_name == ML_CONSTANTS[:service]
|
787
|
-
# Default back to GCE if invalid value is detected.
|
788
|
-
return COMPUTE_CONSTANTS[:resource_type] if
|
789
|
-
@subservice_name
|
790
|
-
|
791
|
-
# Resource types determined by @detect_subservice config.
|
792
|
-
if @detect_subservice
|
793
|
-
begin
|
794
|
-
attributes = fetch_gce_metadata('instance/attributes/').split
|
795
|
-
rescue StandardError => e
|
796
|
-
@log.error 'Failed to detect subservice: ', error: e
|
797
|
-
end
|
798
|
-
# GAE app.
|
799
|
-
return APPENGINE_CONSTANTS[:resource_type] if
|
800
|
-
attributes.include?('gae_backend_name') &&
|
801
|
-
attributes.include?('gae_backend_version')
|
802
|
-
# GKE container.
|
803
|
-
return CONTAINER_CONSTANTS[:resource_type] if
|
804
|
-
attributes.include?('kube-env')
|
805
|
-
# Cloud Dataproc.
|
806
|
-
return DATAPROC_CONSTANTS[:resource_type] if
|
807
|
-
attributes.include?('dataproc-cluster-uuid') &&
|
808
|
-
attributes.include?('dataproc-cluster-name')
|
809
|
-
end
|
810
|
-
# GCE instance.
|
811
|
-
COMPUTE_CONSTANTS[:resource_type]
|
812
|
-
end
|
813
|
-
|
814
|
-
# Determine agent level monitored resource labels based on the resource
|
815
|
-
# type. Each resource type has its own labels that need to be filled in.
|
816
|
-
def determine_agent_level_monitored_resource_labels(type)
|
817
|
-
labels = {}
|
818
|
-
|
819
|
-
case type
|
820
|
-
|
821
|
-
# GAE app.
|
822
|
-
when APPENGINE_CONSTANTS[:resource_type]
|
823
|
-
begin
|
824
|
-
labels['module_id'] = fetch_gce_metadata(
|
825
|
-
'instance/attributes/gae_backend_name')
|
826
|
-
labels['version_id'] = fetch_gce_metadata(
|
827
|
-
'instance/attributes/gae_backend_version')
|
828
|
-
rescue StandardError => e
|
829
|
-
@log.error 'Failed to set monitored resource labels for GAE: ',
|
830
|
-
error: e
|
831
|
-
end
|
832
|
-
|
833
|
-
# GCE.
|
834
|
-
when COMPUTE_CONSTANTS[:resource_type]
|
835
|
-
labels['instance_id'] = @vm_id
|
836
|
-
labels['zone'] = @zone
|
837
|
-
|
838
|
-
# GKE container.
|
839
|
-
when CONTAINER_CONSTANTS[:resource_type]
|
840
|
-
labels['instance_id'] = @vm_id
|
841
|
-
labels['zone'] = @zone
|
842
|
-
begin
|
843
|
-
raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
|
844
|
-
kube_env = YAML.load(raw_kube_env)
|
845
|
-
labels['cluster_name'] =
|
846
|
-
cluster_name_from_kube_env(kube_env)
|
847
|
-
rescue StandardError => e
|
848
|
-
@log.error 'Failed to set monitored resource labels for GKE: ',
|
849
|
-
error: e
|
850
|
-
end
|
851
|
-
|
852
|
-
# Cloud Dataproc.
|
853
|
-
when DATAPROC_CONSTANTS[:resource_type]
|
854
|
-
begin
|
855
|
-
labels['cluster_uuid'] =
|
856
|
-
fetch_gce_metadata('instance/attributes/dataproc-cluster-uuid')
|
857
|
-
labels['cluster_name'] =
|
858
|
-
fetch_gce_metadata('instance/attributes/dataproc-cluster-name')
|
859
|
-
labels['region'] =
|
860
|
-
fetch_gce_metadata('instance/attributes/dataproc-region')
|
861
|
-
rescue StandardError => e
|
862
|
-
@log.error 'Failed to set monitored resource labels for Cloud ' \
|
863
|
-
'Dataproc: ', error: e
|
864
|
-
end
|
865
|
-
|
866
|
-
# EC2.
|
867
|
-
when EC2_CONSTANTS[:resource_type]
|
868
|
-
labels['instance_id'] = @vm_id
|
869
|
-
labels['region'] = @zone
|
870
|
-
labels['aws_account'] = ec2_metadata['accountId'] if
|
871
|
-
ec2_metadata.key?('accountId')
|
872
|
-
end
|
873
|
-
labels
|
874
|
-
end
|
875
|
-
|
876
|
-
# Determine the common labels that should be added to all log entries
|
877
|
-
# processed by this logging agent.
|
878
|
-
def determine_agent_level_common_labels
|
879
|
-
labels = {}
|
880
|
-
# User can specify labels via config. We want to capture those as well.
|
881
|
-
# TODO: Send instance tags as labels as well?
|
882
|
-
labels.merge!(@labels) if @labels
|
883
|
-
|
884
|
-
case @resource.type
|
885
|
-
|
886
|
-
# GAE app.
|
887
|
-
when APPENGINE_CONSTANTS[:resource_type]
|
888
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] = @vm_id
|
889
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
890
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/zone"] = @zone
|
891
|
-
|
892
|
-
# GCE and GKE container.
|
893
|
-
when COMPUTE_CONSTANTS[:resource_type],
|
894
|
-
CONTAINER_CONSTANTS[:resource_type]
|
895
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
896
|
-
|
897
|
-
# Cloud Dataflow and Cloud Dataproc.
|
898
|
-
when DATAFLOW_CONSTANTS[:resource_type],
|
899
|
-
DATAPROC_CONSTANTS[:resource_type]
|
900
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] = @vm_id
|
901
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
902
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/zone"] = @zone
|
903
|
-
|
904
|
-
# EC2.
|
905
|
-
when EC2_CONSTANTS[:resource_type]
|
906
|
-
labels["#{EC2_CONSTANTS[:service]}/resource_name"] = @vm_name
|
907
|
-
|
908
|
-
# Cloud ML.
|
909
|
-
when ML_CONSTANTS[:resource_type]
|
910
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] = @vm_id
|
911
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
912
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/zone"] = @zone
|
913
|
-
end
|
914
|
-
labels
|
915
|
-
end
|
916
|
-
|
917
|
-
# Determine the group level monitored resource and common labels shared by a
|
918
|
-
# collection of entries.
|
919
|
-
def determine_group_level_monitored_resource_and_labels(tag)
|
920
|
-
# Determine group level monitored resource type. For certain types,
|
921
|
-
# extract useful info from the tag and store those in
|
922
|
-
# matched_regexp_group.
|
923
|
-
group_resource_type, matched_regexp_group =
|
924
|
-
determine_group_level_monitored_resource_type(tag)
|
925
|
-
|
926
|
-
# Determine group level monitored resource labels and common labels.
|
927
|
-
group_resource_type, group_resource_labels, group_common_labels = \
|
928
|
-
determine_group_level_labels_and_adjust_type(
|
929
|
-
group_resource_type, matched_regexp_group)
|
930
|
-
|
931
|
-
group_resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
932
|
-
type: group_resource_type,
|
933
|
-
labels: group_resource_labels.to_h
|
934
|
-
)
|
935
|
-
|
936
|
-
# Freeze the per-request state. Any further changes must be made on a
|
937
|
-
# per-entry basis.
|
938
|
-
group_resource.freeze
|
939
|
-
group_resource.labels.freeze
|
940
|
-
group_common_labels.freeze
|
941
|
-
|
942
|
-
[group_resource, group_common_labels]
|
943
|
-
end
|
944
|
-
|
945
|
-
# Determine group level monitored resource type shared by a collection of
|
946
|
-
# entries.
|
947
|
-
# Returns the resource type and tag regexp matched groups. The matched
|
948
|
-
# groups only apply to some resource types. Return nil if not applicable or
|
949
|
-
# if there is no match.
|
950
|
-
def determine_group_level_monitored_resource_type(tag)
|
951
|
-
# Match tag against Cloud Functions format.
|
952
|
-
if @running_cloudfunctions
|
953
|
-
matched_regexp_group = @cloudfunctions_tag_regexp.match(tag)
|
954
|
-
return [CLOUDFUNCTIONS_CONSTANTS[:resource_type],
|
955
|
-
matched_regexp_group] if matched_regexp_group
|
956
|
-
end
|
957
|
-
|
958
|
-
# Match tag against Docker container stderr / stdout log format and
|
959
|
-
# Docker container application log format.
|
960
|
-
matched_regexp_group =
|
961
|
-
# Format: "container.<container_id>.<container_name>"
|
962
|
-
@dockercontainer_tag_regexp.match(tag) ||
|
963
|
-
# Format: "application-container.<container_name>.<additional_tag>"
|
964
|
-
@dockercontainer_tag_with_application_regexp.match(tag)
|
965
|
-
return [DOCKER_CONSTANTS[:resource_type], matched_regexp_group] if
|
966
|
-
matched_regexp_group
|
967
|
-
|
968
|
-
# Match tag against GKE Container format.
|
969
|
-
if @resource.type == CONTAINER_CONSTANTS[:resource_type] &&
|
970
|
-
@compiled_kubernetes_tag_regexp
|
971
|
-
# Container logs in Kubernetes are tagged based on where they came from,
|
972
|
-
# so we can extract useful metadata from the tag. Do this here to avoid
|
973
|
-
# having to repeat it for each record.
|
974
|
-
matched_regexp_group = @compiled_kubernetes_tag_regexp.match(tag)
|
975
|
-
return [@resource.type, matched_regexp_group] if matched_regexp_group
|
976
|
-
end
|
977
|
-
|
978
|
-
# Otherwise, return the original type.
|
979
|
-
[@resource.type, nil]
|
980
|
-
end
|
981
|
-
|
982
|
-
# Determine group level monitored resource labels and common labels. These
|
983
|
-
# labels will be shared by a collection of entries. In certain cases, we
|
984
|
-
# might also adjust the resource type.
|
985
|
-
def determine_group_level_labels_and_adjust_type(group_resource_type,
|
986
|
-
matched_regexp_group)
|
987
|
-
group_resource_labels = @resource.labels.dup
|
988
|
-
group_common_labels = @common_labels.dup
|
989
|
-
|
990
|
-
case group_resource_type
|
991
|
-
|
992
|
-
# Cloud Functions.
|
993
|
-
when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
994
|
-
group_resource_labels['region'] = @gcf_region
|
995
|
-
group_resource_labels['function_name'] =
|
996
|
-
decode_cloudfunctions_function_name(
|
997
|
-
matched_regexp_group['encoded_function_name'])
|
998
|
-
instance_id = group_resource_labels.delete('instance_id')
|
999
|
-
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/cluster_name"] =
|
1000
|
-
group_resource_labels.delete('cluster_name')
|
1001
|
-
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/instance_id"] =
|
1002
|
-
instance_id
|
1003
|
-
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] =
|
1004
|
-
instance_id
|
1005
|
-
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/zone"] =
|
1006
|
-
group_resource_labels.delete('zone')
|
1007
|
-
|
1008
|
-
# GKE container.
|
1009
|
-
when CONTAINER_CONSTANTS[:resource_type]
|
1010
|
-
if matched_regexp_group
|
1011
|
-
group_resource_labels['container_name'] =
|
1012
|
-
matched_regexp_group['container_name']
|
1013
|
-
group_resource_labels['namespace_id'] =
|
1014
|
-
matched_regexp_group['namespace_name']
|
1015
|
-
group_resource_labels['pod_id'] =
|
1016
|
-
matched_regexp_group['pod_name']
|
1017
|
-
%w(namespace_name pod_name).each do |field|
|
1018
|
-
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/#{field}"] =
|
1019
|
-
matched_regexp_group[field]
|
1020
|
-
end
|
1021
|
-
end
|
1022
|
-
|
1023
|
-
# Docker container.
|
1024
|
-
when DOCKER_CONSTANTS[:resource_type]
|
1025
|
-
# For Docker container stderr / stdout logs generated by Docker Fluentd
|
1026
|
-
# Logging Driver, tags are in the format of "container.<container_id>.
|
1027
|
-
# <container_name>", thus they include 'container_id' info.
|
1028
|
-
# For logs generated by applications running in Docker containers,
|
1029
|
-
# tags are in the format of "application-container.<container_name>.
|
1030
|
-
# <additional_tag>", thus 'container_id' info is unknown yet.
|
1031
|
-
# 'container_name' info on the other hand is always available.
|
1032
|
-
container_id = matched_regexp_group['container_id'] if
|
1033
|
-
matched_regexp_group.names.include? 'container_id'
|
1034
|
-
container_name = matched_regexp_group['container_name']
|
1035
|
-
|
1036
|
-
if @enable_metadata_agent
|
1037
|
-
# Call Metadata Agent with "container.<container_id>" or
|
1038
|
-
# "application-container.<container_name>" as the locally-unique key
|
1039
|
-
# to retrieve monitored resource. This should be different from the
|
1040
|
-
# original @resource value that got initiated when the agent starts up
|
1041
|
-
# because that one is always at the VM level.
|
1042
|
-
if container_id
|
1043
|
-
locally_unique_id = "container.#{container_id}"
|
1044
|
-
else
|
1045
|
-
locally_unique_id = "containerName.#{container_name}"
|
1046
|
-
end
|
1047
|
-
retrieved_resource = call_metadata_agent_for_monitored_resource(
|
1048
|
-
locally_unique_id)
|
1049
|
-
end
|
1050
|
-
|
1051
|
-
if !retrieved_resource.nil?
|
1052
|
-
# If we successfully get a monitored resource from Metadata Agent,
|
1053
|
-
# use this one instead of the original instance monitored resource.
|
1054
|
-
group_resource_labels = retrieved_resource.labels.dup
|
1055
|
-
@log.debug 'Retrieved monitored resource from Metadata Agent: ' \
|
1056
|
-
"#{retrieved_resource.inspect}."
|
1057
|
-
else
|
1058
|
-
# If Metadata Agent is not enabled, or we failed to get a monitored
|
1059
|
-
# resource, we need to have some backup plan.
|
1060
|
-
@log.debug 'Metadata Agent not enabled or failed to retrieve ' \
|
1061
|
-
'docker container monitored resource from Metadata ' \
|
1062
|
-
'Agent.'
|
1063
|
-
|
1064
|
-
# 1. Check if 'container_id' is set already. It should be available
|
1065
|
-
# for stdout / stderr). If so, use that.
|
1066
|
-
# 2. If not, call Docker Remote API to retrieve the container ID from
|
1067
|
-
# container name, but only if @call_docker_api_locally is true.
|
1068
|
-
container_id ||= retrieve_container_id_by_name_locally(
|
1069
|
-
container_name) if @call_docker_api_locally
|
1070
|
-
unless container_id
|
1071
|
-
@log.debug 'No docker container id retrieved. Falling back to
|
1072
|
-
instance monitored resource.'
|
1073
|
-
# If a container id is not available, fall back to the instance
|
1074
|
-
# monitored resource.
|
1075
|
-
return [COMPUTE_CONSTANTS[:resource_type], group_resource_labels,
|
1076
|
-
group_common_labels]
|
1077
|
-
end
|
1078
|
-
group_resource_labels['container_id'] = container_id
|
1079
|
-
# 'zone' for GCP and 'region' for EC2 must have been set at this
|
1080
|
-
# point. Rename them to 'location'.
|
1081
|
-
group_resource_labels['location'] = @zone
|
1082
|
-
if @platform == Platform::EC2
|
1083
|
-
group_resource_labels.delete('region')
|
1084
|
-
else
|
1085
|
-
group_resource_labels.delete('zone')
|
1086
|
-
end
|
1087
|
-
# vm id info should be reported as a metadata label instead.
|
1088
|
-
group_resource_labels.delete('instance_id')
|
1089
|
-
|
1090
|
-
end
|
1091
|
-
# Set metadata labels.
|
1092
|
-
group_common_labels["#{DOCKER_CONSTANTS[:service]}/container_name"] =
|
1093
|
-
matched_regexp_group['container_name']
|
1094
|
-
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] =
|
1095
|
-
@vm_id
|
1096
|
-
end
|
1097
|
-
|
1098
|
-
[group_resource_type, group_resource_labels, group_common_labels]
|
1099
|
-
end
|
1100
|
-
|
1101
|
-
# Extract entry resource and common labels that should be applied to
|
1102
|
-
# individual entries from the group resource.
|
1103
|
-
def determine_entry_level_labels(group_resource, record)
|
1104
|
-
resource_type = group_resource.type
|
1105
|
-
resource_labels = {}
|
1106
|
-
common_labels = {}
|
1107
|
-
|
1108
|
-
# The format of the locally unique key varies by monitored resource.
|
1109
|
-
#
|
1110
|
-
# Docker container:
|
1111
|
-
# "container.<container_id>"
|
1112
|
-
# "containerName.<container_name>"
|
1113
|
-
# GKE container:
|
1114
|
-
# "gke_containerName.<namespace_id>.<pod_name>.<container_name>"
|
1115
|
-
if @enable_metadata_agent && record.key?(LOCALLY_UNIQUE_ID_LABEL_NAME)
|
1116
|
-
# Call Metadata Agent with "gke_containerName.<namespace_id>.
|
1117
|
-
# <pod_name>.<container_name>" as the locally-unique key to retrieve
|
1118
|
-
# monitored resource.
|
1119
|
-
locally_unique_id = record[LOCALLY_UNIQUE_ID_LABEL_NAME]
|
1120
|
-
@log.debug 'Calling metadata agent with locally unique id: ' \
|
1121
|
-
"#{locally_unique_id}."
|
1122
|
-
retrieved_resource = call_metadata_agent_for_monitored_resource(
|
1123
|
-
locally_unique_id)
|
1124
|
-
@log.debug 'Retrieved monitored resource from metadata agent: ' \
|
1125
|
-
"#{retrieved_resource.inspect}."
|
1126
|
-
unless retrieved_resource.nil?
|
1127
|
-
resource_type = retrieved_resource.type
|
1128
|
-
# Temporarily renaming 'gke_container' to 'container'.
|
1129
|
-
resource_type = 'container' if resource_type == 'gke_container'
|
1130
|
-
# If we successfully get a monitored resource from Metadata Agent,
|
1131
|
-
# use this one instead of the original VM-level monitored resource.
|
1132
|
-
resource_labels = retrieved_resource.labels.dup
|
1133
|
-
record.delete(LOCALLY_UNIQUE_ID_LABEL_NAME)
|
1134
|
-
@log.debug 'Retrieved gke_container monitored resource from' \
|
1135
|
-
'Stackdriver Metadata agent: ' \
|
1136
|
-
"#{retrieved_resource.inspect}."
|
1137
|
-
end
|
1138
|
-
end
|
1139
|
-
|
1140
|
-
# Cloud Functions.
|
1141
|
-
if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
|
1142
|
-
record.key?('log')
|
1143
|
-
@cloudfunctions_log_match =
|
1144
|
-
@cloudfunctions_log_regexp.match(record['log'])
|
1145
|
-
common_labels['execution_id'] =
|
1146
|
-
@cloudfunctions_log_match['execution_id'] if \
|
1147
|
-
@cloudfunctions_log_match &&
|
1148
|
-
@cloudfunctions_log_match['execution_id']
|
1149
|
-
end
|
1150
|
-
|
1151
|
-
# GKE containers.
|
1152
|
-
if resource_type == CONTAINER_CONSTANTS[:resource_type]
|
1153
|
-
# Move the stdout/stderr annotation from the record into a label.
|
1154
|
-
common_labels.merge!(
|
1155
|
-
fields_to_labels(
|
1156
|
-
record, 'stream' => "#{CONTAINER_CONSTANTS[:service]}/stream"))
|
1157
|
-
|
1158
|
-
# If the record has been annotated by the kubernetes_metadata_filter
|
1159
|
-
# plugin, then use that metadata. Otherwise, rely on commonLabels
|
1160
|
-
# populated at the grouped_entries level from the group's tag.
|
1161
|
-
if record.key?('kubernetes')
|
1162
|
-
extracted_resource_labels, extracted_common_labels = \
|
1163
|
-
extract_container_metadata(record)
|
1164
|
-
resource_labels.merge!(extracted_resource_labels)
|
1165
|
-
common_labels.merge!(extracted_common_labels)
|
1166
|
-
end
|
1167
|
-
end
|
1168
|
-
|
1169
|
-
# Docker containers.
|
1170
|
-
if resource_type == DOCKER_CONSTANTS[:resource_type]
|
1171
|
-
# For logs coming from Docker Fluentd Logging Driver, the log record
|
1172
|
-
# has 4 fields: 'container_id', 'container_name', 'source' and 'log'.
|
1173
|
-
# Extract 'container_id', 'container_name' and 'source' from json
|
1174
|
-
# record, set corresponding labels, and remove these fields from record.
|
1175
|
-
{
|
1176
|
-
'container_name' => 'container_name',
|
1177
|
-
'source' => 'stream'
|
1178
|
-
}.each do |field_name, label_name|
|
1179
|
-
common_labels.merge!(
|
1180
|
-
fields_to_labels(
|
1181
|
-
record,
|
1182
|
-
field_name => "#{DOCKER_CONSTANTS[:service]}/#{label_name}"
|
1183
|
-
)
|
1184
|
-
)
|
1185
|
-
end
|
1186
|
-
resource_labels.merge!(
|
1187
|
-
fields_to_labels(record, 'container_id' => 'container_id'))
|
1188
|
-
end
|
1189
|
-
|
1190
|
-
# If the name of a field in the record is present in the @label_map
|
1191
|
-
# configured by users, report its value as a label and do not send that
|
1192
|
-
# field as part of the payload.
|
1193
|
-
common_labels.merge!(fields_to_labels(record, @label_map))
|
1194
|
-
|
1195
|
-
resource_labels.merge!(
|
1196
|
-
extract_resource_labels(resource_type, common_labels))
|
1197
|
-
|
1198
|
-
[resource_type, resource_labels, common_labels]
|
1199
|
-
end
|
1200
|
-
|
1201
|
-
# Call Metadata Agent to get monitored resource information and parse
|
1202
|
-
# response to Google::Api::MonitoredResource.
|
1203
|
-
def call_metadata_agent_for_monitored_resource(unique_key)
|
1204
|
-
response = call_metadata_agent("monitoredResource/#{unique_key}")
|
1205
|
-
return nil if response.nil?
|
1206
|
-
begin
|
1207
|
-
resource = Google::Api::MonitoredResource.decode_json(response.to_json)
|
1208
|
-
rescue Google::Protobuf::ParseError, ArgumentError => e
|
1209
|
-
@log.error 'Error paring monitored resource from Metadata Agent. ' \
|
1210
|
-
"response: #{response.inspect}", error: e
|
1211
|
-
return nil
|
1212
|
-
end
|
1213
|
-
|
1214
|
-
# TODO(qingling128): Use Google::Api::MonitoredResource directly after we
|
1215
|
-
# upgrade gRPC version to include the fix for the protobuf map
|
1216
|
-
# corruption issue.
|
1217
|
-
Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
1218
|
-
type: resource.type,
|
1219
|
-
labels: resource.labels.to_h
|
1220
|
-
)
|
1221
|
-
end
|
1222
|
-
|
1223
|
-
# Call Metadata Agent and parse response to json. Return nil in case of any
|
1224
|
-
# error / failure.
|
1225
|
-
def call_metadata_agent(path)
|
1226
|
-
url = "#{@metadata_agent_url}/#{path}"
|
1227
|
-
@log.debug("Calling Metadata Agent: #{url}")
|
1228
|
-
open(url) do |f|
|
1229
|
-
response = f.read
|
1230
|
-
parsed_hash = parse_json_or_nil(response)
|
1231
|
-
if parsed_hash.nil?
|
1232
|
-
@log.error 'Response from Metadata Agent is not in valid json ' \
|
1233
|
-
"format: '#{response.inspect}'."
|
1234
|
-
return nil
|
1235
|
-
end
|
1236
|
-
@log.debug "Response from Metadata Agent: #{parsed_hash}"
|
1237
|
-
return parsed_hash
|
1238
|
-
end
|
1239
|
-
rescue StandardError => e
|
1240
|
-
@log.error 'Error calling Metadata Agent.', error: e
|
1241
|
-
return nil
|
1242
868
|
end
|
1243
869
|
|
1244
870
|
# TODO: This functionality should eventually be available in another
|
@@ -1276,28 +902,11 @@ module Fluent
|
|
1276
902
|
end
|
1277
903
|
end
|
1278
904
|
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
@log.debug "Response from Docker API with name '#{container_name}': " \
|
1285
|
-
"#{response.inspect}."
|
1286
|
-
return parse_container_id_from_docker_api_response(response)
|
1287
|
-
rescue StandardError => e
|
1288
|
-
@log.error 'Error calling Docker API to get container id.', error: e
|
1289
|
-
return nil
|
1290
|
-
end
|
1291
|
-
|
1292
|
-
# Parse the container id from Docker Remote API response.
|
1293
|
-
# TODO(qingling128) Add a config for Docker API version to support parsing
|
1294
|
-
# different versions of Docker Remote API when the format varies.
|
1295
|
-
def parse_container_id_from_docker_api_response(response)
|
1296
|
-
JSON.parse(response.data[:body])['Id']
|
1297
|
-
rescue StandardError => e
|
1298
|
-
@log.error 'Error parsing Docker API response to get container id.',
|
1299
|
-
error: e
|
1300
|
-
return nil
|
905
|
+
def detect_cloudfunctions(attributes)
|
906
|
+
return unless attributes.include?('gcf_region')
|
907
|
+
# Cloud Functions detected
|
908
|
+
@running_cloudfunctions = true
|
909
|
+
@gcf_region = fetch_gce_metadata('instance/attributes/gcf_region')
|
1301
910
|
end
|
1302
911
|
|
1303
912
|
def cluster_name_from_kube_env(kube_env)
|
@@ -1370,14 +979,9 @@ module Fluent
|
|
1370
979
|
end
|
1371
980
|
elsif record.key?('severity')
|
1372
981
|
return parse_severity(record.delete('severity'))
|
1373
|
-
elsif
|
1374
|
-
|
1375
|
-
stream = entry_common_labels[
|
1376
|
-
"#{CONTAINER_CONSTANTS[:service]}/stream"] if
|
1377
|
-
resource_type == CONTAINER_CONSTANTS[:resource_type]
|
1378
|
-
stream = entry_common_labels[
|
1379
|
-
"#{DOCKER_CONSTANTS[:service]}/stream"] if
|
1380
|
-
resource_type == DOCKER_CONSTANTS[:resource_type]
|
982
|
+
elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
|
983
|
+
entry_common_labels.key?("#{CONTAINER_CONSTANTS[:service]}/stream")
|
984
|
+
stream = entry_common_labels["#{CONTAINER_CONSTANTS[:service]}/stream"]
|
1381
985
|
if stream == 'stdout'
|
1382
986
|
return 'INFO'
|
1383
987
|
elsif stream == 'stderr'
|
@@ -1585,24 +1189,6 @@ module Fluent
|
|
1585
1189
|
[resource_labels, common_labels]
|
1586
1190
|
end
|
1587
1191
|
|
1588
|
-
def format(tag, time, record)
|
1589
|
-
[tag, time, record].to_msgpack
|
1590
|
-
end
|
1591
|
-
|
1592
|
-
# Given a tag, returns the corresponding valid tag if possible, or nil if
|
1593
|
-
# the tag should be rejected. If 'require_valid_tags' is false, non-string
|
1594
|
-
# tags are converted to strings, and invalid characters are sanitized;
|
1595
|
-
# otherwise such tags are rejected.
|
1596
|
-
def sanitize_tag(tag)
|
1597
|
-
if @require_valid_tags &&
|
1598
|
-
(!tag.is_a?(String) || tag == '' || convert_to_utf8(tag) != tag)
|
1599
|
-
return nil
|
1600
|
-
end
|
1601
|
-
tag = convert_to_utf8(tag.to_s)
|
1602
|
-
tag = '_' if tag == ''
|
1603
|
-
tag
|
1604
|
-
end
|
1605
|
-
|
1606
1192
|
# For every original_label => new_label pair in the label_map, delete the
|
1607
1193
|
# original_label from the record if it exists, and extract the value to form
|
1608
1194
|
# a map with the new_label as the key.
|
@@ -1630,8 +1216,7 @@ module Fluent
|
|
1630
1216
|
entry.text_payload = record['log']
|
1631
1217
|
elsif is_json
|
1632
1218
|
entry.json_payload = record
|
1633
|
-
elsif
|
1634
|
-
DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
|
1219
|
+
elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
|
1635
1220
|
record.key?('log')
|
1636
1221
|
entry.text_payload = record['log']
|
1637
1222
|
elsif record.size == 1 && record.key?('message')
|
@@ -1701,8 +1286,7 @@ module Fluent
|
|
1701
1286
|
entry.text_payload = convert_to_utf8(record['log'])
|
1702
1287
|
elsif is_json
|
1703
1288
|
entry.json_payload = struct_from_ruby(record)
|
1704
|
-
elsif
|
1705
|
-
DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
|
1289
|
+
elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
|
1706
1290
|
record.key?('log')
|
1707
1291
|
entry.text_payload = convert_to_utf8(record['log'])
|
1708
1292
|
elsif record.size == 1 && record.key?('message')
|
@@ -1715,7 +1299,7 @@ module Fluent
|
|
1715
1299
|
def log_name(tag, resource)
|
1716
1300
|
if resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
1717
1301
|
tag = 'cloud-functions'
|
1718
|
-
elsif
|
1302
|
+
elsif @running_on_managed_vm
|
1719
1303
|
# Add a prefix to Managed VM logs to prevent namespace collisions.
|
1720
1304
|
tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
|
1721
1305
|
elsif resource.type == CONTAINER_CONSTANTS[:resource_type]
|
@@ -1745,9 +1329,6 @@ module Fluent
|
|
1745
1329
|
elsif resource_type == ML_CONSTANTS[:resource_type]
|
1746
1330
|
label_prefix = ML_CONSTANTS[:service]
|
1747
1331
|
labels_to_extract = %w(job_id task_name)
|
1748
|
-
elsif resource_type == DOCKER_CONSTANTS[:resource_type]
|
1749
|
-
label_prefix = DOCKER_CONSTANTS[:service]
|
1750
|
-
labels_to_extract = %w(container_id)
|
1751
1332
|
else
|
1752
1333
|
return extracted_labels
|
1753
1334
|
end
|
@@ -1815,6 +1396,33 @@ module Fluent
|
|
1815
1396
|
end
|
1816
1397
|
end
|
1817
1398
|
end
|
1399
|
+
|
1400
|
+
# Increment the metric for the number of successful requests.
|
1401
|
+
def increment_successful_requests_count
|
1402
|
+
return unless @successful_requests_count
|
1403
|
+
@successful_requests_count.increment(grpc: @use_grpc)
|
1404
|
+
end
|
1405
|
+
|
1406
|
+
# Increment the metric for the number of failed requests, labeled by
|
1407
|
+
# the provided status code.
|
1408
|
+
def increment_failed_requests_count(code)
|
1409
|
+
return unless @failed_requests_count
|
1410
|
+
@failed_requests_count.increment(grpc: @use_grpc, code: code)
|
1411
|
+
end
|
1412
|
+
|
1413
|
+
# Increment the metric for the number of log entries, successfully
|
1414
|
+
# ingested by the Stackdriver Logging API.
|
1415
|
+
def increment_ingested_entries_count(count)
|
1416
|
+
return unless @ingested_entries_count
|
1417
|
+
@ingested_entries_count.increment({}, count)
|
1418
|
+
end
|
1419
|
+
|
1420
|
+
# Increment the metric for the number of log entries that were dropped
|
1421
|
+
# and not ingested by the Stackdriver Logging API.
|
1422
|
+
def increment_dropped_entries_count(count)
|
1423
|
+
return unless @dropped_entries_count
|
1424
|
+
@dropped_entries_count.increment({}, count)
|
1425
|
+
end
|
1818
1426
|
end
|
1819
1427
|
end
|
1820
1428
|
|