fluent-plugin-google-cloud 0.6.4.pre.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +159 -0
- data/fluent-plugin-google-cloud.gemspec +2 -2
- data/lib/fluent/plugin/monitoring.rb +55 -0
- data/lib/fluent/plugin/out_google_cloud.rb +377 -769
- data/test/plugin/base_test.rb +86 -282
- data/test/plugin/constants.rb +3 -118
- data/test/plugin/test_out_google_cloud.rb +48 -0
- data/test/plugin/test_out_google_cloud_grpc.rb +47 -0
- metadata +21 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab431f0283f1c6bc88669159be387f21bb1e908f
|
4
|
+
data.tar.gz: 77d320fd8b6e4c5370542142fd296581fcd9186e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15fc471919366c52a612cb2b1941d0e9ffdf5e1d5c94ad6f3395c1860ca2f03edd888959a0ed267375bf80133458251bc1555c7416ef81f8be4f4aa83fd5c405
|
7
|
+
data.tar.gz: 4aed5e4e2686eb4776d32f3b44f61fc86849f4f02a64bd12399d0e0ac4a6cc1a8752489513fa5e482fdb7cac432d517ed5b7b26e71365e344e2dcf2ca8e96832
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fluent-plugin-google-cloud (0.6.4)
|
5
|
+
fluentd (~> 0.10)
|
6
|
+
google-api-client (~> 0.9.0)
|
7
|
+
google-cloud-logging (~> 0.23.2)
|
8
|
+
googleapis-common-protos (~> 1.3)
|
9
|
+
googleauth (~> 0.4)
|
10
|
+
grpc (~> 1.0, < 1.3)
|
11
|
+
json (~> 1.8)
|
12
|
+
|
13
|
+
GEM
|
14
|
+
remote: https://rubygems.org/
|
15
|
+
specs:
|
16
|
+
addressable (2.5.1)
|
17
|
+
public_suffix (~> 2.0, >= 2.0.2)
|
18
|
+
ast (2.3.0)
|
19
|
+
astrolabe (1.3.1)
|
20
|
+
parser (~> 2.2)
|
21
|
+
cool.io (1.5.0)
|
22
|
+
crack (0.4.3)
|
23
|
+
safe_yaml (~> 1.0.0)
|
24
|
+
faraday (0.12.1)
|
25
|
+
multipart-post (>= 1.2, < 3)
|
26
|
+
fluentd (0.14.18)
|
27
|
+
cool.io (>= 1.4.5, < 2.0.0)
|
28
|
+
http_parser.rb (>= 0.5.1, < 0.7.0)
|
29
|
+
msgpack (>= 0.7.0, < 2.0.0)
|
30
|
+
serverengine (>= 2.0.4, < 3.0.0)
|
31
|
+
sigdump (~> 0.2.2)
|
32
|
+
strptime (~> 0.1.7)
|
33
|
+
tzinfo (~> 1.0)
|
34
|
+
tzinfo-data (~> 1.0)
|
35
|
+
yajl-ruby (~> 1.0)
|
36
|
+
google-api-client (0.9.28)
|
37
|
+
addressable (~> 2.3)
|
38
|
+
googleauth (~> 0.5)
|
39
|
+
httpclient (~> 2.7)
|
40
|
+
hurley (~> 0.1)
|
41
|
+
memoist (~> 0.11)
|
42
|
+
mime-types (>= 1.6)
|
43
|
+
representable (~> 2.3.0)
|
44
|
+
retriable (~> 2.0)
|
45
|
+
google-cloud-core (0.21.1)
|
46
|
+
googleauth (~> 0.5.1)
|
47
|
+
google-cloud-logging (0.23.2)
|
48
|
+
google-cloud-core (~> 0.21.1)
|
49
|
+
google-gax (~> 0.6.0)
|
50
|
+
google-protobuf (~> 3.0)
|
51
|
+
googleapis-common-protos (~> 1.3)
|
52
|
+
grpc (~> 1.0)
|
53
|
+
orderedhash (= 0.0.6)
|
54
|
+
stackdriver-core (~> 0.21.0)
|
55
|
+
google-gax (0.6.0)
|
56
|
+
googleapis-common-protos (~> 1.3.1)
|
57
|
+
googleauth (~> 0.5.1)
|
58
|
+
grpc (~> 1.0)
|
59
|
+
rly (~> 0.2.3)
|
60
|
+
google-protobuf (3.3.0)
|
61
|
+
googleapis-common-protos (1.3.5)
|
62
|
+
google-protobuf (~> 3.2)
|
63
|
+
grpc (~> 1.0)
|
64
|
+
googleauth (0.5.1)
|
65
|
+
faraday (~> 0.9)
|
66
|
+
jwt (~> 1.4)
|
67
|
+
logging (~> 2.0)
|
68
|
+
memoist (~> 0.12)
|
69
|
+
multi_json (~> 1.11)
|
70
|
+
os (~> 0.9)
|
71
|
+
signet (~> 0.7)
|
72
|
+
grpc (1.2.5)
|
73
|
+
google-protobuf (~> 3.1)
|
74
|
+
googleauth (~> 0.5.1)
|
75
|
+
hashdiff (0.3.4)
|
76
|
+
http_parser.rb (0.6.0)
|
77
|
+
httpclient (2.8.3)
|
78
|
+
hurley (0.2)
|
79
|
+
json (1.8.6)
|
80
|
+
jwt (1.5.6)
|
81
|
+
little-plugger (1.1.4)
|
82
|
+
logging (2.2.2)
|
83
|
+
little-plugger (~> 1.1)
|
84
|
+
multi_json (~> 1.10)
|
85
|
+
memoist (0.16.0)
|
86
|
+
metaclass (0.0.4)
|
87
|
+
mime-types (3.1)
|
88
|
+
mime-types-data (~> 3.2015)
|
89
|
+
mime-types-data (3.2016.0521)
|
90
|
+
mocha (1.2.1)
|
91
|
+
metaclass (~> 0.0.1)
|
92
|
+
msgpack (1.1.0)
|
93
|
+
multi_json (1.12.1)
|
94
|
+
multipart-post (2.0.0)
|
95
|
+
orderedhash (0.0.6)
|
96
|
+
os (0.9.6)
|
97
|
+
parser (2.4.0.0)
|
98
|
+
ast (~> 2.2)
|
99
|
+
power_assert (1.0.2)
|
100
|
+
powerpack (0.1.1)
|
101
|
+
prometheus-client (0.7.1)
|
102
|
+
quantile (~> 0.2.0)
|
103
|
+
public_suffix (2.0.5)
|
104
|
+
quantile (0.2.0)
|
105
|
+
rainbow (2.2.2)
|
106
|
+
rake
|
107
|
+
rake (10.5.0)
|
108
|
+
representable (2.3.0)
|
109
|
+
uber (~> 0.0.7)
|
110
|
+
retriable (2.1.0)
|
111
|
+
rly (0.2.3)
|
112
|
+
rubocop (0.35.1)
|
113
|
+
astrolabe (~> 1.3)
|
114
|
+
parser (>= 2.2.3.0, < 3.0)
|
115
|
+
powerpack (~> 0.1)
|
116
|
+
rainbow (>= 1.99.1, < 3.0)
|
117
|
+
ruby-progressbar (~> 1.7)
|
118
|
+
tins (<= 1.6.0)
|
119
|
+
ruby-progressbar (1.8.1)
|
120
|
+
safe_yaml (1.0.4)
|
121
|
+
serverengine (2.0.5)
|
122
|
+
sigdump (~> 0.2.2)
|
123
|
+
sigdump (0.2.4)
|
124
|
+
signet (0.7.3)
|
125
|
+
addressable (~> 2.3)
|
126
|
+
faraday (~> 0.9)
|
127
|
+
jwt (~> 1.5)
|
128
|
+
multi_json (~> 1.10)
|
129
|
+
stackdriver-core (0.21.0)
|
130
|
+
strptime (0.1.9)
|
131
|
+
test-unit (3.2.5)
|
132
|
+
power_assert
|
133
|
+
thread_safe (0.3.6)
|
134
|
+
tins (1.6.0)
|
135
|
+
tzinfo (1.2.3)
|
136
|
+
thread_safe (~> 0.1)
|
137
|
+
tzinfo-data (1.2017.2)
|
138
|
+
tzinfo (>= 1.0.0)
|
139
|
+
uber (0.0.15)
|
140
|
+
webmock (1.24.6)
|
141
|
+
addressable (>= 2.3.6)
|
142
|
+
crack (>= 0.3.2)
|
143
|
+
hashdiff
|
144
|
+
yajl-ruby (1.3.0)
|
145
|
+
|
146
|
+
PLATFORMS
|
147
|
+
ruby
|
148
|
+
|
149
|
+
DEPENDENCIES
|
150
|
+
fluent-plugin-google-cloud!
|
151
|
+
mocha (~> 1.1)
|
152
|
+
prometheus-client (~> 0.7.1)
|
153
|
+
rake (~> 10.3)
|
154
|
+
rubocop (~> 0.35.0)
|
155
|
+
test-unit (~> 3.0)
|
156
|
+
webmock (~> 1.17)
|
157
|
+
|
158
|
+
BUNDLED WITH
|
159
|
+
1.15.0
|
@@ -10,7 +10,7 @@ eos
|
|
10
10
|
gem.homepage = \
|
11
11
|
'https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud'
|
12
12
|
gem.license = 'Apache-2.0'
|
13
|
-
gem.version = '0.6.4.pre.3'
|
13
|
+
gem.version = '0.6.4'
|
14
14
|
gem.authors = ['Todd Derr', 'Alex Robinson']
|
15
15
|
gem.email = ['salty@google.com']
|
16
16
|
gem.required_ruby_version = Gem::Requirement.new('>= 2.0')
|
@@ -19,7 +19,6 @@ eos
|
|
19
19
|
gem.test_files = gem.files.grep(/^(test)/)
|
20
20
|
gem.require_paths = ['lib']
|
21
21
|
|
22
|
-
gem.add_runtime_dependency 'excon', '~> 0.57.1'
|
23
22
|
gem.add_runtime_dependency 'fluentd', '~> 0.10'
|
24
23
|
gem.add_runtime_dependency 'googleapis-common-protos', '~> 1.3'
|
25
24
|
gem.add_runtime_dependency 'google-api-client', '~> 0.9.0'
|
@@ -33,4 +32,5 @@ eos
|
|
33
32
|
gem.add_development_dependency 'rubocop', '~> 0.35.0'
|
34
33
|
gem.add_development_dependency 'webmock', '~> 1.17'
|
35
34
|
gem.add_development_dependency 'test-unit', '~> 3.0'
|
35
|
+
gem.add_development_dependency 'prometheus-client', '~> 0.7.1'
|
36
36
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Copyright 2017 Google Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Monitoring
|
16
|
+
# Base class for the monitoring registry.
|
17
|
+
class BaseMonitoringRegistry
|
18
|
+
def counter(_name, _desc)
|
19
|
+
_undefined
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Prometheus implementation of the monitoring registry that uses the default
|
24
|
+
# registry in the official Prometheus client library.
|
25
|
+
class PrometheusMonitoringRegistry < BaseMonitoringRegistry
|
26
|
+
def self.name
|
27
|
+
'prometheus'
|
28
|
+
end
|
29
|
+
|
30
|
+
def initialize
|
31
|
+
require 'prometheus/client'
|
32
|
+
@registry = Prometheus::Client.registry
|
33
|
+
end
|
34
|
+
|
35
|
+
# Exception-driven behavior to avoid synchronization errors.
|
36
|
+
def counter(name, desc)
|
37
|
+
return @registry.counter(name, desc)
|
38
|
+
rescue Prometheus::Client::Registry::AlreadyRegisteredError
|
39
|
+
return @registry.get(name)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# Factory that is used to create a monitoring registry based on
|
44
|
+
# the monitoring solution name.
|
45
|
+
class MonitoringRegistryFactory
|
46
|
+
@known_registry_types = {
|
47
|
+
PrometheusMonitoringRegistry.name =>
|
48
|
+
PrometheusMonitoringRegistry
|
49
|
+
}
|
50
|
+
|
51
|
+
def self.create(name)
|
52
|
+
(@known_registry_types[name] || BaseMonitoringRegistry).new
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -11,11 +11,9 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
-
require 'excon'
|
15
14
|
require 'grpc'
|
16
15
|
require 'json'
|
17
16
|
require 'open-uri'
|
18
|
-
require 'rubygems'
|
19
17
|
require 'socket'
|
20
18
|
require 'time'
|
21
19
|
require 'yaml'
|
@@ -26,6 +24,8 @@ require 'google/logging/v2/logging_services_pb'
|
|
26
24
|
require 'google/logging/v2/log_entry_pb'
|
27
25
|
require 'googleauth'
|
28
26
|
|
27
|
+
require_relative 'monitoring'
|
28
|
+
|
29
29
|
module Google
|
30
30
|
module Protobuf
|
31
31
|
# Alias the has_key? method to have the same interface as a regular map.
|
@@ -38,10 +38,8 @@ end
|
|
38
38
|
module Fluent
|
39
39
|
# fluentd output plugin for the Stackdriver Logging API
|
40
40
|
class GoogleCloudOutput < BufferedOutput
|
41
|
-
# Constants.
|
41
|
+
# Constants for service names and resource types.
|
42
42
|
module Constants
|
43
|
-
# Service names and resource types.
|
44
|
-
|
45
43
|
APPENGINE_CONSTANTS = {
|
46
44
|
service: 'appengine.googleapis.com',
|
47
45
|
resource_type: 'gae_app'
|
@@ -58,10 +56,6 @@ module Fluent
|
|
58
56
|
service: 'container.googleapis.com',
|
59
57
|
resource_type: 'container'
|
60
58
|
}
|
61
|
-
DOCKER_CONSTANTS = {
|
62
|
-
service: 'dockercontainer.googleapis.com',
|
63
|
-
resource_type: 'docker_container'
|
64
|
-
}
|
65
59
|
DATAFLOW_CONSTANTS = {
|
66
60
|
service: 'dataflow.googleapis.com',
|
67
61
|
resource_type: 'dataflow_step'
|
@@ -78,20 +72,6 @@ module Fluent
|
|
78
72
|
service: 'ml.googleapis.com',
|
79
73
|
resource_type: 'ml_job'
|
80
74
|
}
|
81
|
-
|
82
|
-
# Metadata Agent support.
|
83
|
-
|
84
|
-
# Use empty string as request path when locally-unique key of monitored
|
85
|
-
# resource can be implicitly inferred by Metadata Agent.
|
86
|
-
IMPLICIT_MONITORED_RESOURCE_UNIQUE_KEY = ''
|
87
|
-
|
88
|
-
# The label name of locally unique id in the json payload. When a record
|
89
|
-
# has this field in the payload, we will use the value to retrieve
|
90
|
-
# monitored resource from Stackdriver Metadata agent.
|
91
|
-
LOCALLY_UNIQUE_ID_LABEL_NAME = 'logging.googleapis.com/locally_unique_id'
|
92
|
-
|
93
|
-
# Docker container support.
|
94
|
-
DEFAULT_DOCKER_API_SOCKET_PATH = '/var/run/docker.sock'
|
95
75
|
end
|
96
76
|
|
97
77
|
include self::Constants
|
@@ -99,7 +79,7 @@ module Fluent
|
|
99
79
|
Fluent::Plugin.register_output('google_cloud', self)
|
100
80
|
|
101
81
|
PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'
|
102
|
-
PLUGIN_VERSION = '0.6.
|
82
|
+
PLUGIN_VERSION = '0.6.3'
|
103
83
|
|
104
84
|
# Name of the Google cloud logging write scope.
|
105
85
|
LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'
|
@@ -146,7 +126,7 @@ module Fluent
|
|
146
126
|
config_param :require_valid_tags, :bool, :default => false
|
147
127
|
|
148
128
|
# The regular expression to use on Kubernetes logs to extract some basic
|
149
|
-
# information about the log source. The
|
129
|
+
# information about the log source. The regex must contain capture groups
|
150
130
|
# for pod_name, namespace_name, and container_name.
|
151
131
|
config_param :kubernetes_tag_regexp, :string, :default =>
|
152
132
|
'\.(?<pod_name>[^_]+)_(?<namespace_name>[^_]+)_(?<container_name>.+)$'
|
@@ -208,17 +188,18 @@ module Fluent
|
|
208
188
|
:default => nil,
|
209
189
|
:secret => true
|
210
190
|
|
211
|
-
# Whether to
|
212
|
-
|
213
|
-
|
214
|
-
|
191
|
+
# Whether to collect metrics about the plugin usage. The mechanism for
|
192
|
+
# collecting and exposing metrics is controlled by the monitoring_type
|
193
|
+
# parameter.
|
194
|
+
config_param :enable_monitoring, :bool, :default => false
|
215
195
|
|
216
|
-
#
|
217
|
-
#
|
218
|
-
|
219
|
-
#
|
220
|
-
|
221
|
-
|
196
|
+
# What system to use when collecting metrics. Possible values are:
|
197
|
+
# - 'prometheus', in this case default registry in the Prometheus
|
198
|
+
# client library is used, without actually exposing the endpoint
|
199
|
+
# to serve metrics in the Prometheus format.
|
200
|
+
# - any other value will result in the absence of metrics.
|
201
|
+
config_param :monitoring_type, :string,
|
202
|
+
:default => Monitoring::PrometheusMonitoringRegistry.name
|
222
203
|
|
223
204
|
# rubocop:enable Style/HashSyntax
|
224
205
|
|
@@ -229,6 +210,9 @@ module Fluent
|
|
229
210
|
attr_reader :project_id
|
230
211
|
attr_reader :zone
|
231
212
|
attr_reader :vm_id
|
213
|
+
attr_reader :running_on_managed_vm
|
214
|
+
attr_reader :gae_backend_name
|
215
|
+
attr_reader :gae_backend_version
|
232
216
|
attr_reader :resource
|
233
217
|
attr_reader :common_labels
|
234
218
|
|
@@ -238,38 +222,28 @@ module Fluent
|
|
238
222
|
@log = $log # rubocop:disable Style/GlobalVars
|
239
223
|
end
|
240
224
|
|
241
|
-
# Set up regex patterns used to parse tags and logs.
|
242
|
-
def setup_regex_patterns
|
243
|
-
@compiled_kubernetes_tag_regexp = nil
|
244
|
-
if @kubernetes_tag_regexp
|
245
|
-
@compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp)
|
246
|
-
end
|
247
|
-
|
248
|
-
@cloudfunctions_tag_regexp =
|
249
|
-
/\.(?<encoded_function_name>.+)\.\d+-[^-]+_default_worker$/
|
250
|
-
@cloudfunctions_log_regexp = /^
|
251
|
-
(?:\[(?<severity>.)\])?
|
252
|
-
\[(?<timestamp>.{24})\]
|
253
|
-
(?:\[(?<execution_id>[^\]]+)\])?
|
254
|
-
[ ](?<text>.*)$/x
|
255
|
-
|
256
|
-
# Docker container tag format:
|
257
|
-
# "container.<container_id>.<container_name>".
|
258
|
-
@dockercontainer_tag_regexp =
|
259
|
-
/^container\.(?<container_id>[a-zA-Z0-9]+)\.
|
260
|
-
(?<container_name>[a-zA-Z0-9_.-]+)$/x
|
261
|
-
# Docker container with application tag format:
|
262
|
-
# "application-container.<container_name>.<additional_tag>".
|
263
|
-
@dockercontainer_tag_with_application_regexp =
|
264
|
-
/^application-container\.(?<container_name>[a-zA-Z0-9_.-]+)\.
|
265
|
-
(?<additional_tag>.+)$/x
|
266
|
-
|
267
|
-
@http_latency_regexp = /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
|
268
|
-
end
|
269
|
-
|
270
225
|
def configure(conf)
|
271
226
|
super
|
272
227
|
|
228
|
+
# If monitoring is enabled, register metrics in the default registry
|
229
|
+
# and store metric objects for future use.
|
230
|
+
if @enable_monitoring
|
231
|
+
registry = Monitoring::MonitoringRegistryFactory.create @monitoring_type
|
232
|
+
@successful_requests_count = registry.counter(
|
233
|
+
:stackdriver_successful_requests_count,
|
234
|
+
'A number of successful requests to the Stackdriver Logging API')
|
235
|
+
@failed_requests_count = registry.counter(
|
236
|
+
:stackdriver_failed_requests_count,
|
237
|
+
'A number of failed requests to the Stackdriver Logging API,'\
|
238
|
+
' broken down by the error code')
|
239
|
+
@ingested_entries_count = registry.counter(
|
240
|
+
:stackdriver_ingested_entries_count,
|
241
|
+
'A number of log entries ingested by Stackdriver Logging')
|
242
|
+
@dropped_entries_count = registry.counter(
|
243
|
+
:stackdriver_dropped_entries_count,
|
244
|
+
'A number of log entries dropped by the Stackdriver output plugin')
|
245
|
+
end
|
246
|
+
|
273
247
|
# Alert on old authentication configuration.
|
274
248
|
unless @auth_method.nil? && @private_key_email.nil? &&
|
275
249
|
@private_key_path.nil? && @private_key_passphrase.nil?
|
@@ -285,57 +259,160 @@ module Fluent
|
|
285
259
|
extra.join(' ')
|
286
260
|
end
|
287
261
|
|
288
|
-
|
262
|
+
# TODO: Send instance tags as labels as well?
|
263
|
+
@common_labels = {}
|
264
|
+
@common_labels.merge!(@labels) if @labels
|
265
|
+
|
266
|
+
# TODO: Construct Google::Api::MonitoredResource when @use_grpc is
|
267
|
+
# true after the protobuf map corruption issue is fixed.
|
268
|
+
@resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
269
|
+
labels: {})
|
270
|
+
|
271
|
+
@compiled_kubernetes_tag_regexp = nil
|
272
|
+
if @kubernetes_tag_regexp
|
273
|
+
@compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp)
|
274
|
+
end
|
289
275
|
|
276
|
+
@cloudfunctions_tag_regexp =
|
277
|
+
/\.(?<encoded_function_name>.+)\.\d+-[^-]+_default_worker$/
|
278
|
+
@cloudfunctions_log_regexp = /^
|
279
|
+
(?:\[(?<severity>.)\])?
|
280
|
+
\[(?<timestamp>.{24})\]
|
281
|
+
(?:\[(?<execution_id>[^\]]+)\])?
|
282
|
+
[ ](?<text>.*)$/x
|
283
|
+
|
284
|
+
@http_latency_regexp = /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
|
285
|
+
|
286
|
+
# set attributes from metadata (unless overridden by static config)
|
287
|
+
@vm_name = Socket.gethostname if @vm_name.nil?
|
290
288
|
@platform = detect_platform
|
289
|
+
case @platform
|
290
|
+
when Platform::GCE
|
291
|
+
if @project_id.nil?
|
292
|
+
@project_id = fetch_gce_metadata('project/project-id')
|
293
|
+
end
|
294
|
+
if @zone.nil?
|
295
|
+
# this returns "projects/<number>/zones/<zone>"; we only want
|
296
|
+
# the part after the final slash.
|
297
|
+
fully_qualified_zone = fetch_gce_metadata('instance/zone')
|
298
|
+
@zone = fully_qualified_zone.rpartition('/')[2]
|
299
|
+
end
|
300
|
+
@vm_id = fetch_gce_metadata('instance/id') if @vm_id.nil?
|
301
|
+
when Platform::EC2
|
302
|
+
metadata = fetch_ec2_metadata
|
303
|
+
if @zone.nil? && metadata.key?('availabilityZone')
|
304
|
+
@zone = 'aws:' + metadata['availabilityZone']
|
305
|
+
end
|
306
|
+
if @vm_id.nil? && metadata.key?('instanceId')
|
307
|
+
@vm_id = metadata['instanceId']
|
308
|
+
end
|
309
|
+
if metadata.key?('accountId')
|
310
|
+
@resource.labels['aws_account'] = metadata['accountId']
|
311
|
+
end
|
312
|
+
when Platform::OTHER
|
313
|
+
# do nothing
|
314
|
+
else
|
315
|
+
fail Fluent::ConfigError, 'Unknown platform ' + @platform
|
316
|
+
end
|
291
317
|
|
292
|
-
#
|
293
|
-
#
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
# explicitly send the key.
|
299
|
-
@resource = call_metadata_agent_for_monitored_resource(
|
300
|
-
IMPLICIT_MONITORED_RESOURCE_UNIQUE_KEY)
|
318
|
+
# If we still don't have a project ID, try to obtain it from the
|
319
|
+
# credentials.
|
320
|
+
if @project_id.nil?
|
321
|
+
@project_id = CredentialsInfo.project_id
|
322
|
+
@log.info 'Set Project ID from credentials: ', @project_id unless
|
323
|
+
@project_id.nil?
|
301
324
|
end
|
302
325
|
|
303
|
-
#
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
# Fail over to retrieve monitored resource via the legacy path if we fail
|
313
|
-
# to get it from Metadata Agent.
|
314
|
-
@resource ||= determine_agent_level_monitored_resource_via_legacy
|
315
|
-
|
316
|
-
# Set variables specific to Cloud Functions. This has to be called after
|
317
|
-
# we have determined the resource type. The purpose is to avoid repeated
|
318
|
-
# calls to metadata server.
|
319
|
-
@running_cloudfunctions = false
|
320
|
-
# We only support Cloud Functions logs for GKE right now.
|
321
|
-
if @resource.type == CONTAINER_CONSTANTS[:resource_type] &&
|
322
|
-
fetch_gce_metadata('instance/attributes/').split.include?('gcf_region')
|
323
|
-
# We are not setting resource type as Cloud Functions here because
|
324
|
-
# whether a log entry is truly coming from a Cloud Functions function
|
325
|
-
# depends on the log tag. Only when @running_cloudfunctions is true will
|
326
|
-
# we try to match log tags against Cloud Functions tag regexp when
|
327
|
-
# processing log entries.
|
328
|
-
@running_cloudfunctions = true
|
329
|
-
# Fetch this info and store it to avoid recurring metadata server calls.
|
330
|
-
@gcf_region = fetch_gce_metadata('instance/attributes/gcf_region')
|
326
|
+
# all metadata parameters must now be set
|
327
|
+
unless @project_id && @zone && @vm_id
|
328
|
+
missing = []
|
329
|
+
missing << 'project_id' unless @project_id
|
330
|
+
missing << 'zone' unless @zone
|
331
|
+
missing << 'vm_id' unless @vm_id
|
332
|
+
fail Fluent::ConfigError, 'Unable to obtain metadata parameters: ' +
|
333
|
+
missing.join(' ')
|
331
334
|
end
|
332
335
|
|
333
|
-
#
|
334
|
-
|
335
|
-
@common_labels = determine_agent_level_common_labels
|
336
|
+
# Default this to false; it is only overwritten if we detect Managed VM.
|
337
|
+
@running_on_managed_vm = false
|
336
338
|
|
337
|
-
#
|
338
|
-
#
|
339
|
+
# Default this to false; it is only overwritten if we detect Cloud
|
340
|
+
# Functions.
|
341
|
+
@running_cloudfunctions = false
|
342
|
+
|
343
|
+
# Set up the MonitoredResource, labels, etc. based on the config.
|
344
|
+
case @platform
|
345
|
+
when Platform::GCE
|
346
|
+
@resource.type = COMPUTE_CONSTANTS[:resource_type]
|
347
|
+
# TODO: introduce a new MonitoredResource-centric configuration and
|
348
|
+
# deprecate subservice-name; for now, translate known uses.
|
349
|
+
if @subservice_name
|
350
|
+
# TODO: what should we do if we encounter an unknown value?
|
351
|
+
if @subservice_name == DATAFLOW_CONSTANTS[:service]
|
352
|
+
@resource.type = DATAFLOW_CONSTANTS[:resource_type]
|
353
|
+
elsif @subservice_name == ML_CONSTANTS[:service]
|
354
|
+
@resource.type = ML_CONSTANTS[:resource_type]
|
355
|
+
end
|
356
|
+
elsif @detect_subservice
|
357
|
+
# Check for specialized GCE environments.
|
358
|
+
# TODO: Add config options for these to allow for running outside GCE?
|
359
|
+
attributes = fetch_gce_metadata('instance/attributes/').split
|
360
|
+
# Do nothing, just don't populate other service's labels.
|
361
|
+
if attributes.include?('gae_backend_name') &&
|
362
|
+
attributes.include?('gae_backend_version')
|
363
|
+
# Managed VM
|
364
|
+
@running_on_managed_vm = true
|
365
|
+
@gae_backend_name =
|
366
|
+
fetch_gce_metadata('instance/attributes/gae_backend_name')
|
367
|
+
@gae_backend_version =
|
368
|
+
fetch_gce_metadata('instance/attributes/gae_backend_version')
|
369
|
+
@resource.type = APPENGINE_CONSTANTS[:resource_type]
|
370
|
+
@resource.labels['module_id'] = @gae_backend_name
|
371
|
+
@resource.labels['version_id'] = @gae_backend_version
|
372
|
+
elsif attributes.include?('kube-env')
|
373
|
+
# Kubernetes/Container Engine
|
374
|
+
@resource.type = CONTAINER_CONSTANTS[:resource_type]
|
375
|
+
@raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
|
376
|
+
@kube_env = YAML.load(@raw_kube_env)
|
377
|
+
@resource.labels['cluster_name'] =
|
378
|
+
cluster_name_from_kube_env(@kube_env)
|
379
|
+
detect_cloudfunctions(attributes)
|
380
|
+
elsif attributes.include?('dataproc-cluster-uuid') &&
|
381
|
+
attributes.include?('dataproc-cluster-name')
|
382
|
+
# Dataproc
|
383
|
+
@resource.type = DATAPROC_CONSTANTS[:resource_type]
|
384
|
+
@resource.labels['cluster_uuid'] =
|
385
|
+
fetch_gce_metadata('instance/attributes/dataproc-cluster-uuid')
|
386
|
+
@resource.labels['cluster_name'] =
|
387
|
+
fetch_gce_metadata('instance/attributes/dataproc-cluster-name')
|
388
|
+
@resource.labels['region'] =
|
389
|
+
fetch_gce_metadata('instance/attributes/dataproc-region')
|
390
|
+
end
|
391
|
+
end
|
392
|
+
# Some services have the GCE instance_id and zone as MonitoredResource
|
393
|
+
# labels; for other services we send them as entry labels.
|
394
|
+
if @resource.type == COMPUTE_CONSTANTS[:resource_type] ||
|
395
|
+
@resource.type == CONTAINER_CONSTANTS[:resource_type]
|
396
|
+
@resource.labels['instance_id'] = @vm_id
|
397
|
+
@resource.labels['zone'] = @zone
|
398
|
+
else
|
399
|
+
common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] = @vm_id
|
400
|
+
common_labels["#{COMPUTE_CONSTANTS[:service]}/zone"] = @zone
|
401
|
+
end
|
402
|
+
common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
403
|
+
when Platform::EC2
|
404
|
+
@resource.type = EC2_CONSTANTS[:resource_type]
|
405
|
+
@resource.labels['instance_id'] = @vm_id
|
406
|
+
@resource.labels['region'] = @zone
|
407
|
+
# the aws_account label is populated above.
|
408
|
+
common_labels["#{EC2_CONSTANTS[:service]}/resource_name"] = @vm_name
|
409
|
+
when Platform::OTHER
|
410
|
+
# Use GCE as the default environment.
|
411
|
+
@resource.type = COMPUTE_CONSTANTS[:resource_type]
|
412
|
+
@resource.labels['instance_id'] = @vm_id
|
413
|
+
@resource.labels['zone'] = @zone
|
414
|
+
common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
415
|
+
end
|
339
416
|
@resource.labels.merge!(
|
340
417
|
extract_resource_labels(@resource.type, common_labels))
|
341
418
|
|
@@ -347,7 +424,7 @@ module Fluent
|
|
347
424
|
|
348
425
|
# Log an informational message containing the Logs viewer URL
|
349
426
|
@log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
|
350
|
-
"viewer?project=#{@project_id}&resource=#{@
|
427
|
+
"viewer?project=#{@project_id}&resource=#{@resource_type}/",
|
351
428
|
"instance_id/#{@vm_id}"
|
352
429
|
end
|
353
430
|
|
@@ -362,15 +439,135 @@ module Fluent
|
|
362
439
|
super
|
363
440
|
end
|
364
441
|
|
442
|
+
def format(tag, time, record)
|
443
|
+
[tag, time, record].to_msgpack
|
444
|
+
end
|
445
|
+
|
446
|
+
# Given a tag, returns the corresponding valid tag if possible, or nil if
|
447
|
+
# the tag should be rejected. If 'require_valid_tags' is false, non-string
|
448
|
+
# tags are converted to strings, and invalid characters are sanitized;
|
449
|
+
# otherwise such tags are rejected.
|
450
|
+
def sanitize_tag(tag)
|
451
|
+
if @require_valid_tags &&
|
452
|
+
(!tag.is_a?(String) || tag == '' || convert_to_utf8(tag) != tag)
|
453
|
+
return nil
|
454
|
+
end
|
455
|
+
tag = convert_to_utf8(tag.to_s)
|
456
|
+
tag = '_' if tag == ''
|
457
|
+
tag
|
458
|
+
end
|
459
|
+
|
460
|
+
# Compute the monitored resource and common labels shared by a collection of
|
461
|
+
# entries.
|
462
|
+
def compute_group_resource_and_labels(tag)
|
463
|
+
# Note that we assume that labels added to group_common_labels below are
|
464
|
+
# not 'service' labels (i.e. we do not call extract_resource_labels
|
465
|
+
# again).
|
466
|
+
group_resource = @resource.dup
|
467
|
+
group_common_labels = @common_labels.dup
|
468
|
+
|
469
|
+
if @running_cloudfunctions
|
470
|
+
# If the current group of entries is coming from a Cloud Functions
|
471
|
+
# function, the function name can be extracted from the tag.
|
472
|
+
match_data = @cloudfunctions_tag_regexp.match(tag)
|
473
|
+
if match_data
|
474
|
+
# Resource type is set to Cloud Functions only for logs actually
|
475
|
+
# coming from a function, otherwise we leave it as Container.
|
476
|
+
group_resource.type = CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
477
|
+
group_resource.labels['region'] = @gcf_region
|
478
|
+
group_resource.labels['function_name'] =
|
479
|
+
decode_cloudfunctions_function_name(
|
480
|
+
match_data['encoded_function_name'])
|
481
|
+
# Move GKE container labels from the MonitoredResource to the
|
482
|
+
# LogEntry.
|
483
|
+
instance_id = group_resource.labels.delete('instance_id')
|
484
|
+
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/cluster_name"] =
|
485
|
+
group_resource.labels.delete('cluster_name')
|
486
|
+
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/instance_id"] =
|
487
|
+
instance_id
|
488
|
+
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] =
|
489
|
+
instance_id
|
490
|
+
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/zone"] =
|
491
|
+
group_resource.labels.delete('zone')
|
492
|
+
end
|
493
|
+
end
|
494
|
+
if group_resource.type == CONTAINER_CONSTANTS[:resource_type] &&
|
495
|
+
@compiled_kubernetes_tag_regexp
|
496
|
+
# Container logs in Kubernetes are tagged based on where they came
|
497
|
+
# from, so we can extract useful metadata from the tag.
|
498
|
+
# Do this here to avoid having to repeat it for each record.
|
499
|
+
match_data = @compiled_kubernetes_tag_regexp.match(tag)
|
500
|
+
if match_data
|
501
|
+
group_resource.labels['container_name'] = match_data['container_name']
|
502
|
+
group_resource.labels['namespace_id'] = match_data['namespace_name']
|
503
|
+
group_resource.labels['pod_id'] = match_data['pod_name']
|
504
|
+
%w(namespace_name pod_name).each do |field|
|
505
|
+
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/#{field}"] =
|
506
|
+
match_data[field]
|
507
|
+
end
|
508
|
+
end
|
509
|
+
end
|
510
|
+
|
511
|
+
# Freeze the per-request state. Any further changes must be made on a
|
512
|
+
# per-entry basis.
|
513
|
+
group_resource.freeze
|
514
|
+
group_resource.labels.freeze
|
515
|
+
group_common_labels.freeze
|
516
|
+
|
517
|
+
[group_resource, group_common_labels]
|
518
|
+
end
|
519
|
+
|
520
|
+
# Extract entry resource and common labels that should be applied to
|
521
|
+
# individual entries from the group resource.
|
522
|
+
def extract_entry_labels(group_resource, record)
|
523
|
+
resource_labels = {}
|
524
|
+
common_labels = {}
|
525
|
+
|
526
|
+
if group_resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
|
527
|
+
record.key?('log')
|
528
|
+
@cloudfunctions_log_match =
|
529
|
+
@cloudfunctions_log_regexp.match(record['log'])
|
530
|
+
end
|
531
|
+
|
532
|
+
if group_resource.type == CONTAINER_CONSTANTS[:resource_type]
|
533
|
+
# Move the stdout/stderr annotation from the record into a label
|
534
|
+
common_labels.merge!(
|
535
|
+
fields_to_labels(
|
536
|
+
record, 'stream' => "#{CONTAINER_CONSTANTS[:service]}/stream"))
|
537
|
+
|
538
|
+
# If the record has been annotated by the kubernetes_metadata_filter
|
539
|
+
# plugin, then use that metadata. Otherwise, rely on commonLabels
|
540
|
+
# populated at the grouped_entries level from the group's tag.
|
541
|
+
if record.key?('kubernetes')
|
542
|
+
extracted_resource_labels, extracted_common_labels = \
|
543
|
+
extract_container_metadata(record)
|
544
|
+
resource_labels.merge!(extracted_resource_labels)
|
545
|
+
common_labels.merge!(extracted_common_labels)
|
546
|
+
end
|
547
|
+
end
|
548
|
+
|
549
|
+
# If a field is present in the label_map, send its value as a label
|
550
|
+
# (mapping the field name to label name as specified in the config)
|
551
|
+
# and do not send that field as part of the payload.
|
552
|
+
common_labels.merge!(fields_to_labels(record, @label_map))
|
553
|
+
|
554
|
+
if group_resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
|
555
|
+
@cloudfunctions_log_match &&
|
556
|
+
@cloudfunctions_log_match['execution_id']
|
557
|
+
common_labels['execution_id'] =
|
558
|
+
@cloudfunctions_log_match['execution_id']
|
559
|
+
end
|
560
|
+
resource_labels.merge!(
|
561
|
+
extract_resource_labels(group_resource.type, common_labels))
|
562
|
+
|
563
|
+
[resource_labels, common_labels]
|
564
|
+
end
|
565
|
+
|
365
566
|
def write(chunk)
|
366
567
|
# Group the entries since we have to make one call per tag.
|
367
568
|
grouped_entries = {}
|
368
569
|
chunk.msgpack_each do |tag, *arr|
|
369
570
|
sanitized_tag = sanitize_tag(tag)
|
370
|
-
@log.error("tag is: ")
|
371
|
-
@log.error(tag.inspect)
|
372
|
-
@log.error("arr is: ")
|
373
|
-
@log.error(arr.inspect)
|
374
571
|
if sanitized_tag.nil?
|
375
572
|
@log.warn "Dropping log entries with invalid tag: '#{tag}'. " \
|
376
573
|
'A tag should be a string with utf8 characters.'
|
@@ -382,22 +579,20 @@ module Fluent
|
|
382
579
|
|
383
580
|
grouped_entries.each do |tag, arr|
|
384
581
|
entries = []
|
385
|
-
group_resource, group_common_labels =
|
386
|
-
|
582
|
+
group_resource, group_common_labels = compute_group_resource_and_labels(
|
583
|
+
tag)
|
387
584
|
|
388
585
|
arr.each do |time, record|
|
389
586
|
next unless record.is_a?(Hash)
|
390
587
|
|
391
|
-
|
392
|
-
|
588
|
+
extracted_resource_labels, extracted_common_labels = \
|
589
|
+
extract_entry_labels(group_resource, record)
|
393
590
|
entry_resource = group_resource.dup
|
394
|
-
entry_resource.type = resource_type
|
395
591
|
entry_resource.labels.merge!(extracted_resource_labels)
|
396
592
|
entry_common_labels = \
|
397
593
|
group_common_labels.merge(extracted_common_labels)
|
398
594
|
|
399
|
-
if
|
400
|
-
DOCKER_CONSTANTS[:resource_type]].include?(entry_resource.type)
|
595
|
+
if entry_resource.type == CONTAINER_CONSTANTS[:resource_type]
|
401
596
|
# Save the timestamp if available, then clear it out to allow for
|
402
597
|
# determining whether we should parse the log or message field.
|
403
598
|
timestamp = record.key?('time') ? record['time'] : nil
|
@@ -491,6 +686,8 @@ module Fluent
|
|
491
686
|
)
|
492
687
|
|
493
688
|
client.write_log_entries(write_request)
|
689
|
+
increment_successful_requests_count
|
690
|
+
increment_ingested_entries_count(entries.length)
|
494
691
|
|
495
692
|
# Let the user explicitly know when the first call succeeded,
|
496
693
|
# to aid with verification and troubleshooting.
|
@@ -500,10 +697,12 @@ module Fluent
|
|
500
697
|
end
|
501
698
|
|
502
699
|
rescue GRPC::Cancelled => error
|
700
|
+
increment_failed_requests_count(GRPC::Core::StatusCodes::CANCELLED)
|
503
701
|
# RPC cancelled, so retry via re-raising the error.
|
504
702
|
raise error
|
505
703
|
|
506
704
|
rescue GRPC::BadStatus => error
|
705
|
+
increment_failed_requests_count(error.code)
|
507
706
|
case error.code
|
508
707
|
when GRPC::Core::StatusCodes::CANCELLED,
|
509
708
|
GRPC::Core::StatusCodes::UNAVAILABLE,
|
@@ -518,6 +717,7 @@ module Fluent
|
|
518
717
|
# Most client errors indicate a problem with the request itself
|
519
718
|
# and should not be retried.
|
520
719
|
dropped = entries.length
|
720
|
+
increment_dropped_entries_count(dropped)
|
521
721
|
@log.warn "Dropping #{dropped} log message(s)",
|
522
722
|
error: error.to_s, error_code: error.code.to_s
|
523
723
|
when GRPC::Core::StatusCodes::UNAUTHENTICATED
|
@@ -525,12 +725,14 @@ module Fluent
|
|
525
725
|
# These are usually solved via a `gcloud auth` call, or by
|
526
726
|
# modifying the permissions on the Google Cloud project.
|
527
727
|
dropped = entries.length
|
728
|
+
increment_dropped_entries_count(dropped)
|
528
729
|
@log.warn "Dropping #{dropped} log message(s)",
|
529
730
|
error: error.to_s, error_code: error.code.to_s
|
530
731
|
else
|
531
732
|
# Assume this is a problem with the request itself
|
532
733
|
# and don't retry.
|
533
734
|
dropped = entries.length
|
735
|
+
increment_dropped_entries_count(dropped)
|
534
736
|
@log.error "Unknown response code #{error.code} from the "\
|
535
737
|
"server, dropping #{dropped} log message(s)",
|
536
738
|
error: error.to_s, error_code: error.code.to_s
|
@@ -546,7 +748,14 @@ module Fluent
|
|
546
748
|
entries: entries)
|
547
749
|
|
548
750
|
# TODO: RequestOptions
|
549
|
-
|
751
|
+
begin
|
752
|
+
client.write_entry_log_entries(write_request)
|
753
|
+
rescue Google::Apis::Error => error
|
754
|
+
increment_failed_requests_count(error.status_code)
|
755
|
+
raise error
|
756
|
+
end
|
757
|
+
increment_successful_requests_count
|
758
|
+
increment_ingested_entries_count(entries.length)
|
550
759
|
|
551
760
|
# Let the user explicitly know when the first call succeeded,
|
552
761
|
# to aid with verification and troubleshooting.
|
@@ -564,6 +773,7 @@ module Fluent
|
|
564
773
|
# These are usually solved via a `gcloud auth` call, or by modifying
|
565
774
|
# the permissions on the Google Cloud project.
|
566
775
|
dropped = entries.length
|
776
|
+
increment_dropped_entries_count(dropped)
|
567
777
|
@log.warn "Dropping #{dropped} log message(s)",
|
568
778
|
error_class: error.class.to_s, error: error.to_s
|
569
779
|
|
@@ -571,6 +781,7 @@ module Fluent
|
|
571
781
|
# Most ClientErrors indicate a problem with the request itself and
|
572
782
|
# should not be retried.
|
573
783
|
dropped = entries.length
|
784
|
+
increment_dropped_entries_count(dropped)
|
574
785
|
@log.warn "Dropping #{dropped} log message(s)",
|
575
786
|
error_class: error.class.to_s, error: error.to_s
|
576
787
|
end
|
@@ -630,7 +841,7 @@ module Fluent
|
|
630
841
|
end
|
631
842
|
end
|
632
843
|
rescue StandardError => e
|
633
|
-
@log.
|
844
|
+
@log.debug 'Failed to access metadata service: ', error: e
|
634
845
|
end
|
635
846
|
|
636
847
|
@log.info 'Unable to determine platform'
|
@@ -645,600 +856,15 @@ module Fluent
|
|
645
856
|
metadata_path, 'Metadata-Flavor' => 'Google', &:read)
|
646
857
|
end
|
647
858
|
|
648
|
-
|
649
|
-
|
650
|
-
def ec2_metadata
|
651
|
-
fail "Called ec2_metadata with platform=#{@platform}" unless
|
859
|
+
def fetch_ec2_metadata
|
860
|
+
fail "Called fetch_ec2_metadata with platform=#{@platform}" unless
|
652
861
|
@platform == Platform::EC2
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
@ec2_metadata = JSON.parse(contents)
|
659
|
-
end
|
660
|
-
end
|
661
|
-
|
662
|
-
@ec2_metadata
|
663
|
-
end
|
664
|
-
|
665
|
-
# Set regexp patterns to parse tags and logs.
|
666
|
-
def set_regexp_patterns
|
667
|
-
@compiled_kubernetes_tag_regexp = nil
|
668
|
-
if @kubernetes_tag_regexp
|
669
|
-
@compiled_kubernetes_tag_regexp = Regexp.new(@kubernetes_tag_regexp)
|
862
|
+
# See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
|
863
|
+
open('http://' + METADATA_SERVICE_ADDR +
|
864
|
+
'/latest/dynamic/instance-identity/document') do |f|
|
865
|
+
contents = f.read
|
866
|
+
return JSON.parse(contents)
|
670
867
|
end
|
671
|
-
|
672
|
-
@cloudfunctions_tag_regexp =
|
673
|
-
/\.(?<encoded_function_name>.+)\.\d+-[^-]+_default_worker$/
|
674
|
-
@cloudfunctions_log_regexp = /^
|
675
|
-
(?:\[(?<severity>.)\])?
|
676
|
-
\[(?<timestamp>.{24})\]
|
677
|
-
(?:\[(?<execution_id>[^\]]+)\])?
|
678
|
-
[ ](?<text>.*)$/x
|
679
|
-
|
680
|
-
@http_latency_regexp = /^\s*(?<seconds>\d+)(?<decimal>\.\d+)?\s*s\s*$/
|
681
|
-
end
|
682
|
-
|
683
|
-
# Set required variables like @project_id, @vm_id, @vm_name and @zone.
|
684
|
-
def set_required_metadata_variables
|
685
|
-
set_project_id
|
686
|
-
set_vm_id
|
687
|
-
set_vm_name
|
688
|
-
set_location
|
689
|
-
|
690
|
-
# All metadata parameters must now be set.
|
691
|
-
return if @project_id && @zone && @vm_id
|
692
|
-
missing = []
|
693
|
-
missing << 'project_id' unless @project_id
|
694
|
-
missing << 'zone' unless @zone
|
695
|
-
missing << 'vm_id' unless @vm_id
|
696
|
-
fail Fluent::ConfigError, 'Unable to obtain metadata parameters: ' +
|
697
|
-
missing.join(' ')
|
698
|
-
end
|
699
|
-
|
700
|
-
# 1. Return the value if it is explicitly set in the config already.
|
701
|
-
# 2. If not, try to retrieve it by calling metadata server directly.
|
702
|
-
# 3. If still not set, try to obtain it from the credentials.
|
703
|
-
def set_project_id
|
704
|
-
@project_id ||= fetch_gce_metadata('project/project-id') if
|
705
|
-
@platform == Platform::GCE
|
706
|
-
@project_id ||= CredentialsInfo.project_id
|
707
|
-
rescue StandardError => e
|
708
|
-
@log.error 'Failed to obtain project id: ', error: e
|
709
|
-
end
|
710
|
-
|
711
|
-
# 1. Return the value if it is explicitly set in the config already.
|
712
|
-
# 2. If not, check if the response from Metadata Agent includes this info.
|
713
|
-
# 3. If not, try to retrieve it by calling metadata servers directly.
|
714
|
-
def set_vm_id
|
715
|
-
@vm_id ||= @resource.labels['instance_id'] if
|
716
|
-
!@resource.nil? && @resource.labels.key?('instance_id')
|
717
|
-
@vm_id ||= fetch_gce_metadata('instance/id') if @platform == Platform::GCE
|
718
|
-
@vm_id ||= ec2_metadata['instanceId'] if @platform == Platform::EC2
|
719
|
-
rescue StandardError => e
|
720
|
-
@log.error 'Failed to obtain vm_id: ', error: e
|
721
|
-
end
|
722
|
-
|
723
|
-
# 1. Return the value if it is explicitly set in the config already.
|
724
|
-
# 2. If not, check if the response from Metadata Agent includes this info.
|
725
|
-
# 3. If not, try to retrieve it locally.
|
726
|
-
def set_vm_name
|
727
|
-
@vm_name ||= @resource.labels['instance_name'] if
|
728
|
-
!@resource.nil? && @resource.labels.key?('instance_name')
|
729
|
-
@vm_name ||= Socket.gethostname
|
730
|
-
rescue StandardError => e
|
731
|
-
@log.error 'Failed to obtain vm name: ', error: e
|
732
|
-
end
|
733
|
-
|
734
|
-
# 1. Return the value if it is explicitly set in the config already.
|
735
|
-
# 2. If not, check if the response from Metadata Agent includes this info.
|
736
|
-
# 3. If not, try to retrieve it locally.
|
737
|
-
def set_location
|
738
|
-
unless @resource.nil?
|
739
|
-
@zone ||= @resource.labels['location'] if
|
740
|
-
@resource.type == DOCKER_CONSTANTS[:resource_type] &&
|
741
|
-
@resource.labels.key?('location')
|
742
|
-
@zone ||= @resource.labels['zone'] if
|
743
|
-
@platform == Platform::GCE && @resource.labels.key?('zone')
|
744
|
-
@zone ||= @resource.labels['region'] if
|
745
|
-
@platform == Platform::EC2 && @resource.labels.key?('region')
|
746
|
-
end
|
747
|
-
# Response format: "projects/<number>/zones/<zone>"
|
748
|
-
@zone ||= fetch_gce_metadata('instance/zone').rpartition('/')[2] if
|
749
|
-
@platform == Platform::GCE
|
750
|
-
@zone ||= 'aws:' + ec2_metadata['availabilityZone'] if
|
751
|
-
@platform == Platform::EC2 && ec2_metadata.key?('availabilityZone')
|
752
|
-
rescue StandardError => e
|
753
|
-
@log.error 'Failed to obtain location: ', error: e
|
754
|
-
end
|
755
|
-
|
756
|
-
# Retrieve monitored resource via the legacy way.
|
757
|
-
#
|
758
|
-
# Note: This is just a failover plan if we fail to get metadata from
|
759
|
-
# Metadata Agent. Thus it should be equivalent to what Metadata Agent
|
760
|
-
# returns.
|
761
|
-
def determine_agent_level_monitored_resource_via_legacy
|
762
|
-
resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
763
|
-
labels: {})
|
764
|
-
resource.type = determine_agent_level_monitored_resource_type
|
765
|
-
resource.labels = determine_agent_level_monitored_resource_labels(
|
766
|
-
resource.type)
|
767
|
-
resource
|
768
|
-
end
|
769
|
-
|
770
|
-
# Determine agent level monitored resource type.
|
771
|
-
def determine_agent_level_monitored_resource_type
|
772
|
-
# EC2 instance.
|
773
|
-
return EC2_CONSTANTS[:resource_type] if
|
774
|
-
@platform == Platform::EC2
|
775
|
-
|
776
|
-
# Unknown platform will be defaulted to GCE instance..
|
777
|
-
return COMPUTE_CONSTANTS[:resource_type] if
|
778
|
-
@platform == Platform::OTHER
|
779
|
-
|
780
|
-
# Resource types determined by @subservice_name config.
|
781
|
-
# Cloud Dataflow.
|
782
|
-
return DATAFLOW_CONSTANTS[:resource_type] if
|
783
|
-
@subservice_name == DATAFLOW_CONSTANTS[:service]
|
784
|
-
# Cloud ML.
|
785
|
-
return ML_CONSTANTS[:resource_type] if
|
786
|
-
@subservice_name == ML_CONSTANTS[:service]
|
787
|
-
# Default back to GCE if invalid value is detected.
|
788
|
-
return COMPUTE_CONSTANTS[:resource_type] if
|
789
|
-
@subservice_name
|
790
|
-
|
791
|
-
# Resource types determined by @detect_subservice config.
|
792
|
-
if @detect_subservice
|
793
|
-
begin
|
794
|
-
attributes = fetch_gce_metadata('instance/attributes/').split
|
795
|
-
rescue StandardError => e
|
796
|
-
@log.error 'Failed to detect subservice: ', error: e
|
797
|
-
end
|
798
|
-
# GAE app.
|
799
|
-
return APPENGINE_CONSTANTS[:resource_type] if
|
800
|
-
attributes.include?('gae_backend_name') &&
|
801
|
-
attributes.include?('gae_backend_version')
|
802
|
-
# GKE container.
|
803
|
-
return CONTAINER_CONSTANTS[:resource_type] if
|
804
|
-
attributes.include?('kube-env')
|
805
|
-
# Cloud Dataproc.
|
806
|
-
return DATAPROC_CONSTANTS[:resource_type] if
|
807
|
-
attributes.include?('dataproc-cluster-uuid') &&
|
808
|
-
attributes.include?('dataproc-cluster-name')
|
809
|
-
end
|
810
|
-
# GCE instance.
|
811
|
-
COMPUTE_CONSTANTS[:resource_type]
|
812
|
-
end
|
813
|
-
|
814
|
-
# Determine agent level monitored resource labels based on the resource
|
815
|
-
# type. Each resource type has its own labels that need to be filled in.
|
816
|
-
def determine_agent_level_monitored_resource_labels(type)
|
817
|
-
labels = {}
|
818
|
-
|
819
|
-
case type
|
820
|
-
|
821
|
-
# GAE app.
|
822
|
-
when APPENGINE_CONSTANTS[:resource_type]
|
823
|
-
begin
|
824
|
-
labels['module_id'] = fetch_gce_metadata(
|
825
|
-
'instance/attributes/gae_backend_name')
|
826
|
-
labels['version_id'] = fetch_gce_metadata(
|
827
|
-
'instance/attributes/gae_backend_version')
|
828
|
-
rescue StandardError => e
|
829
|
-
@log.error 'Failed to set monitored resource labels for GAE: ',
|
830
|
-
error: e
|
831
|
-
end
|
832
|
-
|
833
|
-
# GCE.
|
834
|
-
when COMPUTE_CONSTANTS[:resource_type]
|
835
|
-
labels['instance_id'] = @vm_id
|
836
|
-
labels['zone'] = @zone
|
837
|
-
|
838
|
-
# GKE container.
|
839
|
-
when CONTAINER_CONSTANTS[:resource_type]
|
840
|
-
labels['instance_id'] = @vm_id
|
841
|
-
labels['zone'] = @zone
|
842
|
-
begin
|
843
|
-
raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
|
844
|
-
kube_env = YAML.load(raw_kube_env)
|
845
|
-
labels['cluster_name'] =
|
846
|
-
cluster_name_from_kube_env(kube_env)
|
847
|
-
rescue StandardError => e
|
848
|
-
@log.error 'Failed to set monitored resource labels for GKE: ',
|
849
|
-
error: e
|
850
|
-
end
|
851
|
-
|
852
|
-
# Cloud Dataproc.
|
853
|
-
when DATAPROC_CONSTANTS[:resource_type]
|
854
|
-
begin
|
855
|
-
labels['cluster_uuid'] =
|
856
|
-
fetch_gce_metadata('instance/attributes/dataproc-cluster-uuid')
|
857
|
-
labels['cluster_name'] =
|
858
|
-
fetch_gce_metadata('instance/attributes/dataproc-cluster-name')
|
859
|
-
labels['region'] =
|
860
|
-
fetch_gce_metadata('instance/attributes/dataproc-region')
|
861
|
-
rescue StandardError => e
|
862
|
-
@log.error 'Failed to set monitored resource labels for Cloud ' \
|
863
|
-
'Dataproc: ', error: e
|
864
|
-
end
|
865
|
-
|
866
|
-
# EC2.
|
867
|
-
when EC2_CONSTANTS[:resource_type]
|
868
|
-
labels['instance_id'] = @vm_id
|
869
|
-
labels['region'] = @zone
|
870
|
-
labels['aws_account'] = ec2_metadata['accountId'] if
|
871
|
-
ec2_metadata.key?('accountId')
|
872
|
-
end
|
873
|
-
labels
|
874
|
-
end
|
875
|
-
|
876
|
-
# Determine the common labels that should be added to all log entries
|
877
|
-
# processed by this logging agent.
|
878
|
-
def determine_agent_level_common_labels
|
879
|
-
labels = {}
|
880
|
-
# User can specify labels via config. We want to capture those as well.
|
881
|
-
# TODO: Send instance tags as labels as well?
|
882
|
-
labels.merge!(@labels) if @labels
|
883
|
-
|
884
|
-
case @resource.type
|
885
|
-
|
886
|
-
# GAE app.
|
887
|
-
when APPENGINE_CONSTANTS[:resource_type]
|
888
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] = @vm_id
|
889
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
890
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/zone"] = @zone
|
891
|
-
|
892
|
-
# GCE and GKE container.
|
893
|
-
when COMPUTE_CONSTANTS[:resource_type],
|
894
|
-
CONTAINER_CONSTANTS[:resource_type]
|
895
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
896
|
-
|
897
|
-
# Cloud Dataflow and Cloud Dataproc.
|
898
|
-
when DATAFLOW_CONSTANTS[:resource_type],
|
899
|
-
DATAPROC_CONSTANTS[:resource_type]
|
900
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] = @vm_id
|
901
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
902
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/zone"] = @zone
|
903
|
-
|
904
|
-
# EC2.
|
905
|
-
when EC2_CONSTANTS[:resource_type]
|
906
|
-
labels["#{EC2_CONSTANTS[:service]}/resource_name"] = @vm_name
|
907
|
-
|
908
|
-
# Cloud ML.
|
909
|
-
when ML_CONSTANTS[:resource_type]
|
910
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] = @vm_id
|
911
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/resource_name"] = @vm_name
|
912
|
-
labels["#{COMPUTE_CONSTANTS[:service]}/zone"] = @zone
|
913
|
-
end
|
914
|
-
labels
|
915
|
-
end
|
916
|
-
|
917
|
-
# Determine the group level monitored resource and common labels shared by a
|
918
|
-
# collection of entries.
|
919
|
-
def determine_group_level_monitored_resource_and_labels(tag)
|
920
|
-
# Determine group level monitored resource type. For certain types,
|
921
|
-
# extract useful info from the tag and store those in
|
922
|
-
# matched_regexp_group.
|
923
|
-
group_resource_type, matched_regexp_group =
|
924
|
-
determine_group_level_monitored_resource_type(tag)
|
925
|
-
|
926
|
-
# Determine group level monitored resource labels and common labels.
|
927
|
-
group_resource_type, group_resource_labels, group_common_labels = \
|
928
|
-
determine_group_level_labels_and_adjust_type(
|
929
|
-
group_resource_type, matched_regexp_group)
|
930
|
-
|
931
|
-
group_resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
932
|
-
type: group_resource_type,
|
933
|
-
labels: group_resource_labels.to_h
|
934
|
-
)
|
935
|
-
|
936
|
-
# Freeze the per-request state. Any further changes must be made on a
|
937
|
-
# per-entry basis.
|
938
|
-
group_resource.freeze
|
939
|
-
group_resource.labels.freeze
|
940
|
-
group_common_labels.freeze
|
941
|
-
|
942
|
-
[group_resource, group_common_labels]
|
943
|
-
end
|
944
|
-
|
945
|
-
# Determine group level monitored resource type shared by a collection of
|
946
|
-
# entries.
|
947
|
-
# Returns the resource type and tag regexp matched groups. The matched
|
948
|
-
# groups only apply to some resource types. Return nil if not applicable or
|
949
|
-
# if there is no match.
|
950
|
-
def determine_group_level_monitored_resource_type(tag)
|
951
|
-
# Match tag against Cloud Functions format.
|
952
|
-
if @running_cloudfunctions
|
953
|
-
matched_regexp_group = @cloudfunctions_tag_regexp.match(tag)
|
954
|
-
return [CLOUDFUNCTIONS_CONSTANTS[:resource_type],
|
955
|
-
matched_regexp_group] if matched_regexp_group
|
956
|
-
end
|
957
|
-
|
958
|
-
# Match tag against Docker container stderr / stdout log format and
|
959
|
-
# Docker container application log format.
|
960
|
-
matched_regexp_group =
|
961
|
-
# Format: "container.<container_id>.<container_name>"
|
962
|
-
@dockercontainer_tag_regexp.match(tag) ||
|
963
|
-
# Format: "application-container.<container_name>.<additional_tag>"
|
964
|
-
@dockercontainer_tag_with_application_regexp.match(tag)
|
965
|
-
return [DOCKER_CONSTANTS[:resource_type], matched_regexp_group] if
|
966
|
-
matched_regexp_group
|
967
|
-
|
968
|
-
# Match tag against GKE Container format.
|
969
|
-
if @resource.type == CONTAINER_CONSTANTS[:resource_type] &&
|
970
|
-
@compiled_kubernetes_tag_regexp
|
971
|
-
# Container logs in Kubernetes are tagged based on where they came from,
|
972
|
-
# so we can extract useful metadata from the tag. Do this here to avoid
|
973
|
-
# having to repeat it for each record.
|
974
|
-
matched_regexp_group = @compiled_kubernetes_tag_regexp.match(tag)
|
975
|
-
return [@resource.type, matched_regexp_group] if matched_regexp_group
|
976
|
-
end
|
977
|
-
|
978
|
-
# Otherwise, return the original type.
|
979
|
-
[@resource.type, nil]
|
980
|
-
end
|
981
|
-
|
982
|
-
# Determine group level monitored resource labels and common labels. These
|
983
|
-
# labels will be shared by a collection of entries. In certain cases, we
|
984
|
-
# might also adjust the resource type.
|
985
|
-
def determine_group_level_labels_and_adjust_type(group_resource_type,
|
986
|
-
matched_regexp_group)
|
987
|
-
group_resource_labels = @resource.labels.dup
|
988
|
-
group_common_labels = @common_labels.dup
|
989
|
-
|
990
|
-
case group_resource_type
|
991
|
-
|
992
|
-
# Cloud Functions.
|
993
|
-
when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
994
|
-
group_resource_labels['region'] = @gcf_region
|
995
|
-
group_resource_labels['function_name'] =
|
996
|
-
decode_cloudfunctions_function_name(
|
997
|
-
matched_regexp_group['encoded_function_name'])
|
998
|
-
instance_id = group_resource_labels.delete('instance_id')
|
999
|
-
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/cluster_name"] =
|
1000
|
-
group_resource_labels.delete('cluster_name')
|
1001
|
-
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/instance_id"] =
|
1002
|
-
instance_id
|
1003
|
-
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] =
|
1004
|
-
instance_id
|
1005
|
-
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/zone"] =
|
1006
|
-
group_resource_labels.delete('zone')
|
1007
|
-
|
1008
|
-
# GKE container.
|
1009
|
-
when CONTAINER_CONSTANTS[:resource_type]
|
1010
|
-
if matched_regexp_group
|
1011
|
-
group_resource_labels['container_name'] =
|
1012
|
-
matched_regexp_group['container_name']
|
1013
|
-
group_resource_labels['namespace_id'] =
|
1014
|
-
matched_regexp_group['namespace_name']
|
1015
|
-
group_resource_labels['pod_id'] =
|
1016
|
-
matched_regexp_group['pod_name']
|
1017
|
-
%w(namespace_name pod_name).each do |field|
|
1018
|
-
group_common_labels["#{CONTAINER_CONSTANTS[:service]}/#{field}"] =
|
1019
|
-
matched_regexp_group[field]
|
1020
|
-
end
|
1021
|
-
end
|
1022
|
-
|
1023
|
-
# Docker container.
|
1024
|
-
when DOCKER_CONSTANTS[:resource_type]
|
1025
|
-
# For Docker container stderr / stdout logs generated by Docker Fluentd
|
1026
|
-
# Logging Driver, tags are in the format of "container.<container_id>.
|
1027
|
-
# <container_name>", thus they include 'container_id' info.
|
1028
|
-
# For logs generated by applications running in Docker containers,
|
1029
|
-
# tags are in the format of "application-container.<container_name>.
|
1030
|
-
# <additional_tag>", thus 'container_id' info is unknown yet.
|
1031
|
-
# 'container_name' info on the other hand is always available.
|
1032
|
-
container_id = matched_regexp_group['container_id'] if
|
1033
|
-
matched_regexp_group.names.include? 'container_id'
|
1034
|
-
container_name = matched_regexp_group['container_name']
|
1035
|
-
|
1036
|
-
if @enable_metadata_agent
|
1037
|
-
# Call Metadata Agent with "container.<container_id>" or
|
1038
|
-
# "application-container.<container_name>" as the locally-unique key
|
1039
|
-
# to retrieve monitored resource. This should be different from the
|
1040
|
-
# original @resource value that got initiated when the agent starts up
|
1041
|
-
# because that one is always at the VM level.
|
1042
|
-
if container_id
|
1043
|
-
locally_unique_id = "container.#{container_id}"
|
1044
|
-
else
|
1045
|
-
locally_unique_id = "containerName.#{container_name}"
|
1046
|
-
end
|
1047
|
-
retrieved_resource = call_metadata_agent_for_monitored_resource(
|
1048
|
-
locally_unique_id)
|
1049
|
-
end
|
1050
|
-
|
1051
|
-
if !retrieved_resource.nil?
|
1052
|
-
# If we successfully get a monitored resource from Metadata Agent,
|
1053
|
-
# use this one instead of the original instance monitored resource.
|
1054
|
-
group_resource_labels = retrieved_resource.labels.dup
|
1055
|
-
@log.debug 'Retrieved monitored resource from Metadata Agent: ' \
|
1056
|
-
"#{retrieved_resource.inspect}."
|
1057
|
-
else
|
1058
|
-
# If Metadata Agent is not enabled, or we failed to get a monitored
|
1059
|
-
# resource, we need to have some backup plan.
|
1060
|
-
@log.debug 'Metadata Agent not enabled or failed to retrieve ' \
|
1061
|
-
'docker container monitored resource from Metadata ' \
|
1062
|
-
'Agent.'
|
1063
|
-
|
1064
|
-
# 1. Check if 'container_id' is set already. It should be available
|
1065
|
-
# for stdout / stderr). If so, use that.
|
1066
|
-
# 2. If not, call Docker Remote API to retrieve the container ID from
|
1067
|
-
# container name, but only if @call_docker_api_locally is true.
|
1068
|
-
container_id ||= retrieve_container_id_by_name_locally(
|
1069
|
-
container_name) if @call_docker_api_locally
|
1070
|
-
unless container_id
|
1071
|
-
@log.debug 'No docker container id retrieved. Falling back to
|
1072
|
-
instance monitored resource.'
|
1073
|
-
# If a container id is not available, fall back to the instance
|
1074
|
-
# monitored resource.
|
1075
|
-
return [COMPUTE_CONSTANTS[:resource_type], group_resource_labels,
|
1076
|
-
group_common_labels]
|
1077
|
-
end
|
1078
|
-
group_resource_labels['container_id'] = container_id
|
1079
|
-
# 'zone' for GCP and 'region' for EC2 must have been set at this
|
1080
|
-
# point. Rename them to 'location'.
|
1081
|
-
group_resource_labels['location'] = @zone
|
1082
|
-
if @platform == Platform::EC2
|
1083
|
-
group_resource_labels.delete('region')
|
1084
|
-
else
|
1085
|
-
group_resource_labels.delete('zone')
|
1086
|
-
end
|
1087
|
-
# vm id info should be reported as a metadata label instead.
|
1088
|
-
group_resource_labels.delete('instance_id')
|
1089
|
-
|
1090
|
-
end
|
1091
|
-
# Set metadata labels.
|
1092
|
-
group_common_labels["#{DOCKER_CONSTANTS[:service]}/container_name"] =
|
1093
|
-
matched_regexp_group['container_name']
|
1094
|
-
group_common_labels["#{COMPUTE_CONSTANTS[:service]}/resource_id"] =
|
1095
|
-
@vm_id
|
1096
|
-
end
|
1097
|
-
|
1098
|
-
[group_resource_type, group_resource_labels, group_common_labels]
|
1099
|
-
end
|
1100
|
-
|
1101
|
-
# Extract entry resource and common labels that should be applied to
|
1102
|
-
# individual entries from the group resource.
|
1103
|
-
def determine_entry_level_labels(group_resource, record)
|
1104
|
-
resource_type = group_resource.type
|
1105
|
-
resource_labels = {}
|
1106
|
-
common_labels = {}
|
1107
|
-
|
1108
|
-
# The format of the locally unique key varies by monitored resource.
|
1109
|
-
#
|
1110
|
-
# Docker container:
|
1111
|
-
# "container.<container_id>"
|
1112
|
-
# "containerName.<container_name>"
|
1113
|
-
# GKE container:
|
1114
|
-
# "gke_containerName.<namespace_id>.<pod_name>.<container_name>"
|
1115
|
-
if @enable_metadata_agent && record.key?(LOCALLY_UNIQUE_ID_LABEL_NAME)
|
1116
|
-
# Call Metadata Agent with "gke_containerName.<namespace_id>.
|
1117
|
-
# <pod_name>.<container_name>" as the locally-unique key to retrieve
|
1118
|
-
# monitored resource.
|
1119
|
-
locally_unique_id = record[LOCALLY_UNIQUE_ID_LABEL_NAME]
|
1120
|
-
@log.debug 'Calling metadata agent with locally unique id: ' \
|
1121
|
-
"#{locally_unique_id}."
|
1122
|
-
retrieved_resource = call_metadata_agent_for_monitored_resource(
|
1123
|
-
locally_unique_id)
|
1124
|
-
@log.debug 'Retrieved monitored resource from metadata agent: ' \
|
1125
|
-
"#{retrieved_resource.inspect}."
|
1126
|
-
unless retrieved_resource.nil?
|
1127
|
-
resource_type = retrieved_resource.type
|
1128
|
-
# Temporarily renaming 'gke_container' to 'container'.
|
1129
|
-
resource_type = 'container' if resource_type == 'gke_container'
|
1130
|
-
# If we successfully get a monitored resource from Metadata Agent,
|
1131
|
-
# use this one instead of the original VM-level monitored resource.
|
1132
|
-
resource_labels = retrieved_resource.labels.dup
|
1133
|
-
record.delete(LOCALLY_UNIQUE_ID_LABEL_NAME)
|
1134
|
-
@log.debug 'Retrieved gke_container monitored resource from' \
|
1135
|
-
'Stackdriver Metadata agent: ' \
|
1136
|
-
"#{retrieved_resource.inspect}."
|
1137
|
-
end
|
1138
|
-
end
|
1139
|
-
|
1140
|
-
# Cloud Functions.
|
1141
|
-
if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
|
1142
|
-
record.key?('log')
|
1143
|
-
@cloudfunctions_log_match =
|
1144
|
-
@cloudfunctions_log_regexp.match(record['log'])
|
1145
|
-
common_labels['execution_id'] =
|
1146
|
-
@cloudfunctions_log_match['execution_id'] if \
|
1147
|
-
@cloudfunctions_log_match &&
|
1148
|
-
@cloudfunctions_log_match['execution_id']
|
1149
|
-
end
|
1150
|
-
|
1151
|
-
# GKE containers.
|
1152
|
-
if resource_type == CONTAINER_CONSTANTS[:resource_type]
|
1153
|
-
# Move the stdout/stderr annotation from the record into a label.
|
1154
|
-
common_labels.merge!(
|
1155
|
-
fields_to_labels(
|
1156
|
-
record, 'stream' => "#{CONTAINER_CONSTANTS[:service]}/stream"))
|
1157
|
-
|
1158
|
-
# If the record has been annotated by the kubernetes_metadata_filter
|
1159
|
-
# plugin, then use that metadata. Otherwise, rely on commonLabels
|
1160
|
-
# populated at the grouped_entries level from the group's tag.
|
1161
|
-
if record.key?('kubernetes')
|
1162
|
-
extracted_resource_labels, extracted_common_labels = \
|
1163
|
-
extract_container_metadata(record)
|
1164
|
-
resource_labels.merge!(extracted_resource_labels)
|
1165
|
-
common_labels.merge!(extracted_common_labels)
|
1166
|
-
end
|
1167
|
-
end
|
1168
|
-
|
1169
|
-
# Docker containers.
|
1170
|
-
if resource_type == DOCKER_CONSTANTS[:resource_type]
|
1171
|
-
# For logs coming from Docker Fluentd Logging Driver, the log record
|
1172
|
-
# has 4 fields: 'container_id', 'container_name', 'source' and 'log'.
|
1173
|
-
# Extract 'container_id', 'container_name' and 'source' from json
|
1174
|
-
# record, set corresponding labels, and remove these fields from record.
|
1175
|
-
{
|
1176
|
-
'container_name' => 'container_name',
|
1177
|
-
'source' => 'stream'
|
1178
|
-
}.each do |field_name, label_name|
|
1179
|
-
common_labels.merge!(
|
1180
|
-
fields_to_labels(
|
1181
|
-
record,
|
1182
|
-
field_name => "#{DOCKER_CONSTANTS[:service]}/#{label_name}"
|
1183
|
-
)
|
1184
|
-
)
|
1185
|
-
end
|
1186
|
-
resource_labels.merge!(
|
1187
|
-
fields_to_labels(record, 'container_id' => 'container_id'))
|
1188
|
-
end
|
1189
|
-
|
1190
|
-
# If the name of a field in the record is present in the @label_map
|
1191
|
-
# configured by users, report its value as a label and do not send that
|
1192
|
-
# field as part of the payload.
|
1193
|
-
common_labels.merge!(fields_to_labels(record, @label_map))
|
1194
|
-
|
1195
|
-
resource_labels.merge!(
|
1196
|
-
extract_resource_labels(resource_type, common_labels))
|
1197
|
-
|
1198
|
-
[resource_type, resource_labels, common_labels]
|
1199
|
-
end
|
1200
|
-
|
1201
|
-
# Call Metadata Agent to get monitored resource information and parse
|
1202
|
-
# response to Google::Api::MonitoredResource.
|
1203
|
-
def call_metadata_agent_for_monitored_resource(unique_key)
|
1204
|
-
response = call_metadata_agent("monitoredResource/#{unique_key}")
|
1205
|
-
return nil if response.nil?
|
1206
|
-
begin
|
1207
|
-
resource = Google::Api::MonitoredResource.decode_json(response.to_json)
|
1208
|
-
rescue Google::Protobuf::ParseError, ArgumentError => e
|
1209
|
-
@log.error 'Error paring monitored resource from Metadata Agent. ' \
|
1210
|
-
"response: #{response.inspect}", error: e
|
1211
|
-
return nil
|
1212
|
-
end
|
1213
|
-
|
1214
|
-
# TODO(qingling128): Use Google::Api::MonitoredResource directly after we
|
1215
|
-
# upgrade gRPC version to include the fix for the protobuf map
|
1216
|
-
# corruption issue.
|
1217
|
-
Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
1218
|
-
type: resource.type,
|
1219
|
-
labels: resource.labels.to_h
|
1220
|
-
)
|
1221
|
-
end
|
1222
|
-
|
1223
|
-
# Call Metadata Agent and parse response to json. Return nil in case of any
|
1224
|
-
# error / failure.
|
1225
|
-
def call_metadata_agent(path)
|
1226
|
-
url = "#{@metadata_agent_url}/#{path}"
|
1227
|
-
@log.debug("Calling Metadata Agent: #{url}")
|
1228
|
-
open(url) do |f|
|
1229
|
-
response = f.read
|
1230
|
-
parsed_hash = parse_json_or_nil(response)
|
1231
|
-
if parsed_hash.nil?
|
1232
|
-
@log.error 'Response from Metadata Agent is not in valid json ' \
|
1233
|
-
"format: '#{response.inspect}'."
|
1234
|
-
return nil
|
1235
|
-
end
|
1236
|
-
@log.debug "Response from Metadata Agent: #{parsed_hash}"
|
1237
|
-
return parsed_hash
|
1238
|
-
end
|
1239
|
-
rescue StandardError => e
|
1240
|
-
@log.error 'Error calling Metadata Agent.', error: e
|
1241
|
-
return nil
|
1242
868
|
end
|
1243
869
|
|
1244
870
|
# TODO: This functionality should eventually be available in another
|
@@ -1276,28 +902,11 @@ module Fluent
|
|
1276
902
|
end
|
1277
903
|
end
|
1278
904
|
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
@log.debug "Response from Docker API with name '#{container_name}': " \
|
1285
|
-
"#{response.inspect}."
|
1286
|
-
return parse_container_id_from_docker_api_response(response)
|
1287
|
-
rescue StandardError => e
|
1288
|
-
@log.error 'Error calling Docker API to get container id.', error: e
|
1289
|
-
return nil
|
1290
|
-
end
|
1291
|
-
|
1292
|
-
# Parse the container id from Docker Remote API response.
|
1293
|
-
# TODO(qingling128) Add a config for Docker API version to support parsing
|
1294
|
-
# different versions of Docker Remote API when the format varies.
|
1295
|
-
def parse_container_id_from_docker_api_response(response)
|
1296
|
-
JSON.parse(response.data[:body])['Id']
|
1297
|
-
rescue StandardError => e
|
1298
|
-
@log.error 'Error parsing Docker API response to get container id.',
|
1299
|
-
error: e
|
1300
|
-
return nil
|
905
|
+
def detect_cloudfunctions(attributes)
|
906
|
+
return unless attributes.include?('gcf_region')
|
907
|
+
# Cloud Functions detected
|
908
|
+
@running_cloudfunctions = true
|
909
|
+
@gcf_region = fetch_gce_metadata('instance/attributes/gcf_region')
|
1301
910
|
end
|
1302
911
|
|
1303
912
|
def cluster_name_from_kube_env(kube_env)
|
@@ -1370,14 +979,9 @@ module Fluent
|
|
1370
979
|
end
|
1371
980
|
elsif record.key?('severity')
|
1372
981
|
return parse_severity(record.delete('severity'))
|
1373
|
-
elsif
|
1374
|
-
|
1375
|
-
stream = entry_common_labels[
|
1376
|
-
"#{CONTAINER_CONSTANTS[:service]}/stream"] if
|
1377
|
-
resource_type == CONTAINER_CONSTANTS[:resource_type]
|
1378
|
-
stream = entry_common_labels[
|
1379
|
-
"#{DOCKER_CONSTANTS[:service]}/stream"] if
|
1380
|
-
resource_type == DOCKER_CONSTANTS[:resource_type]
|
982
|
+
elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
|
983
|
+
entry_common_labels.key?("#{CONTAINER_CONSTANTS[:service]}/stream")
|
984
|
+
stream = entry_common_labels["#{CONTAINER_CONSTANTS[:service]}/stream"]
|
1381
985
|
if stream == 'stdout'
|
1382
986
|
return 'INFO'
|
1383
987
|
elsif stream == 'stderr'
|
@@ -1585,24 +1189,6 @@ module Fluent
|
|
1585
1189
|
[resource_labels, common_labels]
|
1586
1190
|
end
|
1587
1191
|
|
1588
|
-
def format(tag, time, record)
|
1589
|
-
[tag, time, record].to_msgpack
|
1590
|
-
end
|
1591
|
-
|
1592
|
-
# Given a tag, returns the corresponding valid tag if possible, or nil if
|
1593
|
-
# the tag should be rejected. If 'require_valid_tags' is false, non-string
|
1594
|
-
# tags are converted to strings, and invalid characters are sanitized;
|
1595
|
-
# otherwise such tags are rejected.
|
1596
|
-
def sanitize_tag(tag)
|
1597
|
-
if @require_valid_tags &&
|
1598
|
-
(!tag.is_a?(String) || tag == '' || convert_to_utf8(tag) != tag)
|
1599
|
-
return nil
|
1600
|
-
end
|
1601
|
-
tag = convert_to_utf8(tag.to_s)
|
1602
|
-
tag = '_' if tag == ''
|
1603
|
-
tag
|
1604
|
-
end
|
1605
|
-
|
1606
1192
|
# For every original_label => new_label pair in the label_map, delete the
|
1607
1193
|
# original_label from the record if it exists, and extract the value to form
|
1608
1194
|
# a map with the new_label as the key.
|
@@ -1630,8 +1216,7 @@ module Fluent
|
|
1630
1216
|
entry.text_payload = record['log']
|
1631
1217
|
elsif is_json
|
1632
1218
|
entry.json_payload = record
|
1633
|
-
elsif
|
1634
|
-
DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
|
1219
|
+
elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
|
1635
1220
|
record.key?('log')
|
1636
1221
|
entry.text_payload = record['log']
|
1637
1222
|
elsif record.size == 1 && record.key?('message')
|
@@ -1701,8 +1286,7 @@ module Fluent
|
|
1701
1286
|
entry.text_payload = convert_to_utf8(record['log'])
|
1702
1287
|
elsif is_json
|
1703
1288
|
entry.json_payload = struct_from_ruby(record)
|
1704
|
-
elsif
|
1705
|
-
DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
|
1289
|
+
elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
|
1706
1290
|
record.key?('log')
|
1707
1291
|
entry.text_payload = convert_to_utf8(record['log'])
|
1708
1292
|
elsif record.size == 1 && record.key?('message')
|
@@ -1715,7 +1299,7 @@ module Fluent
|
|
1715
1299
|
def log_name(tag, resource)
|
1716
1300
|
if resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
1717
1301
|
tag = 'cloud-functions'
|
1718
|
-
elsif
|
1302
|
+
elsif @running_on_managed_vm
|
1719
1303
|
# Add a prefix to Managed VM logs to prevent namespace collisions.
|
1720
1304
|
tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
|
1721
1305
|
elsif resource.type == CONTAINER_CONSTANTS[:resource_type]
|
@@ -1745,9 +1329,6 @@ module Fluent
|
|
1745
1329
|
elsif resource_type == ML_CONSTANTS[:resource_type]
|
1746
1330
|
label_prefix = ML_CONSTANTS[:service]
|
1747
1331
|
labels_to_extract = %w(job_id task_name)
|
1748
|
-
elsif resource_type == DOCKER_CONSTANTS[:resource_type]
|
1749
|
-
label_prefix = DOCKER_CONSTANTS[:service]
|
1750
|
-
labels_to_extract = %w(container_id)
|
1751
1332
|
else
|
1752
1333
|
return extracted_labels
|
1753
1334
|
end
|
@@ -1815,6 +1396,33 @@ module Fluent
|
|
1815
1396
|
end
|
1816
1397
|
end
|
1817
1398
|
end
|
1399
|
+
|
1400
|
+
# Increment the metric for the number of successful requests.
|
1401
|
+
def increment_successful_requests_count
|
1402
|
+
return unless @successful_requests_count
|
1403
|
+
@successful_requests_count.increment(grpc: @use_grpc)
|
1404
|
+
end
|
1405
|
+
|
1406
|
+
# Increment the metric for the number of failed requests, labeled by
|
1407
|
+
# the provided status code.
|
1408
|
+
def increment_failed_requests_count(code)
|
1409
|
+
return unless @failed_requests_count
|
1410
|
+
@failed_requests_count.increment(grpc: @use_grpc, code: code)
|
1411
|
+
end
|
1412
|
+
|
1413
|
+
# Increment the metric for the number of log entries, successfully
|
1414
|
+
# ingested by the Stackdriver Logging API.
|
1415
|
+
def increment_ingested_entries_count(count)
|
1416
|
+
return unless @ingested_entries_count
|
1417
|
+
@ingested_entries_count.increment({}, count)
|
1418
|
+
end
|
1419
|
+
|
1420
|
+
# Increment the metric for the number of log entries that were dropped
|
1421
|
+
# and not ingested by the Stackdriver Logging API.
|
1422
|
+
def increment_dropped_entries_count(count)
|
1423
|
+
return unless @dropped_entries_count
|
1424
|
+
@dropped_entries_count.increment({}, count)
|
1425
|
+
end
|
1818
1426
|
end
|
1819
1427
|
end
|
1820
1428
|
|