fluent-plugin-google-cloud 0.6.7 → 0.6.8.pre.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/fluent-plugin-google-cloud.gemspec +1 -1
- data/lib/fluent/plugin/out_google_cloud.rb +287 -189
- data/test/plugin/base_test.rb +254 -2
- data/test/plugin/constants.rb +146 -13
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f013270937ffeb8ac1addeb0b50cbd04b9434d8f
|
4
|
+
data.tar.gz: a19be3b8f5b49266e6055be20c6a8c693fae1fea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cfc74c7b17e028776423d30fbc438d1d5d146d532991bb2331044a35f54e1a6acfd6f14e6bc4b234f924ac5e6d62b268931b8c856d07532b9c610c85579f3906
|
7
|
+
data.tar.gz: bb14fdffce9b35b5c1ff4d9609d5ec473d19d69b74972b3ba20bc843d28e1118be19b89b112b961d7b903e246612eca551780c3e9c202fa852ad8bdec6f50f2d
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fluent-plugin-google-cloud (0.6.7)
|
4
|
+
fluent-plugin-google-cloud (0.6.8.pre.1)
|
5
5
|
fluentd (~> 0.10)
|
6
6
|
google-api-client (~> 0.9.0)
|
7
7
|
google-cloud-logging (= 0.24.1)
|
@@ -55,7 +55,7 @@ GEM
|
|
55
55
|
googleauth (~> 0.5.1)
|
56
56
|
grpc (~> 1.0)
|
57
57
|
rly (~> 0.2.3)
|
58
|
-
google-protobuf (3.4.0.2
|
58
|
+
google-protobuf (3.4.0.2)
|
59
59
|
googleapis-common-protos (1.3.5)
|
60
60
|
google-protobuf (~> 3.2)
|
61
61
|
grpc (~> 1.0)
|
@@ -67,7 +67,7 @@ GEM
|
|
67
67
|
multi_json (~> 1.11)
|
68
68
|
os (~> 0.9)
|
69
69
|
signet (~> 0.7)
|
70
|
-
grpc (1.2.5
|
70
|
+
grpc (1.2.5)
|
71
71
|
google-protobuf (~> 3.1)
|
72
72
|
googleauth (~> 0.5.1)
|
73
73
|
hashdiff (0.3.6)
|
@@ -10,7 +10,7 @@ eos
|
|
10
10
|
gem.homepage = \
|
11
11
|
'https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud'
|
12
12
|
gem.license = 'Apache-2.0'
|
13
|
-
gem.version = '0.6.7'
|
13
|
+
gem.version = '0.6.8.pre.1'
|
14
14
|
gem.authors = ['Todd Derr', 'Alex Robinson']
|
15
15
|
gem.email = ['salty@google.com']
|
16
16
|
gem.required_ruby_version = Gem::Requirement.new('>= 2.0')
|
@@ -38,12 +38,12 @@ end
|
|
38
38
|
module Fluent
|
39
39
|
# fluentd output plugin for the Stackdriver Logging API
|
40
40
|
class GoogleCloudOutput < BufferedOutput
|
41
|
-
# Constants for service names
|
42
|
-
module
|
41
|
+
# Constants for service names, resource types, etc.
|
42
|
+
module ServiceConstants
|
43
43
|
APPENGINE_CONSTANTS = {
|
44
44
|
service: 'appengine.googleapis.com',
|
45
45
|
resource_type: 'gae_app',
|
46
|
-
metadata_attributes: %w(gae_backend_name gae_backend_version)
|
46
|
+
metadata_attributes: %w(gae_backend_name gae_backend_version)
|
47
47
|
}
|
48
48
|
CLOUDFUNCTIONS_CONSTANTS = {
|
49
49
|
service: 'cloudfunctions.googleapis.com',
|
@@ -53,12 +53,16 @@ module Fluent
|
|
53
53
|
service: 'compute.googleapis.com',
|
54
54
|
resource_type: 'gce_instance'
|
55
55
|
}
|
56
|
-
|
56
|
+
GKE_CONSTANTS = {
|
57
57
|
service: 'container.googleapis.com',
|
58
58
|
resource_type: 'container',
|
59
59
|
extra_resource_labels: %w(namespace_id pod_id container_name),
|
60
60
|
extra_common_labels: %w(namespace_name pod_name),
|
61
|
-
metadata_attributes: %w(kube-env)
|
61
|
+
metadata_attributes: %w(kube-env)
|
62
|
+
}
|
63
|
+
DOCKER_CONSTANTS = {
|
64
|
+
service: 'docker.googleapis.com',
|
65
|
+
resource_type: 'docker_container'
|
62
66
|
}
|
63
67
|
DATAFLOW_CONSTANTS = {
|
64
68
|
service: 'dataflow.googleapis.com',
|
@@ -68,8 +72,7 @@ module Fluent
|
|
68
72
|
DATAPROC_CONSTANTS = {
|
69
73
|
service: 'cluster.dataproc.googleapis.com',
|
70
74
|
resource_type: 'cloud_dataproc_cluster',
|
71
|
-
metadata_attributes:
|
72
|
-
%w(dataproc-cluster-uuid dataproc-cluster-name).to_set
|
75
|
+
metadata_attributes: %w(dataproc-cluster-uuid dataproc-cluster-name)
|
73
76
|
}
|
74
77
|
EC2_CONSTANTS = {
|
75
78
|
service: 'ec2.amazonaws.com',
|
@@ -83,7 +86,7 @@ module Fluent
|
|
83
86
|
|
84
87
|
# The map between a subservice name and a resource type.
|
85
88
|
SUBSERVICE_MAP = \
|
86
|
-
[APPENGINE_CONSTANTS,
|
89
|
+
[APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAFLOW_CONSTANTS,
|
87
90
|
DATAPROC_CONSTANTS, ML_CONSTANTS]
|
88
91
|
.map { |consts| [consts[:service], consts[:resource_type]] }.to_h
|
89
92
|
# Default back to GCE if invalid value is detected.
|
@@ -91,18 +94,34 @@ module Fluent
|
|
91
94
|
|
92
95
|
# The map between a resource type and expected subservice attributes.
|
93
96
|
SUBSERVICE_METADATA_ATTRIBUTES = \
|
94
|
-
[APPENGINE_CONSTANTS,
|
95
|
-
|
96
|
-
.to_h
|
97
|
+
[APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAPROC_CONSTANTS].map do |consts|
|
98
|
+
[consts[:resource_type], consts[:metadata_attributes].to_set]
|
99
|
+
end.to_h
|
100
|
+
end
|
97
101
|
|
98
|
-
|
99
|
-
|
100
|
-
|
102
|
+
# Constants for configuration.
|
103
|
+
module ConfigConstants
|
104
|
+
# Default values for JSON payload keys to set the "httpRequest",
|
105
|
+
# "operation", "sourceLocation", "trace" fields in the LogEntry.
|
101
106
|
DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'
|
102
|
-
DEFAULT_OPERATION_KEY =
|
103
|
-
DEFAULT_SOURCE_LOCATION_KEY =
|
104
|
-
|
105
|
-
|
107
|
+
DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'
|
108
|
+
DEFAULT_SOURCE_LOCATION_KEY = 'logging.googleapis.com/sourceLocation'
|
109
|
+
DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'
|
110
|
+
|
111
|
+
DEFAULT_METADATA_AGENT_URL =
|
112
|
+
'http://local-metadata-agent.stackdriver.com:8000'
|
113
|
+
end
|
114
|
+
|
115
|
+
# Constants for log entry field extraction.
|
116
|
+
module InternalConstants
|
117
|
+
# Use empty string as request path when the local_resource_id of monitored
|
118
|
+
# resource can be implicitly inferred by Metadata Agent.
|
119
|
+
IMPLICIT_LOCAL_RESOURCE_ID = ''
|
120
|
+
|
121
|
+
# The label name of local_resource_id in the json payload. When a record
|
122
|
+
# has this field in the payload, we will use the value to retrieve
|
123
|
+
# monitored resource from Stackdriver Metadata agent.
|
124
|
+
LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'
|
106
125
|
|
107
126
|
# Map from each field name under LogEntry to corresponding variables
|
108
127
|
# required to perform field value extraction from the log record.
|
@@ -155,12 +174,14 @@ module Fluent
|
|
155
174
|
}
|
156
175
|
end
|
157
176
|
|
158
|
-
include self::
|
177
|
+
include self::ServiceConstants
|
178
|
+
include self::ConfigConstants
|
179
|
+
include self::InternalConstants
|
159
180
|
|
160
181
|
Fluent::Plugin.register_output('google_cloud', self)
|
161
182
|
|
162
183
|
PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'
|
163
|
-
PLUGIN_VERSION = '0.6.7'
|
184
|
+
PLUGIN_VERSION = '0.6.8.pre.1'
|
164
185
|
|
165
186
|
# Name of the Google cloud logging write scope.
|
166
187
|
LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'
|
@@ -295,6 +316,11 @@ module Fluent
|
|
295
316
|
config_param :monitoring_type, :string,
|
296
317
|
:default => Monitoring::PrometheusMonitoringRegistry.name
|
297
318
|
|
319
|
+
# Whether to call metadata agent to retrieve monitored resource.
|
320
|
+
config_param :enable_metadata_agent, :bool, :default => false
|
321
|
+
config_param :metadata_agent_url, :string,
|
322
|
+
:default => DEFAULT_METADATA_AGENT_URL
|
323
|
+
|
298
324
|
# rubocop:enable Style/HashSyntax
|
299
325
|
|
300
326
|
# TODO: Add a log_name config option rather than just using the tag?
|
@@ -354,28 +380,32 @@ module Fluent
|
|
354
380
|
|
355
381
|
@platform = detect_platform
|
356
382
|
|
357
|
-
# Set
|
358
|
-
#
|
359
|
-
#
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
383
|
+
# Set agent-level monitored resource. This monitored resource is initiated
|
384
|
+
# as the logging agent starts up. It will be inherited by all log entries
|
385
|
+
# processed by this agent. First try to retrieve it via Metadata Agent.
|
386
|
+
if @enable_metadata_agent
|
387
|
+
# The local_resource_id for this should be the instance id. Since this
|
388
|
+
# can be implicitly inferred by Metadata Agent, we do not need to
|
389
|
+
# explicitly send the key.
|
390
|
+
# TODO(qingling128): Remove this logic once the resource is retrieved at
|
391
|
+
# a proper time (b/65175256).
|
392
|
+
@resource = query_metadata_agent_for_monitored_resource(
|
393
|
+
IMPLICIT_LOCAL_RESOURCE_ID)
|
394
|
+
end
|
395
|
+
|
396
|
+
# Set required variables: @project_id, @vm_id, @vm_name and @zone.
|
366
397
|
set_required_metadata_variables
|
367
398
|
|
368
399
|
# Retrieve monitored resource.
|
369
|
-
#
|
370
|
-
#
|
371
|
-
|
372
|
-
@resource = determine_agent_level_monitored_resource_via_legacy
|
400
|
+
# Fail over to retrieve monitored resource via the legacy path if we fail
|
401
|
+
# to get it from Metadata Agent.
|
402
|
+
@resource ||= determine_agent_level_monitored_resource_via_legacy
|
373
403
|
|
374
404
|
# Set regexp that we should match tags against later on. Using a list
|
375
405
|
# instead of a map to ensure order. For example, tags will be matched
|
376
406
|
# against Cloud Functions first, then GKE.
|
377
407
|
@tag_regexp_list = []
|
378
|
-
if @resource.type ==
|
408
|
+
if @resource.type == GKE_CONSTANTS[:resource_type]
|
379
409
|
# We only support Cloud Functions logs for GKE right now.
|
380
410
|
if fetch_gce_metadata('instance/attributes/'
|
381
411
|
).split.include?('gcf_region')
|
@@ -388,7 +418,7 @@ module Fluent
|
|
388
418
|
]
|
389
419
|
end
|
390
420
|
@tag_regexp_list << [
|
391
|
-
|
421
|
+
GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
|
392
422
|
]
|
393
423
|
end
|
394
424
|
|
@@ -404,7 +434,7 @@ module Fluent
|
|
404
434
|
|
405
435
|
# Log an informational message containing the Logs viewer URL
|
406
436
|
@log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
|
407
|
-
"viewer?project=#{@project_id}&resource=#{@
|
437
|
+
"viewer?project=#{@project_id}&resource=#{@resource.type}/",
|
408
438
|
"instance_id/#{@vm_id}"
|
409
439
|
end
|
410
440
|
|
@@ -420,39 +450,27 @@ module Fluent
|
|
420
450
|
end
|
421
451
|
|
422
452
|
def write(chunk)
|
423
|
-
|
424
|
-
grouped_entries = {}
|
425
|
-
chunk.msgpack_each do |tag, *arr|
|
426
|
-
sanitized_tag = sanitize_tag(tag)
|
427
|
-
if sanitized_tag.nil?
|
428
|
-
@log.warn "Dropping log entries with invalid tag: '#{tag}'. " \
|
429
|
-
'A tag should be a string with utf8 characters.'
|
430
|
-
next
|
431
|
-
end
|
432
|
-
grouped_entries[sanitized_tag] ||= []
|
433
|
-
grouped_entries[sanitized_tag].push(arr)
|
434
|
-
end
|
453
|
+
grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)
|
435
454
|
|
436
|
-
grouped_entries.each do |tag, arr|
|
455
|
+
grouped_entries.each do |(tag, local_resource_id), arr|
|
437
456
|
entries = []
|
438
|
-
|
439
|
-
determine_group_level_monitored_resource_and_labels(
|
457
|
+
group_level_resource, group_level_common_labels =
|
458
|
+
determine_group_level_monitored_resource_and_labels(
|
459
|
+
tag, local_resource_id)
|
440
460
|
|
441
461
|
arr.each do |time, record|
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
determine_entry_level_labels(group_resource, record)
|
446
|
-
entry_resource = group_resource.dup
|
447
|
-
entry_resource.labels.merge!(extracted_resource_labels)
|
448
|
-
entry_common_labels = \
|
449
|
-
group_common_labels.merge(extracted_common_labels)
|
462
|
+
entry_level_resource, entry_level_common_labels =
|
463
|
+
determine_entry_level_monitored_resource_and_labels(
|
464
|
+
group_level_resource, group_level_common_labels, record)
|
450
465
|
|
451
466
|
is_json = false
|
452
467
|
if @detect_json
|
453
|
-
# Save the timestamp if available, then clear it out to
|
454
|
-
# determining whether we should parse the log or message
|
468
|
+
# Save the timestamp and severity if available, then clear it out to
|
469
|
+
# allow for determining whether we should parse the log or message
|
470
|
+
# field.
|
455
471
|
timestamp = record.delete('time')
|
472
|
+
severity = record.delete('severity')
|
473
|
+
|
456
474
|
# If the log is json, we want to export it as a structured log
|
457
475
|
# unless there is additional metadata that would be lost.
|
458
476
|
record_json = nil
|
@@ -467,16 +485,16 @@ module Fluent
|
|
467
485
|
record = record_json
|
468
486
|
is_json = true
|
469
487
|
end
|
470
|
-
# Restore timestamp if necessary.
|
471
|
-
|
472
|
-
|
473
|
-
|
488
|
+
# Restore timestamp and severity if necessary. Note that we don't
|
489
|
+
# want to override these keys in the JSON we've just parsed.
|
490
|
+
record['time'] ||= timestamp if timestamp
|
491
|
+
record['severity'] ||= severity if severity
|
474
492
|
end
|
475
493
|
|
476
494
|
ts_secs, ts_nanos = compute_timestamp(
|
477
|
-
|
495
|
+
entry_level_resource.type, record, time)
|
478
496
|
severity = compute_severity(
|
479
|
-
|
497
|
+
entry_level_resource.type, record, entry_level_common_labels)
|
480
498
|
|
481
499
|
ts_secs = begin
|
482
500
|
Integer ts_secs
|
@@ -488,12 +506,13 @@ module Fluent
|
|
488
506
|
rescue ArgumentError, TypeError
|
489
507
|
ts_nanos
|
490
508
|
end
|
509
|
+
|
491
510
|
if @use_grpc
|
492
511
|
entry = Google::Logging::V2::LogEntry.new(
|
493
|
-
labels:
|
512
|
+
labels: entry_level_common_labels,
|
494
513
|
resource: Google::Api::MonitoredResource.new(
|
495
|
-
type:
|
496
|
-
labels:
|
514
|
+
type: entry_level_resource.type,
|
515
|
+
labels: entry_level_resource.labels.to_h
|
497
516
|
),
|
498
517
|
severity: grpc_severity(severity)
|
499
518
|
)
|
@@ -510,10 +529,11 @@ module Fluent
|
|
510
529
|
end
|
511
530
|
else
|
512
531
|
# Remove the labels if we didn't populate them with anything.
|
513
|
-
|
532
|
+
entry_level_resource.labels = nil if
|
533
|
+
entry_level_resource.labels.empty?
|
514
534
|
entry = Google::Apis::LoggingV2beta1::LogEntry.new(
|
515
|
-
labels:
|
516
|
-
resource:
|
535
|
+
labels: entry_level_common_labels,
|
536
|
+
resource: entry_level_resource,
|
517
537
|
severity: severity,
|
518
538
|
timestamp: {
|
519
539
|
seconds: ts_secs,
|
@@ -522,13 +542,12 @@ module Fluent
|
|
522
542
|
)
|
523
543
|
end
|
524
544
|
|
525
|
-
# Get fully-qualified trace id for LogEntry "trace" field
|
545
|
+
# Get fully-qualified trace id for LogEntry "trace" field.
|
526
546
|
fq_trace_id = record.delete(@trace_key)
|
527
547
|
entry.trace = fq_trace_id if fq_trace_id
|
528
548
|
|
529
549
|
set_log_entry_fields(record, entry)
|
530
|
-
|
531
|
-
set_payload(entry_resource.type, record, entry, is_json)
|
550
|
+
set_payload(entry_level_resource.type, record, entry, is_json)
|
532
551
|
|
533
552
|
entries.push(entry)
|
534
553
|
end
|
@@ -536,21 +555,21 @@ module Fluent
|
|
536
555
|
next if entries.empty?
|
537
556
|
|
538
557
|
log_name = "projects/#{@project_id}/logs/#{log_name(
|
539
|
-
tag,
|
558
|
+
tag, group_level_resource)}"
|
540
559
|
|
541
560
|
# Does the actual write to the cloud logging api.
|
542
561
|
client = api_client
|
543
562
|
if @use_grpc
|
544
563
|
begin
|
545
|
-
labels_utf8_pairs =
|
564
|
+
labels_utf8_pairs = group_level_common_labels.map do |k, v|
|
546
565
|
[k.encode('utf-8'), convert_to_utf8(v)]
|
547
566
|
end
|
548
567
|
|
549
568
|
write_request = Google::Logging::V2::WriteLogEntriesRequest.new(
|
550
569
|
log_name: log_name,
|
551
570
|
resource: Google::Api::MonitoredResource.new(
|
552
|
-
type:
|
553
|
-
labels:
|
571
|
+
type: group_level_resource.type,
|
572
|
+
labels: group_level_resource.labels.to_h
|
554
573
|
),
|
555
574
|
labels: labels_utf8_pairs.to_h,
|
556
575
|
entries: entries
|
@@ -560,8 +579,8 @@ module Fluent
|
|
560
579
|
increment_successful_requests_count
|
561
580
|
increment_ingested_entries_count(entries.length)
|
562
581
|
|
563
|
-
# Let the user explicitly know when the first call succeeded,
|
564
|
-
#
|
582
|
+
# Let the user explicitly know when the first call succeeded, to aid
|
583
|
+
# with verification and troubleshooting.
|
565
584
|
unless @successful_call
|
566
585
|
@successful_call = true
|
567
586
|
@log.info 'Successfully sent gRPC to Stackdriver Logging API.'
|
@@ -600,8 +619,8 @@ module Fluent
|
|
600
619
|
@log.warn "Dropping #{dropped} log message(s)",
|
601
620
|
error: error.to_s, error_code: error.code.to_s
|
602
621
|
else
|
603
|
-
# Assume this is a problem with the request itself
|
604
|
-
#
|
622
|
+
# Assume this is a problem with the request itself and don't
|
623
|
+
# retry.
|
605
624
|
dropped = entries.length
|
606
625
|
increment_dropped_entries_count(dropped)
|
607
626
|
@log.error "Unknown response code #{error.code} from the "\
|
@@ -614,8 +633,8 @@ module Fluent
|
|
614
633
|
write_request = \
|
615
634
|
Google::Apis::LoggingV2beta1::WriteLogEntriesRequest.new(
|
616
635
|
log_name: log_name,
|
617
|
-
resource:
|
618
|
-
labels:
|
636
|
+
resource: group_level_resource,
|
637
|
+
labels: group_level_common_labels,
|
619
638
|
entries: entries)
|
620
639
|
|
621
640
|
# TODO: RequestOptions
|
@@ -628,8 +647,8 @@ module Fluent
|
|
628
647
|
increment_successful_requests_count
|
629
648
|
increment_ingested_entries_count(entries.length)
|
630
649
|
|
631
|
-
# Let the user explicitly know when the first call succeeded,
|
632
|
-
#
|
650
|
+
# Let the user explicitly know when the first call succeeded, to aid
|
651
|
+
# with verification and troubleshooting.
|
633
652
|
unless @successful_call
|
634
653
|
@successful_call = true
|
635
654
|
@log.info 'Successfully sent to Stackdriver Logging API.'
|
@@ -820,8 +839,9 @@ module Fluent
|
|
820
839
|
|
821
840
|
# Retrieve monitored resource via the legacy way.
|
822
841
|
#
|
823
|
-
#
|
824
|
-
#
|
842
|
+
# Note: This is just a failover plan if we fail to get metadata from
|
843
|
+
# Metadata Agent. Thus it should be equivalent to what Metadata Agent
|
844
|
+
# returns.
|
825
845
|
def determine_agent_level_monitored_resource_via_legacy
|
826
846
|
resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
827
847
|
labels: {})
|
@@ -883,7 +903,7 @@ module Fluent
|
|
883
903
|
}
|
884
904
|
|
885
905
|
# GKE container.
|
886
|
-
when
|
906
|
+
when GKE_CONSTANTS[:resource_type]
|
887
907
|
raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
|
888
908
|
kube_env = YAML.load(raw_kube_env)
|
889
909
|
return {
|
@@ -918,7 +938,7 @@ module Fluent
|
|
918
938
|
rescue StandardError => e
|
919
939
|
@log.error "Failed to set monitored resource labels for #{type}: ",
|
920
940
|
error: e
|
921
|
-
|
941
|
+
{}
|
922
942
|
end
|
923
943
|
|
924
944
|
# Determine the common labels that should be added to all log entries
|
@@ -942,7 +962,7 @@ module Fluent
|
|
942
962
|
|
943
963
|
# GCE instance and GKE container.
|
944
964
|
when COMPUTE_CONSTANTS[:resource_type],
|
945
|
-
|
965
|
+
GKE_CONSTANTS[:resource_type]
|
946
966
|
labels.merge!(
|
947
967
|
"#{COMPUTE_CONSTANTS[:service]}/resource_name" => @vm_name)
|
948
968
|
|
@@ -954,138 +974,175 @@ module Fluent
|
|
954
974
|
labels
|
955
975
|
end
|
956
976
|
|
957
|
-
#
|
958
|
-
#
|
959
|
-
def
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
[group_resource, group_common_labels]
|
977
|
+
# Group the log entries by tag and local_resource_id pairs. Also filter out
|
978
|
+
# invalid non-Hash entries.
|
979
|
+
def group_log_entries_by_tag_and_local_resource_id(chunk)
|
980
|
+
groups = {}
|
981
|
+
chunk.msgpack_each do |tag, time, record|
|
982
|
+
unless record.is_a?(Hash)
|
983
|
+
@log.warn 'Dropping log entries with malformed record: ' \
|
984
|
+
"'#{record.inspect}'. " \
|
985
|
+
'A log record should be in JSON format.'
|
986
|
+
next
|
987
|
+
end
|
988
|
+
sanitized_tag = sanitize_tag(tag)
|
989
|
+
if sanitized_tag.nil?
|
990
|
+
@log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
|
991
|
+
' A tag should be a string with utf8 characters.'
|
992
|
+
next
|
993
|
+
end
|
994
|
+
local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
|
995
|
+
# A nil local_resource_id means "fall back to legacy".
|
996
|
+
hash_key = [sanitized_tag, local_resource_id].freeze
|
997
|
+
groups[hash_key] ||= []
|
998
|
+
groups[hash_key].push([time, record])
|
999
|
+
end
|
1000
|
+
groups
|
982
1001
|
end
|
983
1002
|
|
984
|
-
# Determine group level monitored resource
|
985
|
-
# entries.
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
1003
|
+
# Determine the group level monitored resource and common labels shared by a
|
1004
|
+
# collection of entries.
|
1005
|
+
def determine_group_level_monitored_resource_and_labels(tag,
|
1006
|
+
local_resource_id)
|
1007
|
+
resource = @resource.dup
|
1008
|
+
resource.labels = @resource.labels.dup
|
1009
|
+
common_labels = @common_labels.dup
|
1010
|
+
|
1011
|
+
# Change the resource type and set matched_regexp_group if the tag matches
|
1012
|
+
# certain regexp.
|
1013
|
+
matched_regexp_group = nil # @tag_regexp_list can be an empty list.
|
990
1014
|
@tag_regexp_list.each do |derived_type, tag_regexp|
|
991
|
-
|
992
|
-
|
993
|
-
|
1015
|
+
matched_regexp_group = tag_regexp.match(tag)
|
1016
|
+
if matched_regexp_group
|
1017
|
+
resource.type = derived_type
|
1018
|
+
break
|
1019
|
+
end
|
994
1020
|
end
|
995
|
-
[@resource.type, nil]
|
996
|
-
end
|
997
1021
|
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1022
|
+
# Determine the monitored resource based on the local_resource_id.
|
1023
|
+
# Different monitored resource types have unique ids in different format.
|
1024
|
+
# We will query Metadata Agent for the monitored resource. Return the
|
1025
|
+
# legacy monitored resource (either the instance resource or the resource
|
1026
|
+
# inferred from the tag) if failed to get a monitored resource from
|
1027
|
+
# Metadata Agent with this key.
|
1028
|
+
#
|
1029
|
+
# Docker container:
|
1030
|
+
# "container.<container_id>"
|
1031
|
+
# "containerName.<container_name>"
|
1032
|
+
# GKE container:
|
1033
|
+
# "gke_containerName.<namespace_id>.<pod_name>.<container_name>"
|
1034
|
+
if @enable_metadata_agent && local_resource_id
|
1035
|
+
@log.debug 'Calling metadata agent with local_resource_id: ' \
|
1036
|
+
"#{local_resource_id}."
|
1037
|
+
retrieved_resource = query_metadata_agent_for_monitored_resource(
|
1038
|
+
local_resource_id)
|
1039
|
+
@log.debug 'Retrieved monitored resource from metadata agent: ' \
|
1040
|
+
"#{retrieved_resource.inspect}."
|
1041
|
+
if retrieved_resource
|
1042
|
+
resource = retrieved_resource
|
1043
|
+
# TODO(qingling128): Fix this temporary renaming from 'gke_container'
|
1044
|
+
# to 'container'.
|
1045
|
+
resource.type = 'container' if resource.type == 'gke_container'
|
1046
|
+
end
|
1047
|
+
end
|
1003
1048
|
|
1004
|
-
|
1049
|
+
# Once the resource type is settled down, determine the labels.
|
1050
|
+
case resource.type
|
1005
1051
|
# Cloud Functions.
|
1006
1052
|
when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
1007
|
-
|
1053
|
+
resource.labels.merge!(
|
1008
1054
|
'region' => @gcf_region,
|
1009
1055
|
'function_name' => decode_cloudfunctions_function_name(
|
1010
|
-
|
1056
|
+
matched_regexp_group['encoded_function_name'])
|
1011
1057
|
)
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
"#{CONTAINER_CONSTANTS[:service]}/instance_id" => instance_id,
|
1058
|
+
instance_id = resource.labels.delete('instance_id')
|
1059
|
+
common_labels.merge!(
|
1060
|
+
"#{GKE_CONSTANTS[:service]}/instance_id" => instance_id,
|
1016
1061
|
"#{COMPUTE_CONSTANTS[:service]}/resource_id" => instance_id,
|
1017
|
-
"#{
|
1018
|
-
|
1062
|
+
"#{GKE_CONSTANTS[:service]}/cluster_name" =>
|
1063
|
+
resource.labels.delete('cluster_name'),
|
1019
1064
|
"#{COMPUTE_CONSTANTS[:service]}/zone" =>
|
1020
|
-
|
1065
|
+
resource.labels.delete('zone')
|
1021
1066
|
)
|
1022
1067
|
|
1023
1068
|
# GKE container.
|
1024
|
-
when
|
1025
|
-
if
|
1069
|
+
when GKE_CONSTANTS[:resource_type]
|
1070
|
+
if matched_regexp_group
|
1026
1071
|
# We only expect one occurrence of each key in the match group.
|
1027
1072
|
resource_labels_candidates =
|
1028
|
-
|
1029
|
-
common_labels_candidates =
|
1030
|
-
|
1031
|
-
group_resource_labels.merge!(
|
1073
|
+
matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
|
1074
|
+
common_labels_candidates = resource_labels_candidates.dup
|
1075
|
+
resource.labels.merge!(
|
1032
1076
|
delete_and_extract_labels(
|
1033
1077
|
resource_labels_candidates,
|
1034
1078
|
# The kubernetes_tag_regexp is poorly named. 'namespace_name' is
|
1035
1079
|
# in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
|
1036
1080
|
# TODO(qingling128): Figure out how to put this map into
|
1037
|
-
# constants like
|
1081
|
+
# constants like GKE_CONSTANTS[:extra_resource_labels].
|
1038
1082
|
'container_name' => 'container_name',
|
1039
1083
|
'namespace_name' => 'namespace_id',
|
1040
1084
|
'pod_name' => 'pod_id'))
|
1041
1085
|
|
1042
|
-
|
1086
|
+
common_labels.merge!(
|
1043
1087
|
delete_and_extract_labels(
|
1044
1088
|
common_labels_candidates,
|
1045
|
-
|
1046
|
-
.map { |l| [l, "#{
|
1089
|
+
GKE_CONSTANTS[:extra_common_labels]
|
1090
|
+
.map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
|
1047
1091
|
end
|
1092
|
+
|
1093
|
+
# Docker container.
|
1094
|
+
# TODO(qingling128): Remove this logic once the resource is retrieved at a
|
1095
|
+
# proper time (b/65175256).
|
1096
|
+
when DOCKER_CONSTANTS[:resource_type]
|
1097
|
+
common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")
|
1048
1098
|
end
|
1049
1099
|
|
1050
|
-
|
1100
|
+
resource.freeze
|
1101
|
+
resource.labels.freeze
|
1102
|
+
common_labels.freeze
|
1103
|
+
|
1104
|
+
[resource, common_labels]
|
1051
1105
|
end
|
1052
1106
|
|
1053
|
-
# Extract entry resource and common labels that should be
|
1054
|
-
# individual entries
|
1055
|
-
def
|
1056
|
-
|
1057
|
-
|
1107
|
+
# Extract entry level monitored resource and common labels that should be
|
1108
|
+
# applied to individual entries.
|
1109
|
+
def determine_entry_level_monitored_resource_and_labels(
|
1110
|
+
group_level_resource, group_level_common_labels, record)
|
1111
|
+
resource = group_level_resource.dup
|
1112
|
+
resource.labels = group_level_resource.labels.dup
|
1113
|
+
common_labels = group_level_common_labels.dup
|
1058
1114
|
|
1115
|
+
case resource.type
|
1059
1116
|
# Cloud Functions.
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1117
|
+
when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
1118
|
+
if record.key?('log')
|
1119
|
+
@cloudfunctions_log_match =
|
1120
|
+
@compiled_cloudfunctions_log_regexp.match(record['log'])
|
1121
|
+
common_labels['execution_id'] =
|
1122
|
+
@cloudfunctions_log_match['execution_id'] if
|
1123
|
+
@cloudfunctions_log_match &&
|
1124
|
+
@cloudfunctions_log_match['execution_id']
|
1125
|
+
end
|
1069
1126
|
|
1070
|
-
# GKE
|
1071
|
-
|
1127
|
+
# GKE container.
|
1128
|
+
when GKE_CONSTANTS[:resource_type]
|
1072
1129
|
# Move the stdout/stderr annotation from the record into a label.
|
1073
1130
|
common_labels.merge!(
|
1074
1131
|
delete_and_extract_labels(
|
1075
|
-
record, 'stream' => "#{
|
1132
|
+
record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"))
|
1076
1133
|
|
1077
1134
|
# If the record has been annotated by the kubernetes_metadata_filter
|
1078
1135
|
# plugin, then use that metadata. Otherwise, rely on commonLabels
|
1079
|
-
# populated
|
1136
|
+
# populated from the group's tag.
|
1080
1137
|
if record.key?('kubernetes')
|
1081
|
-
|
1138
|
+
resource.labels.merge!(
|
1082
1139
|
delete_and_extract_labels(
|
1083
|
-
record['kubernetes'],
|
1140
|
+
record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
|
1084
1141
|
.map { |l| [l, l] }.to_h))
|
1085
1142
|
common_labels.merge!(
|
1086
1143
|
delete_and_extract_labels(
|
1087
|
-
record['kubernetes'],
|
1088
|
-
.map { |l| [l, "#{
|
1144
|
+
record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
|
1145
|
+
.map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
|
1089
1146
|
# Prepend label/ to all user-defined labels' keys.
|
1090
1147
|
if record['kubernetes'].key?('labels')
|
1091
1148
|
common_labels.merge!(
|
@@ -1111,14 +1168,56 @@ module Fluent
|
|
1111
1168
|
# Report them as monitored resource labels instead of common labels.
|
1112
1169
|
# e.g. "dataflow.googleapis.com/job_id" => "job_id"
|
1113
1170
|
[DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
|
1114
|
-
next unless
|
1115
|
-
|
1171
|
+
next unless resource.type == service_constants[:resource_type]
|
1172
|
+
resource.labels.merge!(
|
1116
1173
|
delete_and_extract_labels(
|
1117
1174
|
common_labels, service_constants[:extra_common_labels]
|
1118
1175
|
.map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h))
|
1119
1176
|
end
|
1120
1177
|
|
1121
|
-
[
|
1178
|
+
[resource, common_labels]
|
1179
|
+
end
|
1180
|
+
|
1181
|
+
# Call Metadata Agent to get monitored resource information and parse
|
1182
|
+
# response to Google::Api::MonitoredResource.
|
1183
|
+
def query_metadata_agent_for_monitored_resource(local_resource_id)
|
1184
|
+
response = query_metadata_agent("monitoredResource/#{local_resource_id}")
|
1185
|
+
return nil if response.nil?
|
1186
|
+
begin
|
1187
|
+
resource = Google::Api::MonitoredResource.decode_json(response.to_json)
|
1188
|
+
rescue Google::Protobuf::ParseError, ArgumentError => e
|
1189
|
+
@log.error 'Error paring monitored resource from Metadata Agent. ' \
|
1190
|
+
"response: #{response.inspect}", error: e
|
1191
|
+
return nil
|
1192
|
+
end
|
1193
|
+
|
1194
|
+
# TODO(qingling128): Use Google::Api::MonitoredResource directly after we
|
1195
|
+
# upgrade gRPC version to include the fix for the protobuf map
|
1196
|
+
# corruption issue.
|
1197
|
+
Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
1198
|
+
type: resource.type,
|
1199
|
+
labels: resource.labels.to_h
|
1200
|
+
)
|
1201
|
+
end
|
1202
|
+
|
1203
|
+
# Issue a request to the Metadata Agent's local API and parse the response
|
1204
|
+
# to JSON. Return nil in case of failure.
|
1205
|
+
def query_metadata_agent(path)
|
1206
|
+
url = "#{@metadata_agent_url}/#{path}"
|
1207
|
+
@log.debug("Calling Metadata Agent: #{url}")
|
1208
|
+
open(url) do |f|
|
1209
|
+
response = f.read
|
1210
|
+
parsed_hash = parse_json_or_nil(response)
|
1211
|
+
if parsed_hash.nil?
|
1212
|
+
@log.error 'Response from Metadata Agent is not in valid json ' \
|
1213
|
+
"format: '#{response.inspect}'."
|
1214
|
+
return nil
|
1215
|
+
end
|
1216
|
+
@log.debug "Response from Metadata Agent: #{parsed_hash}"
|
1217
|
+
return parsed_hash
|
1218
|
+
end
|
1219
|
+
rescue StandardError => e
|
1220
|
+
@log.error 'Error calling Metadata Agent.', error: e
|
1122
1221
|
end
|
1123
1222
|
|
1124
1223
|
# TODO: This functionality should eventually be available in another
|
@@ -1211,7 +1310,7 @@ module Fluent
|
|
1211
1310
|
[ts_secs, ts_nanos]
|
1212
1311
|
end
|
1213
1312
|
|
1214
|
-
def compute_severity(resource_type, record,
|
1313
|
+
def compute_severity(resource_type, record, entry_level_common_labels)
|
1215
1314
|
if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
1216
1315
|
if @cloudfunctions_log_match && @cloudfunctions_log_match['severity']
|
1217
1316
|
return parse_severity(@cloudfunctions_log_match['severity'])
|
@@ -1226,9 +1325,8 @@ module Fluent
|
|
1226
1325
|
end
|
1227
1326
|
elsif record.key?('severity')
|
1228
1327
|
return parse_severity(record.delete('severity'))
|
1229
|
-
elsif resource_type ==
|
1230
|
-
|
1231
|
-
stream = entry_common_labels["#{CONTAINER_CONSTANTS[:service]}/stream"]
|
1328
|
+
elsif resource_type == GKE_CONSTANTS[:resource_type]
|
1329
|
+
stream = entry_level_common_labels["#{GKE_CONSTANTS[:service]}/stream"]
|
1232
1330
|
if stream == 'stdout'
|
1233
1331
|
return 'INFO'
|
1234
1332
|
elsif stream == 'stderr'
|
@@ -1452,9 +1550,8 @@ module Fluent
|
|
1452
1550
|
hash.nil? || !hash.is_a?(Hash)
|
1453
1551
|
label_map.each_with_object({}) \
|
1454
1552
|
do |(original_label, new_label), extracted_labels|
|
1455
|
-
|
1456
|
-
|
1457
|
-
hash.key?(original_label)
|
1553
|
+
value = hash.delete(original_label)
|
1554
|
+
extracted_labels[new_label] = convert_to_utf8(value.to_s) if value
|
1458
1555
|
end
|
1459
1556
|
end
|
1460
1557
|
|
@@ -1520,7 +1617,8 @@ module Fluent
|
|
1520
1617
|
text_payload = record['log']
|
1521
1618
|
elsif is_json
|
1522
1619
|
json_payload = record
|
1523
|
-
elsif
|
1620
|
+
elsif [GKE_CONSTANTS[:resource_type],
|
1621
|
+
DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
|
1524
1622
|
record.key?('log')
|
1525
1623
|
text_payload = record['log']
|
1526
1624
|
elsif record.size == 1 && record.key?('message')
|
@@ -1550,7 +1648,7 @@ module Fluent
|
|
1550
1648
|
elsif resource.type == APPENGINE_CONSTANTS[:resource_type]
|
1551
1649
|
# Add a prefix to Managed VM logs to prevent namespace collisions.
|
1552
1650
|
tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
|
1553
|
-
elsif resource.type ==
|
1651
|
+
elsif resource.type == GKE_CONSTANTS[:resource_type]
|
1554
1652
|
# For Kubernetes logs, use just the container name as the log name
|
1555
1653
|
# if we have it.
|
1556
1654
|
if resource.labels && resource.labels.key?('container_name')
|