fluent-plugin-google-cloud 0.6.7 → 0.6.8.pre.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/fluent-plugin-google-cloud.gemspec +1 -1
- data/lib/fluent/plugin/out_google_cloud.rb +287 -189
- data/test/plugin/base_test.rb +254 -2
- data/test/plugin/constants.rb +146 -13
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f013270937ffeb8ac1addeb0b50cbd04b9434d8f
|
4
|
+
data.tar.gz: a19be3b8f5b49266e6055be20c6a8c693fae1fea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cfc74c7b17e028776423d30fbc438d1d5d146d532991bb2331044a35f54e1a6acfd6f14e6bc4b234f924ac5e6d62b268931b8c856d07532b9c610c85579f3906
|
7
|
+
data.tar.gz: bb14fdffce9b35b5c1ff4d9609d5ec473d19d69b74972b3ba20bc843d28e1118be19b89b112b961d7b903e246612eca551780c3e9c202fa852ad8bdec6f50f2d
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fluent-plugin-google-cloud (0.6.7)
|
4
|
+
fluent-plugin-google-cloud (0.6.8.pre.1)
|
5
5
|
fluentd (~> 0.10)
|
6
6
|
google-api-client (~> 0.9.0)
|
7
7
|
google-cloud-logging (= 0.24.1)
|
@@ -55,7 +55,7 @@ GEM
|
|
55
55
|
googleauth (~> 0.5.1)
|
56
56
|
grpc (~> 1.0)
|
57
57
|
rly (~> 0.2.3)
|
58
|
-
google-protobuf (3.4.0.2
|
58
|
+
google-protobuf (3.4.0.2)
|
59
59
|
googleapis-common-protos (1.3.5)
|
60
60
|
google-protobuf (~> 3.2)
|
61
61
|
grpc (~> 1.0)
|
@@ -67,7 +67,7 @@ GEM
|
|
67
67
|
multi_json (~> 1.11)
|
68
68
|
os (~> 0.9)
|
69
69
|
signet (~> 0.7)
|
70
|
-
grpc (1.2.5
|
70
|
+
grpc (1.2.5)
|
71
71
|
google-protobuf (~> 3.1)
|
72
72
|
googleauth (~> 0.5.1)
|
73
73
|
hashdiff (0.3.6)
|
@@ -10,7 +10,7 @@ eos
|
|
10
10
|
gem.homepage = \
|
11
11
|
'https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud'
|
12
12
|
gem.license = 'Apache-2.0'
|
13
|
-
gem.version = '0.6.
|
13
|
+
gem.version = '0.6.8.pre.1'
|
14
14
|
gem.authors = ['Todd Derr', 'Alex Robinson']
|
15
15
|
gem.email = ['salty@google.com']
|
16
16
|
gem.required_ruby_version = Gem::Requirement.new('>= 2.0')
|
@@ -38,12 +38,12 @@ end
|
|
38
38
|
module Fluent
|
39
39
|
# fluentd output plugin for the Stackdriver Logging API
|
40
40
|
class GoogleCloudOutput < BufferedOutput
|
41
|
-
# Constants for service names
|
42
|
-
module
|
41
|
+
# Constants for service names, resource types, etc.
|
42
|
+
module ServiceConstants
|
43
43
|
APPENGINE_CONSTANTS = {
|
44
44
|
service: 'appengine.googleapis.com',
|
45
45
|
resource_type: 'gae_app',
|
46
|
-
metadata_attributes: %w(gae_backend_name gae_backend_version)
|
46
|
+
metadata_attributes: %w(gae_backend_name gae_backend_version)
|
47
47
|
}
|
48
48
|
CLOUDFUNCTIONS_CONSTANTS = {
|
49
49
|
service: 'cloudfunctions.googleapis.com',
|
@@ -53,12 +53,16 @@ module Fluent
|
|
53
53
|
service: 'compute.googleapis.com',
|
54
54
|
resource_type: 'gce_instance'
|
55
55
|
}
|
56
|
-
|
56
|
+
GKE_CONSTANTS = {
|
57
57
|
service: 'container.googleapis.com',
|
58
58
|
resource_type: 'container',
|
59
59
|
extra_resource_labels: %w(namespace_id pod_id container_name),
|
60
60
|
extra_common_labels: %w(namespace_name pod_name),
|
61
|
-
metadata_attributes: %w(kube-env)
|
61
|
+
metadata_attributes: %w(kube-env)
|
62
|
+
}
|
63
|
+
DOCKER_CONSTANTS = {
|
64
|
+
service: 'docker.googleapis.com',
|
65
|
+
resource_type: 'docker_container'
|
62
66
|
}
|
63
67
|
DATAFLOW_CONSTANTS = {
|
64
68
|
service: 'dataflow.googleapis.com',
|
@@ -68,8 +72,7 @@ module Fluent
|
|
68
72
|
DATAPROC_CONSTANTS = {
|
69
73
|
service: 'cluster.dataproc.googleapis.com',
|
70
74
|
resource_type: 'cloud_dataproc_cluster',
|
71
|
-
metadata_attributes:
|
72
|
-
%w(dataproc-cluster-uuid dataproc-cluster-name).to_set
|
75
|
+
metadata_attributes: %w(dataproc-cluster-uuid dataproc-cluster-name)
|
73
76
|
}
|
74
77
|
EC2_CONSTANTS = {
|
75
78
|
service: 'ec2.amazonaws.com',
|
@@ -83,7 +86,7 @@ module Fluent
|
|
83
86
|
|
84
87
|
# The map between a subservice name and a resource type.
|
85
88
|
SUBSERVICE_MAP = \
|
86
|
-
[APPENGINE_CONSTANTS,
|
89
|
+
[APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAFLOW_CONSTANTS,
|
87
90
|
DATAPROC_CONSTANTS, ML_CONSTANTS]
|
88
91
|
.map { |consts| [consts[:service], consts[:resource_type]] }.to_h
|
89
92
|
# Default back to GCE if invalid value is detected.
|
@@ -91,18 +94,34 @@ module Fluent
|
|
91
94
|
|
92
95
|
# The map between a resource type and expected subservice attributes.
|
93
96
|
SUBSERVICE_METADATA_ATTRIBUTES = \
|
94
|
-
[APPENGINE_CONSTANTS,
|
95
|
-
|
96
|
-
.to_h
|
97
|
+
[APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAPROC_CONSTANTS].map do |consts|
|
98
|
+
[consts[:resource_type], consts[:metadata_attributes].to_set]
|
99
|
+
end.to_h
|
100
|
+
end
|
97
101
|
|
98
|
-
|
99
|
-
|
100
|
-
|
102
|
+
# Constants for configuration.
|
103
|
+
module ConfigConstants
|
104
|
+
# Default values for JSON payload keys to set the "httpRequest",
|
105
|
+
# "operation", "sourceLocation", "trace" fields in the LogEntry.
|
101
106
|
DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'
|
102
|
-
DEFAULT_OPERATION_KEY =
|
103
|
-
DEFAULT_SOURCE_LOCATION_KEY =
|
104
|
-
|
105
|
-
|
107
|
+
DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'
|
108
|
+
DEFAULT_SOURCE_LOCATION_KEY = 'logging.googleapis.com/sourceLocation'
|
109
|
+
DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'
|
110
|
+
|
111
|
+
DEFAULT_METADATA_AGENT_URL =
|
112
|
+
'http://local-metadata-agent.stackdriver.com:8000'
|
113
|
+
end
|
114
|
+
|
115
|
+
# Constants for log entry field extraction.
|
116
|
+
module InternalConstants
|
117
|
+
# Use empty string as request path when the local_resource_id of monitored
|
118
|
+
# resource can be implicitly inferred by Metadata Agent.
|
119
|
+
IMPLICIT_LOCAL_RESOURCE_ID = ''
|
120
|
+
|
121
|
+
# The label name of local_resource_id in the json payload. When a record
|
122
|
+
# has this field in the payload, we will use the value to retrieve
|
123
|
+
# monitored resource from Stackdriver Metadata agent.
|
124
|
+
LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'
|
106
125
|
|
107
126
|
# Map from each field name under LogEntry to corresponding variables
|
108
127
|
# required to perform field value extraction from the log record.
|
@@ -155,12 +174,14 @@ module Fluent
|
|
155
174
|
}
|
156
175
|
end
|
157
176
|
|
158
|
-
include self::
|
177
|
+
include self::ServiceConstants
|
178
|
+
include self::ConfigConstants
|
179
|
+
include self::InternalConstants
|
159
180
|
|
160
181
|
Fluent::Plugin.register_output('google_cloud', self)
|
161
182
|
|
162
183
|
PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'
|
163
|
-
PLUGIN_VERSION = '0.6.
|
184
|
+
PLUGIN_VERSION = '0.6.8.pre.1'
|
164
185
|
|
165
186
|
# Name of the Google cloud logging write scope.
|
166
187
|
LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'
|
@@ -295,6 +316,11 @@ module Fluent
|
|
295
316
|
config_param :monitoring_type, :string,
|
296
317
|
:default => Monitoring::PrometheusMonitoringRegistry.name
|
297
318
|
|
319
|
+
# Whether to call metadata agent to retrieve monitored resource.
|
320
|
+
config_param :enable_metadata_agent, :bool, :default => false
|
321
|
+
config_param :metadata_agent_url, :string,
|
322
|
+
:default => DEFAULT_METADATA_AGENT_URL
|
323
|
+
|
298
324
|
# rubocop:enable Style/HashSyntax
|
299
325
|
|
300
326
|
# TODO: Add a log_name config option rather than just using the tag?
|
@@ -354,28 +380,32 @@ module Fluent
|
|
354
380
|
|
355
381
|
@platform = detect_platform
|
356
382
|
|
357
|
-
# Set
|
358
|
-
#
|
359
|
-
#
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
383
|
+
# Set agent-level monitored resource. This monitored resource is initialized
|
384
|
+
# as the logging agent starts up. It will be inherited by all log entries
|
385
|
+
# processed by this agent. First try to retrieve it via Metadata Agent.
|
386
|
+
if @enable_metadata_agent
|
387
|
+
# The local_resource_id for this should be the instance id. Since this
|
388
|
+
# can be implicitly inferred by Metadata Agent, we do not need to
|
389
|
+
# explicitly send the key.
|
390
|
+
# TODO(qingling128): Remove this logic once the resource is retrieved at
|
391
|
+
# a proper time (b/65175256).
|
392
|
+
@resource = query_metadata_agent_for_monitored_resource(
|
393
|
+
IMPLICIT_LOCAL_RESOURCE_ID)
|
394
|
+
end
|
395
|
+
|
396
|
+
# Set required variables: @project_id, @vm_id, @vm_name and @zone.
|
366
397
|
set_required_metadata_variables
|
367
398
|
|
368
399
|
# Retrieve monitored resource.
|
369
|
-
#
|
370
|
-
#
|
371
|
-
|
372
|
-
@resource = determine_agent_level_monitored_resource_via_legacy
|
400
|
+
# Fail over to retrieve monitored resource via the legacy path if we fail
|
401
|
+
# to get it from Metadata Agent.
|
402
|
+
@resource ||= determine_agent_level_monitored_resource_via_legacy
|
373
403
|
|
374
404
|
# Set regexp that we should match tags against later on. Using a list
|
375
405
|
# instead of a map to ensure order. For example, tags will be matched
|
376
406
|
# against Cloud Functions first, then GKE.
|
377
407
|
@tag_regexp_list = []
|
378
|
-
if @resource.type ==
|
408
|
+
if @resource.type == GKE_CONSTANTS[:resource_type]
|
379
409
|
# We only support Cloud Functions logs for GKE right now.
|
380
410
|
if fetch_gce_metadata('instance/attributes/'
|
381
411
|
).split.include?('gcf_region')
|
@@ -388,7 +418,7 @@ module Fluent
|
|
388
418
|
]
|
389
419
|
end
|
390
420
|
@tag_regexp_list << [
|
391
|
-
|
421
|
+
GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
|
392
422
|
]
|
393
423
|
end
|
394
424
|
|
@@ -404,7 +434,7 @@ module Fluent
|
|
404
434
|
|
405
435
|
# Log an informational message containing the Logs viewer URL
|
406
436
|
@log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
|
407
|
-
"viewer?project=#{@project_id}&resource=#{@
|
437
|
+
"viewer?project=#{@project_id}&resource=#{@resource.type}/",
|
408
438
|
"instance_id/#{@vm_id}"
|
409
439
|
end
|
410
440
|
|
@@ -420,39 +450,27 @@ module Fluent
|
|
420
450
|
end
|
421
451
|
|
422
452
|
def write(chunk)
|
423
|
-
|
424
|
-
grouped_entries = {}
|
425
|
-
chunk.msgpack_each do |tag, *arr|
|
426
|
-
sanitized_tag = sanitize_tag(tag)
|
427
|
-
if sanitized_tag.nil?
|
428
|
-
@log.warn "Dropping log entries with invalid tag: '#{tag}'. " \
|
429
|
-
'A tag should be a string with utf8 characters.'
|
430
|
-
next
|
431
|
-
end
|
432
|
-
grouped_entries[sanitized_tag] ||= []
|
433
|
-
grouped_entries[sanitized_tag].push(arr)
|
434
|
-
end
|
453
|
+
grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)
|
435
454
|
|
436
|
-
grouped_entries.each do |tag, arr|
|
455
|
+
grouped_entries.each do |(tag, local_resource_id), arr|
|
437
456
|
entries = []
|
438
|
-
|
439
|
-
determine_group_level_monitored_resource_and_labels(
|
457
|
+
group_level_resource, group_level_common_labels =
|
458
|
+
determine_group_level_monitored_resource_and_labels(
|
459
|
+
tag, local_resource_id)
|
440
460
|
|
441
461
|
arr.each do |time, record|
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
determine_entry_level_labels(group_resource, record)
|
446
|
-
entry_resource = group_resource.dup
|
447
|
-
entry_resource.labels.merge!(extracted_resource_labels)
|
448
|
-
entry_common_labels = \
|
449
|
-
group_common_labels.merge(extracted_common_labels)
|
462
|
+
entry_level_resource, entry_level_common_labels =
|
463
|
+
determine_entry_level_monitored_resource_and_labels(
|
464
|
+
group_level_resource, group_level_common_labels, record)
|
450
465
|
|
451
466
|
is_json = false
|
452
467
|
if @detect_json
|
453
|
-
# Save the timestamp if available, then clear it out to
|
454
|
-
# determining whether we should parse the log or message
|
468
|
+
# Save the timestamp and severity if available, then clear them out to
|
469
|
+
# allow for determining whether we should parse the log or message
|
470
|
+
# field.
|
455
471
|
timestamp = record.delete('time')
|
472
|
+
severity = record.delete('severity')
|
473
|
+
|
456
474
|
# If the log is json, we want to export it as a structured log
|
457
475
|
# unless there is additional metadata that would be lost.
|
458
476
|
record_json = nil
|
@@ -467,16 +485,16 @@ module Fluent
|
|
467
485
|
record = record_json
|
468
486
|
is_json = true
|
469
487
|
end
|
470
|
-
# Restore timestamp if necessary.
|
471
|
-
|
472
|
-
|
473
|
-
|
488
|
+
# Restore timestamp and severity if necessary. Note that we don't
|
489
|
+
# want to override these keys in the JSON we've just parsed.
|
490
|
+
record['time'] ||= timestamp if timestamp
|
491
|
+
record['severity'] ||= severity if severity
|
474
492
|
end
|
475
493
|
|
476
494
|
ts_secs, ts_nanos = compute_timestamp(
|
477
|
-
|
495
|
+
entry_level_resource.type, record, time)
|
478
496
|
severity = compute_severity(
|
479
|
-
|
497
|
+
entry_level_resource.type, record, entry_level_common_labels)
|
480
498
|
|
481
499
|
ts_secs = begin
|
482
500
|
Integer ts_secs
|
@@ -488,12 +506,13 @@ module Fluent
|
|
488
506
|
rescue ArgumentError, TypeError
|
489
507
|
ts_nanos
|
490
508
|
end
|
509
|
+
|
491
510
|
if @use_grpc
|
492
511
|
entry = Google::Logging::V2::LogEntry.new(
|
493
|
-
labels:
|
512
|
+
labels: entry_level_common_labels,
|
494
513
|
resource: Google::Api::MonitoredResource.new(
|
495
|
-
type:
|
496
|
-
labels:
|
514
|
+
type: entry_level_resource.type,
|
515
|
+
labels: entry_level_resource.labels.to_h
|
497
516
|
),
|
498
517
|
severity: grpc_severity(severity)
|
499
518
|
)
|
@@ -510,10 +529,11 @@ module Fluent
|
|
510
529
|
end
|
511
530
|
else
|
512
531
|
# Remove the labels if we didn't populate them with anything.
|
513
|
-
|
532
|
+
entry_level_resource.labels = nil if
|
533
|
+
entry_level_resource.labels.empty?
|
514
534
|
entry = Google::Apis::LoggingV2beta1::LogEntry.new(
|
515
|
-
labels:
|
516
|
-
resource:
|
535
|
+
labels: entry_level_common_labels,
|
536
|
+
resource: entry_level_resource,
|
517
537
|
severity: severity,
|
518
538
|
timestamp: {
|
519
539
|
seconds: ts_secs,
|
@@ -522,13 +542,12 @@ module Fluent
|
|
522
542
|
)
|
523
543
|
end
|
524
544
|
|
525
|
-
# Get fully-qualified trace id for LogEntry "trace" field
|
545
|
+
# Get fully-qualified trace id for LogEntry "trace" field.
|
526
546
|
fq_trace_id = record.delete(@trace_key)
|
527
547
|
entry.trace = fq_trace_id if fq_trace_id
|
528
548
|
|
529
549
|
set_log_entry_fields(record, entry)
|
530
|
-
|
531
|
-
set_payload(entry_resource.type, record, entry, is_json)
|
550
|
+
set_payload(entry_level_resource.type, record, entry, is_json)
|
532
551
|
|
533
552
|
entries.push(entry)
|
534
553
|
end
|
@@ -536,21 +555,21 @@ module Fluent
|
|
536
555
|
next if entries.empty?
|
537
556
|
|
538
557
|
log_name = "projects/#{@project_id}/logs/#{log_name(
|
539
|
-
tag,
|
558
|
+
tag, group_level_resource)}"
|
540
559
|
|
541
560
|
# Does the actual write to the cloud logging api.
|
542
561
|
client = api_client
|
543
562
|
if @use_grpc
|
544
563
|
begin
|
545
|
-
labels_utf8_pairs =
|
564
|
+
labels_utf8_pairs = group_level_common_labels.map do |k, v|
|
546
565
|
[k.encode('utf-8'), convert_to_utf8(v)]
|
547
566
|
end
|
548
567
|
|
549
568
|
write_request = Google::Logging::V2::WriteLogEntriesRequest.new(
|
550
569
|
log_name: log_name,
|
551
570
|
resource: Google::Api::MonitoredResource.new(
|
552
|
-
type:
|
553
|
-
labels:
|
571
|
+
type: group_level_resource.type,
|
572
|
+
labels: group_level_resource.labels.to_h
|
554
573
|
),
|
555
574
|
labels: labels_utf8_pairs.to_h,
|
556
575
|
entries: entries
|
@@ -560,8 +579,8 @@ module Fluent
|
|
560
579
|
increment_successful_requests_count
|
561
580
|
increment_ingested_entries_count(entries.length)
|
562
581
|
|
563
|
-
# Let the user explicitly know when the first call succeeded,
|
564
|
-
#
|
582
|
+
# Let the user explicitly know when the first call succeeded, to aid
|
583
|
+
# with verification and troubleshooting.
|
565
584
|
unless @successful_call
|
566
585
|
@successful_call = true
|
567
586
|
@log.info 'Successfully sent gRPC to Stackdriver Logging API.'
|
@@ -600,8 +619,8 @@ module Fluent
|
|
600
619
|
@log.warn "Dropping #{dropped} log message(s)",
|
601
620
|
error: error.to_s, error_code: error.code.to_s
|
602
621
|
else
|
603
|
-
# Assume this is a problem with the request itself
|
604
|
-
#
|
622
|
+
# Assume this is a problem with the request itself and don't
|
623
|
+
# retry.
|
605
624
|
dropped = entries.length
|
606
625
|
increment_dropped_entries_count(dropped)
|
607
626
|
@log.error "Unknown response code #{error.code} from the "\
|
@@ -614,8 +633,8 @@ module Fluent
|
|
614
633
|
write_request = \
|
615
634
|
Google::Apis::LoggingV2beta1::WriteLogEntriesRequest.new(
|
616
635
|
log_name: log_name,
|
617
|
-
resource:
|
618
|
-
labels:
|
636
|
+
resource: group_level_resource,
|
637
|
+
labels: group_level_common_labels,
|
619
638
|
entries: entries)
|
620
639
|
|
621
640
|
# TODO: RequestOptions
|
@@ -628,8 +647,8 @@ module Fluent
|
|
628
647
|
increment_successful_requests_count
|
629
648
|
increment_ingested_entries_count(entries.length)
|
630
649
|
|
631
|
-
# Let the user explicitly know when the first call succeeded,
|
632
|
-
#
|
650
|
+
# Let the user explicitly know when the first call succeeded, to aid
|
651
|
+
# with verification and troubleshooting.
|
633
652
|
unless @successful_call
|
634
653
|
@successful_call = true
|
635
654
|
@log.info 'Successfully sent to Stackdriver Logging API.'
|
@@ -820,8 +839,9 @@ module Fluent
|
|
820
839
|
|
821
840
|
# Retrieve monitored resource via the legacy way.
|
822
841
|
#
|
823
|
-
#
|
824
|
-
#
|
842
|
+
# Note: This is just a failover plan if we fail to get metadata from
|
843
|
+
# Metadata Agent. Thus it should be equivalent to what Metadata Agent
|
844
|
+
# returns.
|
825
845
|
def determine_agent_level_monitored_resource_via_legacy
|
826
846
|
resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
827
847
|
labels: {})
|
@@ -883,7 +903,7 @@ module Fluent
|
|
883
903
|
}
|
884
904
|
|
885
905
|
# GKE container.
|
886
|
-
when
|
906
|
+
when GKE_CONSTANTS[:resource_type]
|
887
907
|
raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
|
888
908
|
kube_env = YAML.load(raw_kube_env)
|
889
909
|
return {
|
@@ -918,7 +938,7 @@ module Fluent
|
|
918
938
|
rescue StandardError => e
|
919
939
|
@log.error "Failed to set monitored resource labels for #{type}: ",
|
920
940
|
error: e
|
921
|
-
|
941
|
+
{}
|
922
942
|
end
|
923
943
|
|
924
944
|
# Determine the common labels that should be added to all log entries
|
@@ -942,7 +962,7 @@ module Fluent
|
|
942
962
|
|
943
963
|
# GCE instance and GKE container.
|
944
964
|
when COMPUTE_CONSTANTS[:resource_type],
|
945
|
-
|
965
|
+
GKE_CONSTANTS[:resource_type]
|
946
966
|
labels.merge!(
|
947
967
|
"#{COMPUTE_CONSTANTS[:service]}/resource_name" => @vm_name)
|
948
968
|
|
@@ -954,138 +974,175 @@ module Fluent
|
|
954
974
|
labels
|
955
975
|
end
|
956
976
|
|
957
|
-
#
|
958
|
-
#
|
959
|
-
def
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
[group_resource, group_common_labels]
|
977
|
+
# Group the log entries by tag and local_resource_id pairs. Also filter out
|
978
|
+
# invalid non-Hash entries.
|
979
|
+
def group_log_entries_by_tag_and_local_resource_id(chunk)
|
980
|
+
groups = {}
|
981
|
+
chunk.msgpack_each do |tag, time, record|
|
982
|
+
unless record.is_a?(Hash)
|
983
|
+
@log.warn 'Dropping log entries with malformed record: ' \
|
984
|
+
"'#{record.inspect}'. " \
|
985
|
+
'A log record should be in JSON format.'
|
986
|
+
next
|
987
|
+
end
|
988
|
+
sanitized_tag = sanitize_tag(tag)
|
989
|
+
if sanitized_tag.nil?
|
990
|
+
@log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
|
991
|
+
' A tag should be a string with utf8 characters.'
|
992
|
+
next
|
993
|
+
end
|
994
|
+
local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
|
995
|
+
# A nil local_resource_id means "fall back to legacy".
|
996
|
+
hash_key = [sanitized_tag, local_resource_id].freeze
|
997
|
+
groups[hash_key] ||= []
|
998
|
+
groups[hash_key].push([time, record])
|
999
|
+
end
|
1000
|
+
groups
|
982
1001
|
end
|
983
1002
|
|
984
|
-
# Determine group level monitored resource
|
985
|
-
# entries.
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
1003
|
+
# Determine the group level monitored resource and common labels shared by a
|
1004
|
+
# collection of entries.
|
1005
|
+
def determine_group_level_monitored_resource_and_labels(tag,
|
1006
|
+
local_resource_id)
|
1007
|
+
resource = @resource.dup
|
1008
|
+
resource.labels = @resource.labels.dup
|
1009
|
+
common_labels = @common_labels.dup
|
1010
|
+
|
1011
|
+
# Change the resource type and set matched_regexp_group if the tag matches
|
1012
|
+
# certain regexp.
|
1013
|
+
matched_regexp_group = nil # @tag_regexp_list can be an empty list.
|
990
1014
|
@tag_regexp_list.each do |derived_type, tag_regexp|
|
991
|
-
|
992
|
-
|
993
|
-
|
1015
|
+
matched_regexp_group = tag_regexp.match(tag)
|
1016
|
+
if matched_regexp_group
|
1017
|
+
resource.type = derived_type
|
1018
|
+
break
|
1019
|
+
end
|
994
1020
|
end
|
995
|
-
[@resource.type, nil]
|
996
|
-
end
|
997
1021
|
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1022
|
+
# Determine the monitored resource based on the local_resource_id.
|
1023
|
+
# Different monitored resource types have unique ids in different formats.
|
1024
|
+
# We will query Metadata Agent for the monitored resource. Return the
|
1025
|
+
# legacy monitored resource (either the instance resource or the resource
|
1026
|
+
# inferred from the tag) if we fail to get a monitored resource from
|
1027
|
+
# Metadata Agent with this key.
|
1028
|
+
#
|
1029
|
+
# Docker container:
|
1030
|
+
# "container.<container_id>"
|
1031
|
+
# "containerName.<container_name>"
|
1032
|
+
# GKE container:
|
1033
|
+
# "gke_containerName.<namespace_id>.<pod_name>.<container_name>"
|
1034
|
+
if @enable_metadata_agent && local_resource_id
|
1035
|
+
@log.debug 'Calling metadata agent with local_resource_id: ' \
|
1036
|
+
"#{local_resource_id}."
|
1037
|
+
retrieved_resource = query_metadata_agent_for_monitored_resource(
|
1038
|
+
local_resource_id)
|
1039
|
+
@log.debug 'Retrieved monitored resource from metadata agent: ' \
|
1040
|
+
"#{retrieved_resource.inspect}."
|
1041
|
+
if retrieved_resource
|
1042
|
+
resource = retrieved_resource
|
1043
|
+
# TODO(qingling128): Fix this temporary renaming from 'gke_container'
|
1044
|
+
# to 'container'.
|
1045
|
+
resource.type = 'container' if resource.type == 'gke_container'
|
1046
|
+
end
|
1047
|
+
end
|
1003
1048
|
|
1004
|
-
|
1049
|
+
# Once the resource type is settled, determine the labels.
|
1050
|
+
case resource.type
|
1005
1051
|
# Cloud Functions.
|
1006
1052
|
when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
1007
|
-
|
1053
|
+
resource.labels.merge!(
|
1008
1054
|
'region' => @gcf_region,
|
1009
1055
|
'function_name' => decode_cloudfunctions_function_name(
|
1010
|
-
|
1056
|
+
matched_regexp_group['encoded_function_name'])
|
1011
1057
|
)
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
"#{CONTAINER_CONSTANTS[:service]}/instance_id" => instance_id,
|
1058
|
+
instance_id = resource.labels.delete('instance_id')
|
1059
|
+
common_labels.merge!(
|
1060
|
+
"#{GKE_CONSTANTS[:service]}/instance_id" => instance_id,
|
1016
1061
|
"#{COMPUTE_CONSTANTS[:service]}/resource_id" => instance_id,
|
1017
|
-
"#{
|
1018
|
-
|
1062
|
+
"#{GKE_CONSTANTS[:service]}/cluster_name" =>
|
1063
|
+
resource.labels.delete('cluster_name'),
|
1019
1064
|
"#{COMPUTE_CONSTANTS[:service]}/zone" =>
|
1020
|
-
|
1065
|
+
resource.labels.delete('zone')
|
1021
1066
|
)
|
1022
1067
|
|
1023
1068
|
# GKE container.
|
1024
|
-
when
|
1025
|
-
if
|
1069
|
+
when GKE_CONSTANTS[:resource_type]
|
1070
|
+
if matched_regexp_group
|
1026
1071
|
# We only expect one occurrence of each key in the match group.
|
1027
1072
|
resource_labels_candidates =
|
1028
|
-
|
1029
|
-
common_labels_candidates =
|
1030
|
-
|
1031
|
-
group_resource_labels.merge!(
|
1073
|
+
matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
|
1074
|
+
common_labels_candidates = resource_labels_candidates.dup
|
1075
|
+
resource.labels.merge!(
|
1032
1076
|
delete_and_extract_labels(
|
1033
1077
|
resource_labels_candidates,
|
1034
1078
|
# The kubernetes_tag_regexp is poorly named. 'namespace_name' is
|
1035
1079
|
# in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
|
1036
1080
|
# TODO(qingling128): Figure out how to put this map into
|
1037
|
-
# constants like
|
1081
|
+
# constants like GKE_CONSTANTS[:extra_resource_labels].
|
1038
1082
|
'container_name' => 'container_name',
|
1039
1083
|
'namespace_name' => 'namespace_id',
|
1040
1084
|
'pod_name' => 'pod_id'))
|
1041
1085
|
|
1042
|
-
|
1086
|
+
common_labels.merge!(
|
1043
1087
|
delete_and_extract_labels(
|
1044
1088
|
common_labels_candidates,
|
1045
|
-
|
1046
|
-
.map { |l| [l, "#{
|
1089
|
+
GKE_CONSTANTS[:extra_common_labels]
|
1090
|
+
.map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
|
1047
1091
|
end
|
1092
|
+
|
1093
|
+
# Docker container.
|
1094
|
+
# TODO(qingling128): Remove this logic once the resource is retrieved at a
|
1095
|
+
# proper time (b/65175256).
|
1096
|
+
when DOCKER_CONSTANTS[:resource_type]
|
1097
|
+
common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")
|
1048
1098
|
end
|
1049
1099
|
|
1050
|
-
|
1100
|
+
resource.freeze
|
1101
|
+
resource.labels.freeze
|
1102
|
+
common_labels.freeze
|
1103
|
+
|
1104
|
+
[resource, common_labels]
|
1051
1105
|
end
|
1052
1106
|
|
1053
|
-
# Extract entry resource and common labels that should be
|
1054
|
-
# individual entries
|
1055
|
-
def
|
1056
|
-
|
1057
|
-
|
1107
|
+
# Extract entry level monitored resource and common labels that should be
|
1108
|
+
# applied to individual entries.
|
1109
|
+
def determine_entry_level_monitored_resource_and_labels(
|
1110
|
+
group_level_resource, group_level_common_labels, record)
|
1111
|
+
resource = group_level_resource.dup
|
1112
|
+
resource.labels = group_level_resource.labels.dup
|
1113
|
+
common_labels = group_level_common_labels.dup
|
1058
1114
|
|
1115
|
+
case resource.type
|
1059
1116
|
# Cloud Functions.
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1117
|
+
when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
1118
|
+
if record.key?('log')
|
1119
|
+
@cloudfunctions_log_match =
|
1120
|
+
@compiled_cloudfunctions_log_regexp.match(record['log'])
|
1121
|
+
common_labels['execution_id'] =
|
1122
|
+
@cloudfunctions_log_match['execution_id'] if
|
1123
|
+
@cloudfunctions_log_match &&
|
1124
|
+
@cloudfunctions_log_match['execution_id']
|
1125
|
+
end
|
1069
1126
|
|
1070
|
-
# GKE
|
1071
|
-
|
1127
|
+
# GKE container.
|
1128
|
+
when GKE_CONSTANTS[:resource_type]
|
1072
1129
|
# Move the stdout/stderr annotation from the record into a label.
|
1073
1130
|
common_labels.merge!(
|
1074
1131
|
delete_and_extract_labels(
|
1075
|
-
record, 'stream' => "#{
|
1132
|
+
record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"))
|
1076
1133
|
|
1077
1134
|
# If the record has been annotated by the kubernetes_metadata_filter
|
1078
1135
|
# plugin, then use that metadata. Otherwise, rely on commonLabels
|
1079
|
-
# populated
|
1136
|
+
# populated from the group's tag.
|
1080
1137
|
if record.key?('kubernetes')
|
1081
|
-
|
1138
|
+
resource.labels.merge!(
|
1082
1139
|
delete_and_extract_labels(
|
1083
|
-
record['kubernetes'],
|
1140
|
+
record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
|
1084
1141
|
.map { |l| [l, l] }.to_h))
|
1085
1142
|
common_labels.merge!(
|
1086
1143
|
delete_and_extract_labels(
|
1087
|
-
record['kubernetes'],
|
1088
|
-
.map { |l| [l, "#{
|
1144
|
+
record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
|
1145
|
+
.map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
|
1089
1146
|
# Prepend label/ to all user-defined labels' keys.
|
1090
1147
|
if record['kubernetes'].key?('labels')
|
1091
1148
|
common_labels.merge!(
|
@@ -1111,14 +1168,56 @@ module Fluent
|
|
1111
1168
|
# Report them as monitored resource labels instead of common labels.
|
1112
1169
|
# e.g. "dataflow.googleapis.com/job_id" => "job_id"
|
1113
1170
|
[DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
|
1114
|
-
next unless
|
1115
|
-
|
1171
|
+
next unless resource.type == service_constants[:resource_type]
|
1172
|
+
resource.labels.merge!(
|
1116
1173
|
delete_and_extract_labels(
|
1117
1174
|
common_labels, service_constants[:extra_common_labels]
|
1118
1175
|
.map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h))
|
1119
1176
|
end
|
1120
1177
|
|
1121
|
-
[
|
1178
|
+
[resource, common_labels]
|
1179
|
+
end
|
1180
|
+
|
1181
|
+
# Call Metadata Agent to get monitored resource information and parse
|
1182
|
+
# response to Google::Api::MonitoredResource.
|
1183
|
+
def query_metadata_agent_for_monitored_resource(local_resource_id)
|
1184
|
+
response = query_metadata_agent("monitoredResource/#{local_resource_id}")
|
1185
|
+
return nil if response.nil?
|
1186
|
+
begin
|
1187
|
+
resource = Google::Api::MonitoredResource.decode_json(response.to_json)
|
1188
|
+
rescue Google::Protobuf::ParseError, ArgumentError => e
|
1189
|
+
@log.error 'Error paring monitored resource from Metadata Agent. ' \
|
1190
|
+
"response: #{response.inspect}", error: e
|
1191
|
+
return nil
|
1192
|
+
end
|
1193
|
+
|
1194
|
+
# TODO(qingling128): Use Google::Api::MonitoredResource directly after we
|
1195
|
+
# upgrade gRPC version to include the fix for the protobuf map
|
1196
|
+
# corruption issue.
|
1197
|
+
Google::Apis::LoggingV2beta1::MonitoredResource.new(
|
1198
|
+
type: resource.type,
|
1199
|
+
labels: resource.labels.to_h
|
1200
|
+
)
|
1201
|
+
end
|
1202
|
+
|
1203
|
+
# Issue a request to the Metadata Agent's local API and parse the response
|
1204
|
+
# to JSON. Return nil in case of failure.
|
1205
|
+
def query_metadata_agent(path)
|
1206
|
+
url = "#{@metadata_agent_url}/#{path}"
|
1207
|
+
@log.debug("Calling Metadata Agent: #{url}")
|
1208
|
+
open(url) do |f|
|
1209
|
+
response = f.read
|
1210
|
+
parsed_hash = parse_json_or_nil(response)
|
1211
|
+
if parsed_hash.nil?
|
1212
|
+
@log.error 'Response from Metadata Agent is not in valid json ' \
|
1213
|
+
"format: '#{response.inspect}'."
|
1214
|
+
return nil
|
1215
|
+
end
|
1216
|
+
@log.debug "Response from Metadata Agent: #{parsed_hash}"
|
1217
|
+
return parsed_hash
|
1218
|
+
end
|
1219
|
+
rescue StandardError => e
|
1220
|
+
@log.error 'Error calling Metadata Agent.', error: e
|
1122
1221
|
end
|
1123
1222
|
|
1124
1223
|
# TODO: This functionality should eventually be available in another
|
@@ -1211,7 +1310,7 @@ module Fluent
|
|
1211
1310
|
[ts_secs, ts_nanos]
|
1212
1311
|
end
|
1213
1312
|
|
1214
|
-
def compute_severity(resource_type, record,
|
1313
|
+
def compute_severity(resource_type, record, entry_level_common_labels)
|
1215
1314
|
if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type]
|
1216
1315
|
if @cloudfunctions_log_match && @cloudfunctions_log_match['severity']
|
1217
1316
|
return parse_severity(@cloudfunctions_log_match['severity'])
|
@@ -1226,9 +1325,8 @@ module Fluent
|
|
1226
1325
|
end
|
1227
1326
|
elsif record.key?('severity')
|
1228
1327
|
return parse_severity(record.delete('severity'))
|
1229
|
-
elsif resource_type ==
|
1230
|
-
|
1231
|
-
stream = entry_common_labels["#{CONTAINER_CONSTANTS[:service]}/stream"]
|
1328
|
+
elsif resource_type == GKE_CONSTANTS[:resource_type]
|
1329
|
+
stream = entry_level_common_labels["#{GKE_CONSTANTS[:service]}/stream"]
|
1232
1330
|
if stream == 'stdout'
|
1233
1331
|
return 'INFO'
|
1234
1332
|
elsif stream == 'stderr'
|
@@ -1452,9 +1550,8 @@ module Fluent
|
|
1452
1550
|
hash.nil? || !hash.is_a?(Hash)
|
1453
1551
|
label_map.each_with_object({}) \
|
1454
1552
|
do |(original_label, new_label), extracted_labels|
|
1455
|
-
|
1456
|
-
|
1457
|
-
hash.key?(original_label)
|
1553
|
+
value = hash.delete(original_label)
|
1554
|
+
extracted_labels[new_label] = convert_to_utf8(value.to_s) if value
|
1458
1555
|
end
|
1459
1556
|
end
|
1460
1557
|
|
@@ -1520,7 +1617,8 @@ module Fluent
|
|
1520
1617
|
text_payload = record['log']
|
1521
1618
|
elsif is_json
|
1522
1619
|
json_payload = record
|
1523
|
-
elsif
|
1620
|
+
elsif [GKE_CONSTANTS[:resource_type],
|
1621
|
+
DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
|
1524
1622
|
record.key?('log')
|
1525
1623
|
text_payload = record['log']
|
1526
1624
|
elsif record.size == 1 && record.key?('message')
|
@@ -1550,7 +1648,7 @@ module Fluent
|
|
1550
1648
|
elsif resource.type == APPENGINE_CONSTANTS[:resource_type]
|
1551
1649
|
# Add a prefix to Managed VM logs to prevent namespace collisions.
|
1552
1650
|
tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
|
1553
|
-
elsif resource.type ==
|
1651
|
+
elsif resource.type == GKE_CONSTANTS[:resource_type]
|
1554
1652
|
# For Kubernetes logs, use just the container name as the log name
|
1555
1653
|
# if we have it.
|
1556
1654
|
if resource.labels && resource.labels.key?('container_name')
|