fluent-plugin-google-cloud 0.6.7 → 0.6.8.pre.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f19f3c278abefebea1b068850a3e7d09a31835e2
4
- data.tar.gz: e4588beb45b2dba23d3901a6a527091134c1f5a6
3
+ metadata.gz: f013270937ffeb8ac1addeb0b50cbd04b9434d8f
4
+ data.tar.gz: a19be3b8f5b49266e6055be20c6a8c693fae1fea
5
5
  SHA512:
6
- metadata.gz: 86369bce68f370907f35d589830ea73330032590ae43bc75b22bc7ff99157997740fdfeac46079c96fd832b90ca7bc36da9e66f8187e4f1c566277e028b38b0f
7
- data.tar.gz: 9c8189c85d4980c160403b09bd2ccb58c3bcd264f36062e994ea7bf63aa3fc70aa79cf357f54a83ff255ae9ceb7d9bee9243866f2a7d00af6b6b2c147001e549
6
+ metadata.gz: cfc74c7b17e028776423d30fbc438d1d5d146d532991bb2331044a35f54e1a6acfd6f14e6bc4b234f924ac5e6d62b268931b8c856d07532b9c610c85579f3906
7
+ data.tar.gz: bb14fdffce9b35b5c1ff4d9609d5ec473d19d69b74972b3ba20bc843d28e1118be19b89b112b961d7b903e246612eca551780c3e9c202fa852ad8bdec6f50f2d
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-google-cloud (0.6.7)
4
+ fluent-plugin-google-cloud (0.6.8.pre.1)
5
5
  fluentd (~> 0.10)
6
6
  google-api-client (~> 0.9.0)
7
7
  google-cloud-logging (= 0.24.1)
@@ -55,7 +55,7 @@ GEM
55
55
  googleauth (~> 0.5.1)
56
56
  grpc (~> 1.0)
57
57
  rly (~> 0.2.3)
58
- google-protobuf (3.4.0.2-x86_64-linux)
58
+ google-protobuf (3.4.0.2)
59
59
  googleapis-common-protos (1.3.5)
60
60
  google-protobuf (~> 3.2)
61
61
  grpc (~> 1.0)
@@ -67,7 +67,7 @@ GEM
67
67
  multi_json (~> 1.11)
68
68
  os (~> 0.9)
69
69
  signet (~> 0.7)
70
- grpc (1.2.5-x86_64-linux)
70
+ grpc (1.2.5)
71
71
  google-protobuf (~> 3.1)
72
72
  googleauth (~> 0.5.1)
73
73
  hashdiff (0.3.6)
@@ -10,7 +10,7 @@ eos
10
10
  gem.homepage = \
11
11
  'https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud'
12
12
  gem.license = 'Apache-2.0'
13
- gem.version = '0.6.7'
13
+ gem.version = '0.6.8.pre.1'
14
14
  gem.authors = ['Todd Derr', 'Alex Robinson']
15
15
  gem.email = ['salty@google.com']
16
16
  gem.required_ruby_version = Gem::Requirement.new('>= 2.0')
@@ -38,12 +38,12 @@ end
38
38
  module Fluent
39
39
  # fluentd output plugin for the Stackdriver Logging API
40
40
  class GoogleCloudOutput < BufferedOutput
41
- # Constants for service names and resource types.
42
- module Constants
41
+ # Constants for service names, resource types and etc.
42
+ module ServiceConstants
43
43
  APPENGINE_CONSTANTS = {
44
44
  service: 'appengine.googleapis.com',
45
45
  resource_type: 'gae_app',
46
- metadata_attributes: %w(gae_backend_name gae_backend_version).to_set
46
+ metadata_attributes: %w(gae_backend_name gae_backend_version)
47
47
  }
48
48
  CLOUDFUNCTIONS_CONSTANTS = {
49
49
  service: 'cloudfunctions.googleapis.com',
@@ -53,12 +53,16 @@ module Fluent
53
53
  service: 'compute.googleapis.com',
54
54
  resource_type: 'gce_instance'
55
55
  }
56
- CONTAINER_CONSTANTS = {
56
+ GKE_CONSTANTS = {
57
57
  service: 'container.googleapis.com',
58
58
  resource_type: 'container',
59
59
  extra_resource_labels: %w(namespace_id pod_id container_name),
60
60
  extra_common_labels: %w(namespace_name pod_name),
61
- metadata_attributes: %w(kube-env).to_set
61
+ metadata_attributes: %w(kube-env)
62
+ }
63
+ DOCKER_CONSTANTS = {
64
+ service: 'docker.googleapis.com',
65
+ resource_type: 'docker_container'
62
66
  }
63
67
  DATAFLOW_CONSTANTS = {
64
68
  service: 'dataflow.googleapis.com',
@@ -68,8 +72,7 @@ module Fluent
68
72
  DATAPROC_CONSTANTS = {
69
73
  service: 'cluster.dataproc.googleapis.com',
70
74
  resource_type: 'cloud_dataproc_cluster',
71
- metadata_attributes:
72
- %w(dataproc-cluster-uuid dataproc-cluster-name).to_set
75
+ metadata_attributes: %w(dataproc-cluster-uuid dataproc-cluster-name)
73
76
  }
74
77
  EC2_CONSTANTS = {
75
78
  service: 'ec2.amazonaws.com',
@@ -83,7 +86,7 @@ module Fluent
83
86
 
84
87
  # The map between a subservice name and a resource type.
85
88
  SUBSERVICE_MAP = \
86
- [APPENGINE_CONSTANTS, CONTAINER_CONSTANTS, DATAFLOW_CONSTANTS,
89
+ [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAFLOW_CONSTANTS,
87
90
  DATAPROC_CONSTANTS, ML_CONSTANTS]
88
91
  .map { |consts| [consts[:service], consts[:resource_type]] }.to_h
89
92
  # Default back to GCE if invalid value is detected.
@@ -91,18 +94,34 @@ module Fluent
91
94
 
92
95
  # The map between a resource type and expected subservice attributes.
93
96
  SUBSERVICE_METADATA_ATTRIBUTES = \
94
- [APPENGINE_CONSTANTS, CONTAINER_CONSTANTS, DATAPROC_CONSTANTS]
95
- .map { |consts| [consts[:resource_type], consts[:metadata_attributes]] }
96
- .to_h
97
+ [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAPROC_CONSTANTS].map do |consts|
98
+ [consts[:resource_type], consts[:metadata_attributes].to_set]
99
+ end.to_h
100
+ end
97
101
 
98
- # Default values for JSON payload keys to set the "trace",
99
- # "sourceLocation", "operation" and "labels" fields in the LogEntry.
100
- DEFAULT_PAYLOAD_KEY_PREFIX = 'logging.googleapis.com'
102
+ # Constants for configuration.
103
+ module ConfigConstants
104
+ # Default values for JSON payload keys to set the "httpRequest",
105
+ # "operation", "sourceLocation", "trace" fields in the LogEntry.
101
106
  DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'
102
- DEFAULT_OPERATION_KEY = "#{DEFAULT_PAYLOAD_KEY_PREFIX}/operation"
103
- DEFAULT_SOURCE_LOCATION_KEY =
104
- "#{DEFAULT_PAYLOAD_KEY_PREFIX}/sourceLocation"
105
- DEFAULT_TRACE_KEY = "#{DEFAULT_PAYLOAD_KEY_PREFIX}/trace"
107
+ DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'
108
+ DEFAULT_SOURCE_LOCATION_KEY = 'logging.googleapis.com/sourceLocation'
109
+ DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'
110
+
111
+ DEFAULT_METADATA_AGENT_URL =
112
+ 'http://local-metadata-agent.stackdriver.com:8000'
113
+ end
114
+
115
+ # Constants for log entry field extraction.
116
+ module InternalConstants
117
+ # Use empty string as request path when the local_resource_id of monitored
118
+ # resource can be implicitly inferred by Metadata Agent.
119
+ IMPLICIT_LOCAL_RESOURCE_ID = ''
120
+
121
+ # The label name of local_resource_id in the json payload. When a record
122
+ # has this field in the payload, we will use the value to retrieve
123
+ # monitored resource from Stackdriver Metadata agent.
124
+ LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'
106
125
 
107
126
  # Map from each field name under LogEntry to corresponding variables
108
127
  # required to perform field value extraction from the log record.
@@ -155,12 +174,14 @@ module Fluent
155
174
  }
156
175
  end
157
176
 
158
- include self::Constants
177
+ include self::ServiceConstants
178
+ include self::ConfigConstants
179
+ include self::InternalConstants
159
180
 
160
181
  Fluent::Plugin.register_output('google_cloud', self)
161
182
 
162
183
  PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'
163
- PLUGIN_VERSION = '0.6.7'
184
+ PLUGIN_VERSION = '0.6.8.pre.1'
164
185
 
165
186
  # Name of the the Google cloud logging write scope.
166
187
  LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'
@@ -295,6 +316,11 @@ module Fluent
295
316
  config_param :monitoring_type, :string,
296
317
  :default => Monitoring::PrometheusMonitoringRegistry.name
297
318
 
319
+ # Whether to call metadata agent to retrieve monitored resource.
320
+ config_param :enable_metadata_agent, :bool, :default => false
321
+ config_param :metadata_agent_url, :string,
322
+ :default => DEFAULT_METADATA_AGENT_URL
323
+
298
324
  # rubocop:enable Style/HashSyntax
299
325
 
300
326
  # TODO: Add a log_name config option rather than just using the tag?
@@ -354,28 +380,32 @@ module Fluent
354
380
 
355
381
  @platform = detect_platform
356
382
 
357
- # Set required variables: @project_id, @vm_id, @vm_name and @zone by
358
- # making some requests to metadata server.
359
- #
360
- # Note: Once we support metadata injection at Logging API side, we might
361
- # no longer need to require all these metadata in logging agent. But for
362
- # now, they are still required.
363
- #
364
- # TODO(qingling128): After Metadata Agent support is added, try extracting
365
- # these info from responses from Metadata Agent first.
383
+ # Set agent-level monitored resource. This monitored resource is initiated
384
+ # as the logging agent starts up. It will be inherited by all log entries
385
+ # processed by this agent. First try to retrieve it via Metadata Agent.
386
+ if @enable_metadata_agent
387
+ # The local_resource_id for this should be the instance id. Since this
388
+ # can be implicitly inferred by Metadata Agent, we do not need to
389
+ # explicitly send the key.
390
+ # TODO(qingling128): Remove this logic once the resource is retrieved at
391
+ # a proper time (b/65175256).
392
+ @resource = query_metadata_agent_for_monitored_resource(
393
+ IMPLICIT_LOCAL_RESOURCE_ID)
394
+ end
395
+
396
+ # Set required variables: @project_id, @vm_id, @vm_name and @zone.
366
397
  set_required_metadata_variables
367
398
 
368
399
  # Retrieve monitored resource.
369
- #
370
- # TODO(qingling128): After Metadata Agent support is added, try retrieving
371
- # the monitored resource from Metadata Agent first.
372
- @resource = determine_agent_level_monitored_resource_via_legacy
400
+ # Fail over to retrieve monitored resource via the legacy path if we fail
401
+ # to get it from Metadata Agent.
402
+ @resource ||= determine_agent_level_monitored_resource_via_legacy
373
403
 
374
404
  # Set regexp that we should match tags against later on. Using a list
375
405
  # instead of a map to ensure order. For example, tags will be matched
376
406
  # against Cloud Functions first, then GKE.
377
407
  @tag_regexp_list = []
378
- if @resource.type == CONTAINER_CONSTANTS[:resource_type]
408
+ if @resource.type == GKE_CONSTANTS[:resource_type]
379
409
  # We only support Cloud Functions logs for GKE right now.
380
410
  if fetch_gce_metadata('instance/attributes/'
381
411
  ).split.include?('gcf_region')
@@ -388,7 +418,7 @@ module Fluent
388
418
  ]
389
419
  end
390
420
  @tag_regexp_list << [
391
- CONTAINER_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
421
+ GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
392
422
  ]
393
423
  end
394
424
 
@@ -404,7 +434,7 @@ module Fluent
404
434
 
405
435
  # Log an informational message containing the Logs viewer URL
406
436
  @log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
407
- "viewer?project=#{@project_id}&resource=#{@resource_type}/",
437
+ "viewer?project=#{@project_id}&resource=#{@resource.type}/",
408
438
  "instance_id/#{@vm_id}"
409
439
  end
410
440
 
@@ -420,39 +450,27 @@ module Fluent
420
450
  end
421
451
 
422
452
  def write(chunk)
423
- # Group the entries since we have to make one call per tag.
424
- grouped_entries = {}
425
- chunk.msgpack_each do |tag, *arr|
426
- sanitized_tag = sanitize_tag(tag)
427
- if sanitized_tag.nil?
428
- @log.warn "Dropping log entries with invalid tag: '#{tag}'. " \
429
- 'A tag should be a string with utf8 characters.'
430
- next
431
- end
432
- grouped_entries[sanitized_tag] ||= []
433
- grouped_entries[sanitized_tag].push(arr)
434
- end
453
+ grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)
435
454
 
436
- grouped_entries.each do |tag, arr|
455
+ grouped_entries.each do |(tag, local_resource_id), arr|
437
456
  entries = []
438
- group_resource, group_common_labels =
439
- determine_group_level_monitored_resource_and_labels(tag)
457
+ group_level_resource, group_level_common_labels =
458
+ determine_group_level_monitored_resource_and_labels(
459
+ tag, local_resource_id)
440
460
 
441
461
  arr.each do |time, record|
442
- next unless record.is_a?(Hash)
443
-
444
- extracted_resource_labels, extracted_common_labels = \
445
- determine_entry_level_labels(group_resource, record)
446
- entry_resource = group_resource.dup
447
- entry_resource.labels.merge!(extracted_resource_labels)
448
- entry_common_labels = \
449
- group_common_labels.merge(extracted_common_labels)
462
+ entry_level_resource, entry_level_common_labels =
463
+ determine_entry_level_monitored_resource_and_labels(
464
+ group_level_resource, group_level_common_labels, record)
450
465
 
451
466
  is_json = false
452
467
  if @detect_json
453
- # Save the timestamp if available, then clear it out to allow for
454
- # determining whether we should parse the log or message field.
468
+ # Save the timestamp and severity if available, then clear it out to
469
+ # allow for determining whether we should parse the log or message
470
+ # field.
455
471
  timestamp = record.delete('time')
472
+ severity = record.delete('severity')
473
+
456
474
  # If the log is json, we want to export it as a structured log
457
475
  # unless there is additional metadata that would be lost.
458
476
  record_json = nil
@@ -467,16 +485,16 @@ module Fluent
467
485
  record = record_json
468
486
  is_json = true
469
487
  end
470
- # Restore timestamp if necessary.
471
- unless record.key?('time') || timestamp.nil?
472
- record['time'] = timestamp
473
- end
488
+ # Restore timestamp and severity if necessary. Note that we don't
489
+ # want to override these keys in the JSON we've just parsed.
490
+ record['time'] ||= timestamp if timestamp
491
+ record['severity'] ||= severity if severity
474
492
  end
475
493
 
476
494
  ts_secs, ts_nanos = compute_timestamp(
477
- entry_resource.type, record, time)
495
+ entry_level_resource.type, record, time)
478
496
  severity = compute_severity(
479
- entry_resource.type, record, entry_common_labels)
497
+ entry_level_resource.type, record, entry_level_common_labels)
480
498
 
481
499
  ts_secs = begin
482
500
  Integer ts_secs
@@ -488,12 +506,13 @@ module Fluent
488
506
  rescue ArgumentError, TypeError
489
507
  ts_nanos
490
508
  end
509
+
491
510
  if @use_grpc
492
511
  entry = Google::Logging::V2::LogEntry.new(
493
- labels: entry_common_labels,
512
+ labels: entry_level_common_labels,
494
513
  resource: Google::Api::MonitoredResource.new(
495
- type: entry_resource.type,
496
- labels: entry_resource.labels.to_h
514
+ type: entry_level_resource.type,
515
+ labels: entry_level_resource.labels.to_h
497
516
  ),
498
517
  severity: grpc_severity(severity)
499
518
  )
@@ -510,10 +529,11 @@ module Fluent
510
529
  end
511
530
  else
512
531
  # Remove the labels if we didn't populate them with anything.
513
- entry_resource.labels = nil if entry_resource.labels.empty?
532
+ entry_level_resource.labels = nil if
533
+ entry_level_resource.labels.empty?
514
534
  entry = Google::Apis::LoggingV2beta1::LogEntry.new(
515
- labels: entry_common_labels,
516
- resource: entry_resource,
535
+ labels: entry_level_common_labels,
536
+ resource: entry_level_resource,
517
537
  severity: severity,
518
538
  timestamp: {
519
539
  seconds: ts_secs,
@@ -522,13 +542,12 @@ module Fluent
522
542
  )
523
543
  end
524
544
 
525
- # Get fully-qualified trace id for LogEntry "trace" field per config.
545
+ # Get fully-qualified trace id for LogEntry "trace" field.
526
546
  fq_trace_id = record.delete(@trace_key)
527
547
  entry.trace = fq_trace_id if fq_trace_id
528
548
 
529
549
  set_log_entry_fields(record, entry)
530
-
531
- set_payload(entry_resource.type, record, entry, is_json)
550
+ set_payload(entry_level_resource.type, record, entry, is_json)
532
551
 
533
552
  entries.push(entry)
534
553
  end
@@ -536,21 +555,21 @@ module Fluent
536
555
  next if entries.empty?
537
556
 
538
557
  log_name = "projects/#{@project_id}/logs/#{log_name(
539
- tag, group_resource)}"
558
+ tag, group_level_resource)}"
540
559
 
541
560
  # Does the actual write to the cloud logging api.
542
561
  client = api_client
543
562
  if @use_grpc
544
563
  begin
545
- labels_utf8_pairs = group_common_labels.map do |k, v|
564
+ labels_utf8_pairs = group_level_common_labels.map do |k, v|
546
565
  [k.encode('utf-8'), convert_to_utf8(v)]
547
566
  end
548
567
 
549
568
  write_request = Google::Logging::V2::WriteLogEntriesRequest.new(
550
569
  log_name: log_name,
551
570
  resource: Google::Api::MonitoredResource.new(
552
- type: group_resource.type,
553
- labels: group_resource.labels.to_h
571
+ type: group_level_resource.type,
572
+ labels: group_level_resource.labels.to_h
554
573
  ),
555
574
  labels: labels_utf8_pairs.to_h,
556
575
  entries: entries
@@ -560,8 +579,8 @@ module Fluent
560
579
  increment_successful_requests_count
561
580
  increment_ingested_entries_count(entries.length)
562
581
 
563
- # Let the user explicitly know when the first call succeeded,
564
- # to aid with verification and troubleshooting.
582
+ # Let the user explicitly know when the first call succeeded, to aid
583
+ # with verification and troubleshooting.
565
584
  unless @successful_call
566
585
  @successful_call = true
567
586
  @log.info 'Successfully sent gRPC to Stackdriver Logging API.'
@@ -600,8 +619,8 @@ module Fluent
600
619
  @log.warn "Dropping #{dropped} log message(s)",
601
620
  error: error.to_s, error_code: error.code.to_s
602
621
  else
603
- # Assume this is a problem with the request itself
604
- # and don't retry.
622
+ # Assume this is a problem with the request itself and don't
623
+ # retry.
605
624
  dropped = entries.length
606
625
  increment_dropped_entries_count(dropped)
607
626
  @log.error "Unknown response code #{error.code} from the "\
@@ -614,8 +633,8 @@ module Fluent
614
633
  write_request = \
615
634
  Google::Apis::LoggingV2beta1::WriteLogEntriesRequest.new(
616
635
  log_name: log_name,
617
- resource: group_resource,
618
- labels: group_common_labels,
636
+ resource: group_level_resource,
637
+ labels: group_level_common_labels,
619
638
  entries: entries)
620
639
 
621
640
  # TODO: RequestOptions
@@ -628,8 +647,8 @@ module Fluent
628
647
  increment_successful_requests_count
629
648
  increment_ingested_entries_count(entries.length)
630
649
 
631
- # Let the user explicitly know when the first call succeeded,
632
- # to aid with verification and troubleshooting.
650
+ # Let the user explicitly know when the first call succeeded, to aid
651
+ # with verification and troubleshooting.
633
652
  unless @successful_call
634
653
  @successful_call = true
635
654
  @log.info 'Successfully sent to Stackdriver Logging API.'
@@ -820,8 +839,9 @@ module Fluent
820
839
 
821
840
  # Retrieve monitored resource via the legacy way.
822
841
  #
823
- # TODO(qingling128): Use this as only a fallback plan after Metadata Agent
824
- # support is added.
842
+ # Note: This is just a failover plan if we fail to get metadata from
843
+ # Metadata Agent. Thus it should be equivalent to what Metadata Agent
844
+ # returns.
825
845
  def determine_agent_level_monitored_resource_via_legacy
826
846
  resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
827
847
  labels: {})
@@ -883,7 +903,7 @@ module Fluent
883
903
  }
884
904
 
885
905
  # GKE container.
886
- when CONTAINER_CONSTANTS[:resource_type]
906
+ when GKE_CONSTANTS[:resource_type]
887
907
  raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
888
908
  kube_env = YAML.load(raw_kube_env)
889
909
  return {
@@ -918,7 +938,7 @@ module Fluent
918
938
  rescue StandardError => e
919
939
  @log.error "Failed to set monitored resource labels for #{type}: ",
920
940
  error: e
921
- return {}
941
+ {}
922
942
  end
923
943
 
924
944
  # Determine the common labels that should be added to all log entries
@@ -942,7 +962,7 @@ module Fluent
942
962
 
943
963
  # GCE instance and GKE container.
944
964
  when COMPUTE_CONSTANTS[:resource_type],
945
- CONTAINER_CONSTANTS[:resource_type]
965
+ GKE_CONSTANTS[:resource_type]
946
966
  labels.merge!(
947
967
  "#{COMPUTE_CONSTANTS[:service]}/resource_name" => @vm_name)
948
968
 
@@ -954,138 +974,175 @@ module Fluent
954
974
  labels
955
975
  end
956
976
 
957
- # Determine the group level monitored resource and common labels shared by a
958
- # collection of entries.
959
- def determine_group_level_monitored_resource_and_labels(tag)
960
- # Determine group level monitored resource type. For certain types,
961
- # extract useful info from the tag and store those in
962
- # matched_regex_group.
963
- group_resource_type, matched_regex_group =
964
- determine_group_level_monitored_resource_type(tag)
965
-
966
- # Determine group level monitored resource labels and common labels.
967
- group_resource_labels, group_common_labels =
968
- determine_group_level_labels(group_resource_type, matched_regex_group)
969
-
970
- group_resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
971
- type: group_resource_type,
972
- labels: group_resource_labels.to_h
973
- )
974
-
975
- # Freeze the per-request state. Any further changes must be made on a
976
- # per-entry basis.
977
- group_resource.freeze
978
- group_resource.labels.freeze
979
- group_common_labels.freeze
980
-
981
- [group_resource, group_common_labels]
977
+ # Group the log entries by tag and local_resource_id pairs. Also filter out
978
+ # invalid non-Hash entries.
979
+ def group_log_entries_by_tag_and_local_resource_id(chunk)
980
+ groups = {}
981
+ chunk.msgpack_each do |tag, time, record|
982
+ unless record.is_a?(Hash)
983
+ @log.warn 'Dropping log entries with malformed record: ' \
984
+ "'#{record.inspect}'. " \
985
+ 'A log record should be in JSON format.'
986
+ next
987
+ end
988
+ sanitized_tag = sanitize_tag(tag)
989
+ if sanitized_tag.nil?
990
+ @log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
991
+ ' A tag should be a string with utf8 characters.'
992
+ next
993
+ end
994
+ local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
995
+ # A nil local_resource_id means "fall back to legacy".
996
+ hash_key = [sanitized_tag, local_resource_id].freeze
997
+ groups[hash_key] ||= []
998
+ groups[hash_key].push([time, record])
999
+ end
1000
+ groups
982
1001
  end
983
1002
 
984
- # Determine group level monitored resource type shared by a collection of
985
- # entries.
986
- # Return the resource type and tag regexp matched groups. The matched groups
987
- # only apply to some resource types. Return nil if not applicable or if
988
- # there is no match.
989
- def determine_group_level_monitored_resource_type(tag)
1003
+ # Determine the group level monitored resource and common labels shared by a
1004
+ # collection of entries.
1005
+ def determine_group_level_monitored_resource_and_labels(tag,
1006
+ local_resource_id)
1007
+ resource = @resource.dup
1008
+ resource.labels = @resource.labels.dup
1009
+ common_labels = @common_labels.dup
1010
+
1011
+ # Change the resource type and set matched_regexp_group if the tag matches
1012
+ # certain regexp.
1013
+ matched_regexp_group = nil # @tag_regexp_list can be an empty list.
990
1014
  @tag_regexp_list.each do |derived_type, tag_regexp|
991
- matched_regex_group = tag_regexp.match(tag)
992
- return [derived_type, matched_regex_group] if
993
- matched_regex_group
1015
+ matched_regexp_group = tag_regexp.match(tag)
1016
+ if matched_regexp_group
1017
+ resource.type = derived_type
1018
+ break
1019
+ end
994
1020
  end
995
- [@resource.type, nil]
996
- end
997
1021
 
998
- # Determine group level monitored resource labels and common labels. These
999
- # labels will be shared by a collection of entries.
1000
- def determine_group_level_labels(group_resource_type, matched_regex_group)
1001
- group_resource_labels = @resource.labels.dup
1002
- group_common_labels = @common_labels.dup
1022
+ # Determine the monitored resource based on the local_resource_id.
1023
+ # Different monitored resource types have unique ids in different format.
1024
+ # We will query Metadata Agent for the monitored resource. Return the
1025
+ # legacy monitored resource (either the instance resource or the resource
1026
+ # inferred from the tag) if failed to get a monitored resource from
1027
+ # Metadata Agent with this key.
1028
+ #
1029
+ # Docker container:
1030
+ # "container.<container_id>"
1031
+ # "containerName.<container_name>"
1032
+ # GKE container:
1033
+ # "gke_containerName.<namespace_id>.<pod_name>.<container_name>"
1034
+ if @enable_metadata_agent && local_resource_id
1035
+ @log.debug 'Calling metadata agent with local_resource_id: ' \
1036
+ "#{local_resource_id}."
1037
+ retrieved_resource = query_metadata_agent_for_monitored_resource(
1038
+ local_resource_id)
1039
+ @log.debug 'Retrieved monitored resource from metadata agent: ' \
1040
+ "#{retrieved_resource.inspect}."
1041
+ if retrieved_resource
1042
+ resource = retrieved_resource
1043
+ # TODO(qingling128): Fix this temporary renaming from 'gke_container'
1044
+ # to 'container'.
1045
+ resource.type = 'container' if resource.type == 'gke_container'
1046
+ end
1047
+ end
1003
1048
 
1004
- case group_resource_type
1049
+ # Once the resource type is settled down, determine the labels.
1050
+ case resource.type
1005
1051
  # Cloud Functions.
1006
1052
  when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
1007
- group_resource_labels.merge!(
1053
+ resource.labels.merge!(
1008
1054
  'region' => @gcf_region,
1009
1055
  'function_name' => decode_cloudfunctions_function_name(
1010
- matched_regex_group['encoded_function_name'])
1056
+ matched_regexp_group['encoded_function_name'])
1011
1057
  )
1012
-
1013
- instance_id = group_resource_labels.delete('instance_id')
1014
- group_common_labels.merge!(
1015
- "#{CONTAINER_CONSTANTS[:service]}/instance_id" => instance_id,
1058
+ instance_id = resource.labels.delete('instance_id')
1059
+ common_labels.merge!(
1060
+ "#{GKE_CONSTANTS[:service]}/instance_id" => instance_id,
1016
1061
  "#{COMPUTE_CONSTANTS[:service]}/resource_id" => instance_id,
1017
- "#{CONTAINER_CONSTANTS[:service]}/cluster_name" =>
1018
- group_resource_labels.delete('cluster_name'),
1062
+ "#{GKE_CONSTANTS[:service]}/cluster_name" =>
1063
+ resource.labels.delete('cluster_name'),
1019
1064
  "#{COMPUTE_CONSTANTS[:service]}/zone" =>
1020
- group_resource_labels.delete('zone')
1065
+ resource.labels.delete('zone')
1021
1066
  )
1022
1067
 
1023
1068
  # GKE container.
1024
- when CONTAINER_CONSTANTS[:resource_type]
1025
- if matched_regex_group
1069
+ when GKE_CONSTANTS[:resource_type]
1070
+ if matched_regexp_group
1026
1071
  # We only expect one occurrence of each key in the match group.
1027
1072
  resource_labels_candidates =
1028
- matched_regex_group.names.zip(matched_regex_group.captures).to_h
1029
- common_labels_candidates =
1030
- resource_labels_candidates.dup
1031
- group_resource_labels.merge!(
1073
+ matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
1074
+ common_labels_candidates = resource_labels_candidates.dup
1075
+ resource.labels.merge!(
1032
1076
  delete_and_extract_labels(
1033
1077
  resource_labels_candidates,
1034
1078
  # The kubernetes_tag_regexp is poorly named. 'namespace_name' is
1035
1079
  # in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
1036
1080
  # TODO(qingling128): Figure out how to put this map into
1037
- # constants like CONTAINER_CONSTANTS[:extra_resource_labels].
1081
+ # constants like GKE_CONSTANTS[:extra_resource_labels].
1038
1082
  'container_name' => 'container_name',
1039
1083
  'namespace_name' => 'namespace_id',
1040
1084
  'pod_name' => 'pod_id'))
1041
1085
 
1042
- group_common_labels.merge!(
1086
+ common_labels.merge!(
1043
1087
  delete_and_extract_labels(
1044
1088
  common_labels_candidates,
1045
- CONTAINER_CONSTANTS[:extra_common_labels]
1046
- .map { |l| [l, "#{CONTAINER_CONSTANTS[:service]}/#{l}"] }.to_h))
1089
+ GKE_CONSTANTS[:extra_common_labels]
1090
+ .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
1047
1091
  end
1092
+
1093
+ # Docker container.
1094
+ # TODO(qingling128): Remove this logic once the resource is retrieved at a
1095
+ # proper time (b/65175256).
1096
+ when DOCKER_CONSTANTS[:resource_type]
1097
+ common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")
1048
1098
  end
1049
1099
 
1050
- [group_resource_labels, group_common_labels]
1100
+ resource.freeze
1101
+ resource.labels.freeze
1102
+ common_labels.freeze
1103
+
1104
+ [resource, common_labels]
1051
1105
  end
1052
1106
 
1053
- # Extract entry resource and common labels that should be applied to
1054
- # individual entries from the group resource.
1055
- def determine_entry_level_labels(group_resource, record)
1056
- resource_labels = {}
1057
- common_labels = {}
1107
+ # Extract entry level monitored resource and common labels that should be
1108
+ # applied to individual entries.
1109
+ def determine_entry_level_monitored_resource_and_labels(
1110
+ group_level_resource, group_level_common_labels, record)
1111
+ resource = group_level_resource.dup
1112
+ resource.labels = group_level_resource.labels.dup
1113
+ common_labels = group_level_common_labels.dup
1058
1114
 
1115
+ case resource.type
1059
1116
  # Cloud Functions.
1060
- if group_resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
1061
- record.key?('log')
1062
- @cloudfunctions_log_match =
1063
- @compiled_cloudfunctions_log_regexp.match(record['log'])
1064
- common_labels['execution_id'] =
1065
- @cloudfunctions_log_match['execution_id'] if \
1066
- @cloudfunctions_log_match &&
1067
- @cloudfunctions_log_match['execution_id']
1068
- end
1117
+ when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
1118
+ if record.key?('log')
1119
+ @cloudfunctions_log_match =
1120
+ @compiled_cloudfunctions_log_regexp.match(record['log'])
1121
+ common_labels['execution_id'] =
1122
+ @cloudfunctions_log_match['execution_id'] if
1123
+ @cloudfunctions_log_match &&
1124
+ @cloudfunctions_log_match['execution_id']
1125
+ end
1069
1126
 
1070
- # GKE containers.
1071
- if group_resource.type == CONTAINER_CONSTANTS[:resource_type]
1127
+ # GKE container.
1128
+ when GKE_CONSTANTS[:resource_type]
1072
1129
  # Move the stdout/stderr annotation from the record into a label.
1073
1130
  common_labels.merge!(
1074
1131
  delete_and_extract_labels(
1075
- record, 'stream' => "#{CONTAINER_CONSTANTS[:service]}/stream"))
1132
+ record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"))
1076
1133
 
1077
1134
  # If the record has been annotated by the kubernetes_metadata_filter
1078
1135
  # plugin, then use that metadata. Otherwise, rely on commonLabels
1079
- # populated at the grouped_entries level from the group's tag.
1136
+ # populated from the group's tag.
1080
1137
  if record.key?('kubernetes')
1081
- resource_labels.merge!(
1138
+ resource.labels.merge!(
1082
1139
  delete_and_extract_labels(
1083
- record['kubernetes'], CONTAINER_CONSTANTS[:extra_resource_labels]
1140
+ record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
1084
1141
  .map { |l| [l, l] }.to_h))
1085
1142
  common_labels.merge!(
1086
1143
  delete_and_extract_labels(
1087
- record['kubernetes'], CONTAINER_CONSTANTS[:extra_common_labels]
1088
- .map { |l| [l, "#{CONTAINER_CONSTANTS[:service]}/#{l}"] }.to_h))
1144
+ record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
1145
+ .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
1089
1146
  # Prepend label/ to all user-defined labels' keys.
1090
1147
  if record['kubernetes'].key?('labels')
1091
1148
  common_labels.merge!(
@@ -1111,14 +1168,56 @@ module Fluent
1111
1168
  # Report them as monitored resource labels instead of common labels.
1112
1169
  # e.g. "dataflow.googleapis.com/job_id" => "job_id"
1113
1170
  [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
1114
- next unless group_resource.type == service_constants[:resource_type]
1115
- resource_labels.merge!(
1171
+ next unless resource.type == service_constants[:resource_type]
1172
+ resource.labels.merge!(
1116
1173
  delete_and_extract_labels(
1117
1174
  common_labels, service_constants[:extra_common_labels]
1118
1175
  .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h))
1119
1176
  end
1120
1177
 
1121
- [resource_labels, common_labels]
1178
+ [resource, common_labels]
1179
+ end
1180
+
1181
+ # Call Metadata Agent to get monitored resource information and parse
1182
+ # response to Google::Api::MonitoredResource.
1183
+ def query_metadata_agent_for_monitored_resource(local_resource_id)
1184
+ response = query_metadata_agent("monitoredResource/#{local_resource_id}")
1185
+ return nil if response.nil?
1186
+ begin
1187
+ resource = Google::Api::MonitoredResource.decode_json(response.to_json)
1188
+ rescue Google::Protobuf::ParseError, ArgumentError => e
1189
+ @log.error 'Error paring monitored resource from Metadata Agent. ' \
1190
+ "response: #{response.inspect}", error: e
1191
+ return nil
1192
+ end
1193
+
1194
+ # TODO(qingling128): Use Google::Api::MonitoredResource directly after we
1195
+ # upgrade gRPC version to include the fix for the protobuf map
1196
+ # corruption issue.
1197
+ Google::Apis::LoggingV2beta1::MonitoredResource.new(
1198
+ type: resource.type,
1199
+ labels: resource.labels.to_h
1200
+ )
1201
+ end
1202
+
1203
+ # Issue a request to the Metadata Agent's local API and parse the response
1204
+ # to JSON. Return nil in case of failure.
1205
+ def query_metadata_agent(path)
1206
+ url = "#{@metadata_agent_url}/#{path}"
1207
+ @log.debug("Calling Metadata Agent: #{url}")
1208
+ open(url) do |f|
1209
+ response = f.read
1210
+ parsed_hash = parse_json_or_nil(response)
1211
+ if parsed_hash.nil?
1212
+ @log.error 'Response from Metadata Agent is not in valid json ' \
1213
+ "format: '#{response.inspect}'."
1214
+ return nil
1215
+ end
1216
+ @log.debug "Response from Metadata Agent: #{parsed_hash}"
1217
+ return parsed_hash
1218
+ end
1219
+ rescue StandardError => e
1220
+ @log.error 'Error calling Metadata Agent.', error: e
1122
1221
  end
1123
1222
 
1124
1223
  # TODO: This functionality should eventually be available in another
@@ -1211,7 +1310,7 @@ module Fluent
1211
1310
  [ts_secs, ts_nanos]
1212
1311
  end
1213
1312
 
1214
- def compute_severity(resource_type, record, entry_common_labels)
1313
+ def compute_severity(resource_type, record, entry_level_common_labels)
1215
1314
  if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type]
1216
1315
  if @cloudfunctions_log_match && @cloudfunctions_log_match['severity']
1217
1316
  return parse_severity(@cloudfunctions_log_match['severity'])
@@ -1226,9 +1325,8 @@ module Fluent
1226
1325
  end
1227
1326
  elsif record.key?('severity')
1228
1327
  return parse_severity(record.delete('severity'))
1229
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
1230
- entry_common_labels.key?("#{CONTAINER_CONSTANTS[:service]}/stream")
1231
- stream = entry_common_labels["#{CONTAINER_CONSTANTS[:service]}/stream"]
1328
+ elsif resource_type == GKE_CONSTANTS[:resource_type]
1329
+ stream = entry_level_common_labels["#{GKE_CONSTANTS[:service]}/stream"]
1232
1330
  if stream == 'stdout'
1233
1331
  return 'INFO'
1234
1332
  elsif stream == 'stderr'
@@ -1452,9 +1550,8 @@ module Fluent
1452
1550
  hash.nil? || !hash.is_a?(Hash)
1453
1551
  label_map.each_with_object({}) \
1454
1552
  do |(original_label, new_label), extracted_labels|
1455
- extracted_labels[new_label] =
1456
- convert_to_utf8(hash.delete(original_label).to_s) if
1457
- hash.key?(original_label)
1553
+ value = hash.delete(original_label)
1554
+ extracted_labels[new_label] = convert_to_utf8(value.to_s) if value
1458
1555
  end
1459
1556
  end
1460
1557
 
@@ -1520,7 +1617,8 @@ module Fluent
1520
1617
  text_payload = record['log']
1521
1618
  elsif is_json
1522
1619
  json_payload = record
1523
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
1620
+ elsif [GKE_CONSTANTS[:resource_type],
1621
+ DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
1524
1622
  record.key?('log')
1525
1623
  text_payload = record['log']
1526
1624
  elsif record.size == 1 && record.key?('message')
@@ -1550,7 +1648,7 @@ module Fluent
1550
1648
  elsif resource.type == APPENGINE_CONSTANTS[:resource_type]
1551
1649
  # Add a prefix to Managed VM logs to prevent namespace collisions.
1552
1650
  tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
1553
- elsif resource.type == CONTAINER_CONSTANTS[:resource_type]
1651
+ elsif resource.type == GKE_CONSTANTS[:resource_type]
1554
1652
  # For Kubernetes logs, use just the container name as the log name
1555
1653
  # if we have it.
1556
1654
  if resource.labels && resource.labels.key?('container_name')