fluent-plugin-google-cloud 0.6.7 → 0.6.8.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f19f3c278abefebea1b068850a3e7d09a31835e2
4
- data.tar.gz: e4588beb45b2dba23d3901a6a527091134c1f5a6
3
+ metadata.gz: f013270937ffeb8ac1addeb0b50cbd04b9434d8f
4
+ data.tar.gz: a19be3b8f5b49266e6055be20c6a8c693fae1fea
5
5
  SHA512:
6
- metadata.gz: 86369bce68f370907f35d589830ea73330032590ae43bc75b22bc7ff99157997740fdfeac46079c96fd832b90ca7bc36da9e66f8187e4f1c566277e028b38b0f
7
- data.tar.gz: 9c8189c85d4980c160403b09bd2ccb58c3bcd264f36062e994ea7bf63aa3fc70aa79cf357f54a83ff255ae9ceb7d9bee9243866f2a7d00af6b6b2c147001e549
6
+ metadata.gz: cfc74c7b17e028776423d30fbc438d1d5d146d532991bb2331044a35f54e1a6acfd6f14e6bc4b234f924ac5e6d62b268931b8c856d07532b9c610c85579f3906
7
+ data.tar.gz: bb14fdffce9b35b5c1ff4d9609d5ec473d19d69b74972b3ba20bc843d28e1118be19b89b112b961d7b903e246612eca551780c3e9c202fa852ad8bdec6f50f2d
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-google-cloud (0.6.7)
4
+ fluent-plugin-google-cloud (0.6.8.pre.1)
5
5
  fluentd (~> 0.10)
6
6
  google-api-client (~> 0.9.0)
7
7
  google-cloud-logging (= 0.24.1)
@@ -55,7 +55,7 @@ GEM
55
55
  googleauth (~> 0.5.1)
56
56
  grpc (~> 1.0)
57
57
  rly (~> 0.2.3)
58
- google-protobuf (3.4.0.2-x86_64-linux)
58
+ google-protobuf (3.4.0.2)
59
59
  googleapis-common-protos (1.3.5)
60
60
  google-protobuf (~> 3.2)
61
61
  grpc (~> 1.0)
@@ -67,7 +67,7 @@ GEM
67
67
  multi_json (~> 1.11)
68
68
  os (~> 0.9)
69
69
  signet (~> 0.7)
70
- grpc (1.2.5-x86_64-linux)
70
+ grpc (1.2.5)
71
71
  google-protobuf (~> 3.1)
72
72
  googleauth (~> 0.5.1)
73
73
  hashdiff (0.3.6)
@@ -10,7 +10,7 @@ eos
10
10
  gem.homepage = \
11
11
  'https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud'
12
12
  gem.license = 'Apache-2.0'
13
- gem.version = '0.6.7'
13
+ gem.version = '0.6.8.pre.1'
14
14
  gem.authors = ['Todd Derr', 'Alex Robinson']
15
15
  gem.email = ['salty@google.com']
16
16
  gem.required_ruby_version = Gem::Requirement.new('>= 2.0')
@@ -38,12 +38,12 @@ end
38
38
  module Fluent
39
39
  # fluentd output plugin for the Stackdriver Logging API
40
40
  class GoogleCloudOutput < BufferedOutput
41
- # Constants for service names and resource types.
42
- module Constants
41
+ # Constants for service names, resource types, etc.
42
+ module ServiceConstants
43
43
  APPENGINE_CONSTANTS = {
44
44
  service: 'appengine.googleapis.com',
45
45
  resource_type: 'gae_app',
46
- metadata_attributes: %w(gae_backend_name gae_backend_version).to_set
46
+ metadata_attributes: %w(gae_backend_name gae_backend_version)
47
47
  }
48
48
  CLOUDFUNCTIONS_CONSTANTS = {
49
49
  service: 'cloudfunctions.googleapis.com',
@@ -53,12 +53,16 @@ module Fluent
53
53
  service: 'compute.googleapis.com',
54
54
  resource_type: 'gce_instance'
55
55
  }
56
- CONTAINER_CONSTANTS = {
56
+ GKE_CONSTANTS = {
57
57
  service: 'container.googleapis.com',
58
58
  resource_type: 'container',
59
59
  extra_resource_labels: %w(namespace_id pod_id container_name),
60
60
  extra_common_labels: %w(namespace_name pod_name),
61
- metadata_attributes: %w(kube-env).to_set
61
+ metadata_attributes: %w(kube-env)
62
+ }
63
+ DOCKER_CONSTANTS = {
64
+ service: 'docker.googleapis.com',
65
+ resource_type: 'docker_container'
62
66
  }
63
67
  DATAFLOW_CONSTANTS = {
64
68
  service: 'dataflow.googleapis.com',
@@ -68,8 +72,7 @@ module Fluent
68
72
  DATAPROC_CONSTANTS = {
69
73
  service: 'cluster.dataproc.googleapis.com',
70
74
  resource_type: 'cloud_dataproc_cluster',
71
- metadata_attributes:
72
- %w(dataproc-cluster-uuid dataproc-cluster-name).to_set
75
+ metadata_attributes: %w(dataproc-cluster-uuid dataproc-cluster-name)
73
76
  }
74
77
  EC2_CONSTANTS = {
75
78
  service: 'ec2.amazonaws.com',
@@ -83,7 +86,7 @@ module Fluent
83
86
 
84
87
  # The map between a subservice name and a resource type.
85
88
  SUBSERVICE_MAP = \
86
- [APPENGINE_CONSTANTS, CONTAINER_CONSTANTS, DATAFLOW_CONSTANTS,
89
+ [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAFLOW_CONSTANTS,
87
90
  DATAPROC_CONSTANTS, ML_CONSTANTS]
88
91
  .map { |consts| [consts[:service], consts[:resource_type]] }.to_h
89
92
  # Default back to GCE if invalid value is detected.
@@ -91,18 +94,34 @@ module Fluent
91
94
 
92
95
  # The map between a resource type and expected subservice attributes.
93
96
  SUBSERVICE_METADATA_ATTRIBUTES = \
94
- [APPENGINE_CONSTANTS, CONTAINER_CONSTANTS, DATAPROC_CONSTANTS]
95
- .map { |consts| [consts[:resource_type], consts[:metadata_attributes]] }
96
- .to_h
97
+ [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAPROC_CONSTANTS].map do |consts|
98
+ [consts[:resource_type], consts[:metadata_attributes].to_set]
99
+ end.to_h
100
+ end
97
101
 
98
- # Default values for JSON payload keys to set the "trace",
99
- # "sourceLocation", "operation" and "labels" fields in the LogEntry.
100
- DEFAULT_PAYLOAD_KEY_PREFIX = 'logging.googleapis.com'
102
+ # Constants for configuration.
103
+ module ConfigConstants
104
+ # Default values for JSON payload keys to set the "httpRequest",
105
+ # "operation", "sourceLocation", "trace" fields in the LogEntry.
101
106
  DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'
102
- DEFAULT_OPERATION_KEY = "#{DEFAULT_PAYLOAD_KEY_PREFIX}/operation"
103
- DEFAULT_SOURCE_LOCATION_KEY =
104
- "#{DEFAULT_PAYLOAD_KEY_PREFIX}/sourceLocation"
105
- DEFAULT_TRACE_KEY = "#{DEFAULT_PAYLOAD_KEY_PREFIX}/trace"
107
+ DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'
108
+ DEFAULT_SOURCE_LOCATION_KEY = 'logging.googleapis.com/sourceLocation'
109
+ DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'
110
+
111
+ DEFAULT_METADATA_AGENT_URL =
112
+ 'http://local-metadata-agent.stackdriver.com:8000'
113
+ end
114
+
115
+ # Constants for log entry field extraction.
116
+ module InternalConstants
117
+ # Use empty string as request path when the local_resource_id of monitored
118
+ # resource can be implicitly inferred by Metadata Agent.
119
+ IMPLICIT_LOCAL_RESOURCE_ID = ''
120
+
121
+ # The label name of local_resource_id in the json payload. When a record
122
+ # has this field in the payload, we will use the value to retrieve
123
+ # monitored resource from Stackdriver Metadata agent.
124
+ LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'
106
125
 
107
126
  # Map from each field name under LogEntry to corresponding variables
108
127
  # required to perform field value extraction from the log record.
@@ -155,12 +174,14 @@ module Fluent
155
174
  }
156
175
  end
157
176
 
158
- include self::Constants
177
+ include self::ServiceConstants
178
+ include self::ConfigConstants
179
+ include self::InternalConstants
159
180
 
160
181
  Fluent::Plugin.register_output('google_cloud', self)
161
182
 
162
183
  PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'
163
- PLUGIN_VERSION = '0.6.7'
184
+ PLUGIN_VERSION = '0.6.8.pre.1'
164
185
 
165
186
  # Name of the Google cloud logging write scope.
166
187
  LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'
@@ -295,6 +316,11 @@ module Fluent
295
316
  config_param :monitoring_type, :string,
296
317
  :default => Monitoring::PrometheusMonitoringRegistry.name
297
318
 
319
+ # Whether to call metadata agent to retrieve monitored resource.
320
+ config_param :enable_metadata_agent, :bool, :default => false
321
+ config_param :metadata_agent_url, :string,
322
+ :default => DEFAULT_METADATA_AGENT_URL
323
+
298
324
  # rubocop:enable Style/HashSyntax
299
325
 
300
326
  # TODO: Add a log_name config option rather than just using the tag?
@@ -354,28 +380,32 @@ module Fluent
354
380
 
355
381
  @platform = detect_platform
356
382
 
357
- # Set required variables: @project_id, @vm_id, @vm_name and @zone by
358
- # making some requests to metadata server.
359
- #
360
- # Note: Once we support metadata injection at Logging API side, we might
361
- # no longer need to require all these metadata in logging agent. But for
362
- # now, they are still required.
363
- #
364
- # TODO(qingling128): After Metadata Agent support is added, try extracting
365
- # these info from responses from Metadata Agent first.
383
+ # Set agent-level monitored resource. This monitored resource is initiated
384
+ # as the logging agent starts up. It will be inherited by all log entries
385
+ # processed by this agent. First try to retrieve it via Metadata Agent.
386
+ if @enable_metadata_agent
387
+ # The local_resource_id for this should be the instance id. Since this
388
+ # can be implicitly inferred by Metadata Agent, we do not need to
389
+ # explicitly send the key.
390
+ # TODO(qingling128): Remove this logic once the resource is retrieved at
391
+ # a proper time (b/65175256).
392
+ @resource = query_metadata_agent_for_monitored_resource(
393
+ IMPLICIT_LOCAL_RESOURCE_ID)
394
+ end
395
+
396
+ # Set required variables: @project_id, @vm_id, @vm_name and @zone.
366
397
  set_required_metadata_variables
367
398
 
368
399
  # Retrieve monitored resource.
369
- #
370
- # TODO(qingling128): After Metadata Agent support is added, try retrieving
371
- # the monitored resource from Metadata Agent first.
372
- @resource = determine_agent_level_monitored_resource_via_legacy
400
+ # Fail over to retrieve monitored resource via the legacy path if we fail
401
+ # to get it from Metadata Agent.
402
+ @resource ||= determine_agent_level_monitored_resource_via_legacy
373
403
 
374
404
  # Set regexp that we should match tags against later on. Using a list
375
405
  # instead of a map to ensure order. For example, tags will be matched
376
406
  # against Cloud Functions first, then GKE.
377
407
  @tag_regexp_list = []
378
- if @resource.type == CONTAINER_CONSTANTS[:resource_type]
408
+ if @resource.type == GKE_CONSTANTS[:resource_type]
379
409
  # We only support Cloud Functions logs for GKE right now.
380
410
  if fetch_gce_metadata('instance/attributes/'
381
411
  ).split.include?('gcf_region')
@@ -388,7 +418,7 @@ module Fluent
388
418
  ]
389
419
  end
390
420
  @tag_regexp_list << [
391
- CONTAINER_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
421
+ GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
392
422
  ]
393
423
  end
394
424
 
@@ -404,7 +434,7 @@ module Fluent
404
434
 
405
435
  # Log an informational message containing the Logs viewer URL
406
436
  @log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
407
- "viewer?project=#{@project_id}&resource=#{@resource_type}/",
437
+ "viewer?project=#{@project_id}&resource=#{@resource.type}/",
408
438
  "instance_id/#{@vm_id}"
409
439
  end
410
440
 
@@ -420,39 +450,27 @@ module Fluent
420
450
  end
421
451
 
422
452
  def write(chunk)
423
- # Group the entries since we have to make one call per tag.
424
- grouped_entries = {}
425
- chunk.msgpack_each do |tag, *arr|
426
- sanitized_tag = sanitize_tag(tag)
427
- if sanitized_tag.nil?
428
- @log.warn "Dropping log entries with invalid tag: '#{tag}'. " \
429
- 'A tag should be a string with utf8 characters.'
430
- next
431
- end
432
- grouped_entries[sanitized_tag] ||= []
433
- grouped_entries[sanitized_tag].push(arr)
434
- end
453
+ grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)
435
454
 
436
- grouped_entries.each do |tag, arr|
455
+ grouped_entries.each do |(tag, local_resource_id), arr|
437
456
  entries = []
438
- group_resource, group_common_labels =
439
- determine_group_level_monitored_resource_and_labels(tag)
457
+ group_level_resource, group_level_common_labels =
458
+ determine_group_level_monitored_resource_and_labels(
459
+ tag, local_resource_id)
440
460
 
441
461
  arr.each do |time, record|
442
- next unless record.is_a?(Hash)
443
-
444
- extracted_resource_labels, extracted_common_labels = \
445
- determine_entry_level_labels(group_resource, record)
446
- entry_resource = group_resource.dup
447
- entry_resource.labels.merge!(extracted_resource_labels)
448
- entry_common_labels = \
449
- group_common_labels.merge(extracted_common_labels)
462
+ entry_level_resource, entry_level_common_labels =
463
+ determine_entry_level_monitored_resource_and_labels(
464
+ group_level_resource, group_level_common_labels, record)
450
465
 
451
466
  is_json = false
452
467
  if @detect_json
453
- # Save the timestamp if available, then clear it out to allow for
454
- # determining whether we should parse the log or message field.
468
+ # Save the timestamp and severity if available, then clear it out to
469
+ # allow for determining whether we should parse the log or message
470
+ # field.
455
471
  timestamp = record.delete('time')
472
+ severity = record.delete('severity')
473
+
456
474
  # If the log is json, we want to export it as a structured log
457
475
  # unless there is additional metadata that would be lost.
458
476
  record_json = nil
@@ -467,16 +485,16 @@ module Fluent
467
485
  record = record_json
468
486
  is_json = true
469
487
  end
470
- # Restore timestamp if necessary.
471
- unless record.key?('time') || timestamp.nil?
472
- record['time'] = timestamp
473
- end
488
+ # Restore timestamp and severity if necessary. Note that we don't
489
+ # want to override these keys in the JSON we've just parsed.
490
+ record['time'] ||= timestamp if timestamp
491
+ record['severity'] ||= severity if severity
474
492
  end
475
493
 
476
494
  ts_secs, ts_nanos = compute_timestamp(
477
- entry_resource.type, record, time)
495
+ entry_level_resource.type, record, time)
478
496
  severity = compute_severity(
479
- entry_resource.type, record, entry_common_labels)
497
+ entry_level_resource.type, record, entry_level_common_labels)
480
498
 
481
499
  ts_secs = begin
482
500
  Integer ts_secs
@@ -488,12 +506,13 @@ module Fluent
488
506
  rescue ArgumentError, TypeError
489
507
  ts_nanos
490
508
  end
509
+
491
510
  if @use_grpc
492
511
  entry = Google::Logging::V2::LogEntry.new(
493
- labels: entry_common_labels,
512
+ labels: entry_level_common_labels,
494
513
  resource: Google::Api::MonitoredResource.new(
495
- type: entry_resource.type,
496
- labels: entry_resource.labels.to_h
514
+ type: entry_level_resource.type,
515
+ labels: entry_level_resource.labels.to_h
497
516
  ),
498
517
  severity: grpc_severity(severity)
499
518
  )
@@ -510,10 +529,11 @@ module Fluent
510
529
  end
511
530
  else
512
531
  # Remove the labels if we didn't populate them with anything.
513
- entry_resource.labels = nil if entry_resource.labels.empty?
532
+ entry_level_resource.labels = nil if
533
+ entry_level_resource.labels.empty?
514
534
  entry = Google::Apis::LoggingV2beta1::LogEntry.new(
515
- labels: entry_common_labels,
516
- resource: entry_resource,
535
+ labels: entry_level_common_labels,
536
+ resource: entry_level_resource,
517
537
  severity: severity,
518
538
  timestamp: {
519
539
  seconds: ts_secs,
@@ -522,13 +542,12 @@ module Fluent
522
542
  )
523
543
  end
524
544
 
525
- # Get fully-qualified trace id for LogEntry "trace" field per config.
545
+ # Get fully-qualified trace id for LogEntry "trace" field.
526
546
  fq_trace_id = record.delete(@trace_key)
527
547
  entry.trace = fq_trace_id if fq_trace_id
528
548
 
529
549
  set_log_entry_fields(record, entry)
530
-
531
- set_payload(entry_resource.type, record, entry, is_json)
550
+ set_payload(entry_level_resource.type, record, entry, is_json)
532
551
 
533
552
  entries.push(entry)
534
553
  end
@@ -536,21 +555,21 @@ module Fluent
536
555
  next if entries.empty?
537
556
 
538
557
  log_name = "projects/#{@project_id}/logs/#{log_name(
539
- tag, group_resource)}"
558
+ tag, group_level_resource)}"
540
559
 
541
560
  # Does the actual write to the cloud logging api.
542
561
  client = api_client
543
562
  if @use_grpc
544
563
  begin
545
- labels_utf8_pairs = group_common_labels.map do |k, v|
564
+ labels_utf8_pairs = group_level_common_labels.map do |k, v|
546
565
  [k.encode('utf-8'), convert_to_utf8(v)]
547
566
  end
548
567
 
549
568
  write_request = Google::Logging::V2::WriteLogEntriesRequest.new(
550
569
  log_name: log_name,
551
570
  resource: Google::Api::MonitoredResource.new(
552
- type: group_resource.type,
553
- labels: group_resource.labels.to_h
571
+ type: group_level_resource.type,
572
+ labels: group_level_resource.labels.to_h
554
573
  ),
555
574
  labels: labels_utf8_pairs.to_h,
556
575
  entries: entries
@@ -560,8 +579,8 @@ module Fluent
560
579
  increment_successful_requests_count
561
580
  increment_ingested_entries_count(entries.length)
562
581
 
563
- # Let the user explicitly know when the first call succeeded,
564
- # to aid with verification and troubleshooting.
582
+ # Let the user explicitly know when the first call succeeded, to aid
583
+ # with verification and troubleshooting.
565
584
  unless @successful_call
566
585
  @successful_call = true
567
586
  @log.info 'Successfully sent gRPC to Stackdriver Logging API.'
@@ -600,8 +619,8 @@ module Fluent
600
619
  @log.warn "Dropping #{dropped} log message(s)",
601
620
  error: error.to_s, error_code: error.code.to_s
602
621
  else
603
- # Assume this is a problem with the request itself
604
- # and don't retry.
622
+ # Assume this is a problem with the request itself and don't
623
+ # retry.
605
624
  dropped = entries.length
606
625
  increment_dropped_entries_count(dropped)
607
626
  @log.error "Unknown response code #{error.code} from the "\
@@ -614,8 +633,8 @@ module Fluent
614
633
  write_request = \
615
634
  Google::Apis::LoggingV2beta1::WriteLogEntriesRequest.new(
616
635
  log_name: log_name,
617
- resource: group_resource,
618
- labels: group_common_labels,
636
+ resource: group_level_resource,
637
+ labels: group_level_common_labels,
619
638
  entries: entries)
620
639
 
621
640
  # TODO: RequestOptions
@@ -628,8 +647,8 @@ module Fluent
628
647
  increment_successful_requests_count
629
648
  increment_ingested_entries_count(entries.length)
630
649
 
631
- # Let the user explicitly know when the first call succeeded,
632
- # to aid with verification and troubleshooting.
650
+ # Let the user explicitly know when the first call succeeded, to aid
651
+ # with verification and troubleshooting.
633
652
  unless @successful_call
634
653
  @successful_call = true
635
654
  @log.info 'Successfully sent to Stackdriver Logging API.'
@@ -820,8 +839,9 @@ module Fluent
820
839
 
821
840
  # Retrieve monitored resource via the legacy way.
822
841
  #
823
- # TODO(qingling128): Use this as only a fallback plan after Metadata Agent
824
- # support is added.
842
+ # Note: This is just a failover plan if we fail to get metadata from
843
+ # Metadata Agent. Thus it should be equivalent to what Metadata Agent
844
+ # returns.
825
845
  def determine_agent_level_monitored_resource_via_legacy
826
846
  resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
827
847
  labels: {})
@@ -883,7 +903,7 @@ module Fluent
883
903
  }
884
904
 
885
905
  # GKE container.
886
- when CONTAINER_CONSTANTS[:resource_type]
906
+ when GKE_CONSTANTS[:resource_type]
887
907
  raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
888
908
  kube_env = YAML.load(raw_kube_env)
889
909
  return {
@@ -918,7 +938,7 @@ module Fluent
918
938
  rescue StandardError => e
919
939
  @log.error "Failed to set monitored resource labels for #{type}: ",
920
940
  error: e
921
- return {}
941
+ {}
922
942
  end
923
943
 
924
944
  # Determine the common labels that should be added to all log entries
@@ -942,7 +962,7 @@ module Fluent
942
962
 
943
963
  # GCE instance and GKE container.
944
964
  when COMPUTE_CONSTANTS[:resource_type],
945
- CONTAINER_CONSTANTS[:resource_type]
965
+ GKE_CONSTANTS[:resource_type]
946
966
  labels.merge!(
947
967
  "#{COMPUTE_CONSTANTS[:service]}/resource_name" => @vm_name)
948
968
 
@@ -954,138 +974,175 @@ module Fluent
954
974
  labels
955
975
  end
956
976
 
957
- # Determine the group level monitored resource and common labels shared by a
958
- # collection of entries.
959
- def determine_group_level_monitored_resource_and_labels(tag)
960
- # Determine group level monitored resource type. For certain types,
961
- # extract useful info from the tag and store those in
962
- # matched_regex_group.
963
- group_resource_type, matched_regex_group =
964
- determine_group_level_monitored_resource_type(tag)
965
-
966
- # Determine group level monitored resource labels and common labels.
967
- group_resource_labels, group_common_labels =
968
- determine_group_level_labels(group_resource_type, matched_regex_group)
969
-
970
- group_resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
971
- type: group_resource_type,
972
- labels: group_resource_labels.to_h
973
- )
974
-
975
- # Freeze the per-request state. Any further changes must be made on a
976
- # per-entry basis.
977
- group_resource.freeze
978
- group_resource.labels.freeze
979
- group_common_labels.freeze
980
-
981
- [group_resource, group_common_labels]
977
+ # Group the log entries by tag and local_resource_id pairs. Also filter out
978
+ # invalid non-Hash entries.
979
+ def group_log_entries_by_tag_and_local_resource_id(chunk)
980
+ groups = {}
981
+ chunk.msgpack_each do |tag, time, record|
982
+ unless record.is_a?(Hash)
983
+ @log.warn 'Dropping log entries with malformed record: ' \
984
+ "'#{record.inspect}'. " \
985
+ 'A log record should be in JSON format.'
986
+ next
987
+ end
988
+ sanitized_tag = sanitize_tag(tag)
989
+ if sanitized_tag.nil?
990
+ @log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
991
+ ' A tag should be a string with utf8 characters.'
992
+ next
993
+ end
994
+ local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
995
+ # A nil local_resource_id means "fall back to legacy".
996
+ hash_key = [sanitized_tag, local_resource_id].freeze
997
+ groups[hash_key] ||= []
998
+ groups[hash_key].push([time, record])
999
+ end
1000
+ groups
982
1001
  end
983
1002
 
984
- # Determine group level monitored resource type shared by a collection of
985
- # entries.
986
- # Return the resource type and tag regexp matched groups. The matched groups
987
- # only apply to some resource types. Return nil if not applicable or if
988
- # there is no match.
989
- def determine_group_level_monitored_resource_type(tag)
1003
+ # Determine the group level monitored resource and common labels shared by a
1004
+ # collection of entries.
1005
+ def determine_group_level_monitored_resource_and_labels(tag,
1006
+ local_resource_id)
1007
+ resource = @resource.dup
1008
+ resource.labels = @resource.labels.dup
1009
+ common_labels = @common_labels.dup
1010
+
1011
+ # Change the resource type and set matched_regexp_group if the tag matches
1012
+ # certain regexp.
1013
+ matched_regexp_group = nil # @tag_regexp_list can be an empty list.
990
1014
  @tag_regexp_list.each do |derived_type, tag_regexp|
991
- matched_regex_group = tag_regexp.match(tag)
992
- return [derived_type, matched_regex_group] if
993
- matched_regex_group
1015
+ matched_regexp_group = tag_regexp.match(tag)
1016
+ if matched_regexp_group
1017
+ resource.type = derived_type
1018
+ break
1019
+ end
994
1020
  end
995
- [@resource.type, nil]
996
- end
997
1021
 
998
- # Determine group level monitored resource labels and common labels. These
999
- # labels will be shared by a collection of entries.
1000
- def determine_group_level_labels(group_resource_type, matched_regex_group)
1001
- group_resource_labels = @resource.labels.dup
1002
- group_common_labels = @common_labels.dup
1022
+ # Determine the monitored resource based on the local_resource_id.
1023
+ # Different monitored resource types have unique ids in different formats.
1024
+ # We will query Metadata Agent for the monitored resource. Return the
1025
+ # legacy monitored resource (either the instance resource or the resource
1026
+ # inferred from the tag) if failed to get a monitored resource from
1027
+ # Metadata Agent with this key.
1028
+ #
1029
+ # Docker container:
1030
+ # "container.<container_id>"
1031
+ # "containerName.<container_name>"
1032
+ # GKE container:
1033
+ # "gke_containerName.<namespace_id>.<pod_name>.<container_name>"
1034
+ if @enable_metadata_agent && local_resource_id
1035
+ @log.debug 'Calling metadata agent with local_resource_id: ' \
1036
+ "#{local_resource_id}."
1037
+ retrieved_resource = query_metadata_agent_for_monitored_resource(
1038
+ local_resource_id)
1039
+ @log.debug 'Retrieved monitored resource from metadata agent: ' \
1040
+ "#{retrieved_resource.inspect}."
1041
+ if retrieved_resource
1042
+ resource = retrieved_resource
1043
+ # TODO(qingling128): Fix this temporary renaming from 'gke_container'
1044
+ # to 'container'.
1045
+ resource.type = 'container' if resource.type == 'gke_container'
1046
+ end
1047
+ end
1003
1048
 
1004
- case group_resource_type
1049
+ # Once the resource type is settled, determine the labels.
1050
+ case resource.type
1005
1051
  # Cloud Functions.
1006
1052
  when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
1007
- group_resource_labels.merge!(
1053
+ resource.labels.merge!(
1008
1054
  'region' => @gcf_region,
1009
1055
  'function_name' => decode_cloudfunctions_function_name(
1010
- matched_regex_group['encoded_function_name'])
1056
+ matched_regexp_group['encoded_function_name'])
1011
1057
  )
1012
-
1013
- instance_id = group_resource_labels.delete('instance_id')
1014
- group_common_labels.merge!(
1015
- "#{CONTAINER_CONSTANTS[:service]}/instance_id" => instance_id,
1058
+ instance_id = resource.labels.delete('instance_id')
1059
+ common_labels.merge!(
1060
+ "#{GKE_CONSTANTS[:service]}/instance_id" => instance_id,
1016
1061
  "#{COMPUTE_CONSTANTS[:service]}/resource_id" => instance_id,
1017
- "#{CONTAINER_CONSTANTS[:service]}/cluster_name" =>
1018
- group_resource_labels.delete('cluster_name'),
1062
+ "#{GKE_CONSTANTS[:service]}/cluster_name" =>
1063
+ resource.labels.delete('cluster_name'),
1019
1064
  "#{COMPUTE_CONSTANTS[:service]}/zone" =>
1020
- group_resource_labels.delete('zone')
1065
+ resource.labels.delete('zone')
1021
1066
  )
1022
1067
 
1023
1068
  # GKE container.
1024
- when CONTAINER_CONSTANTS[:resource_type]
1025
- if matched_regex_group
1069
+ when GKE_CONSTANTS[:resource_type]
1070
+ if matched_regexp_group
1026
1071
  # We only expect one occurrence of each key in the match group.
1027
1072
  resource_labels_candidates =
1028
- matched_regex_group.names.zip(matched_regex_group.captures).to_h
1029
- common_labels_candidates =
1030
- resource_labels_candidates.dup
1031
- group_resource_labels.merge!(
1073
+ matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
1074
+ common_labels_candidates = resource_labels_candidates.dup
1075
+ resource.labels.merge!(
1032
1076
  delete_and_extract_labels(
1033
1077
  resource_labels_candidates,
1034
1078
  # The kubernetes_tag_regexp is poorly named. 'namespace_name' is
1035
1079
  # in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
1036
1080
  # TODO(qingling128): Figure out how to put this map into
1037
- # constants like CONTAINER_CONSTANTS[:extra_resource_labels].
1081
+ # constants like GKE_CONSTANTS[:extra_resource_labels].
1038
1082
  'container_name' => 'container_name',
1039
1083
  'namespace_name' => 'namespace_id',
1040
1084
  'pod_name' => 'pod_id'))
1041
1085
 
1042
- group_common_labels.merge!(
1086
+ common_labels.merge!(
1043
1087
  delete_and_extract_labels(
1044
1088
  common_labels_candidates,
1045
- CONTAINER_CONSTANTS[:extra_common_labels]
1046
- .map { |l| [l, "#{CONTAINER_CONSTANTS[:service]}/#{l}"] }.to_h))
1089
+ GKE_CONSTANTS[:extra_common_labels]
1090
+ .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
1047
1091
  end
1092
+
1093
+ # Docker container.
1094
+ # TODO(qingling128): Remove this logic once the resource is retrieved at a
1095
+ # proper time (b/65175256).
1096
+ when DOCKER_CONSTANTS[:resource_type]
1097
+ common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")
1048
1098
  end
1049
1099
 
1050
- [group_resource_labels, group_common_labels]
1100
+ resource.freeze
1101
+ resource.labels.freeze
1102
+ common_labels.freeze
1103
+
1104
+ [resource, common_labels]
1051
1105
  end
1052
1106
 
1053
- # Extract entry resource and common labels that should be applied to
1054
- # individual entries from the group resource.
1055
- def determine_entry_level_labels(group_resource, record)
1056
- resource_labels = {}
1057
- common_labels = {}
1107
+ # Extract entry level monitored resource and common labels that should be
1108
+ # applied to individual entries.
1109
+ def determine_entry_level_monitored_resource_and_labels(
1110
+ group_level_resource, group_level_common_labels, record)
1111
+ resource = group_level_resource.dup
1112
+ resource.labels = group_level_resource.labels.dup
1113
+ common_labels = group_level_common_labels.dup
1058
1114
 
1115
+ case resource.type
1059
1116
  # Cloud Functions.
1060
- if group_resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
1061
- record.key?('log')
1062
- @cloudfunctions_log_match =
1063
- @compiled_cloudfunctions_log_regexp.match(record['log'])
1064
- common_labels['execution_id'] =
1065
- @cloudfunctions_log_match['execution_id'] if \
1066
- @cloudfunctions_log_match &&
1067
- @cloudfunctions_log_match['execution_id']
1068
- end
1117
+ when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
1118
+ if record.key?('log')
1119
+ @cloudfunctions_log_match =
1120
+ @compiled_cloudfunctions_log_regexp.match(record['log'])
1121
+ common_labels['execution_id'] =
1122
+ @cloudfunctions_log_match['execution_id'] if
1123
+ @cloudfunctions_log_match &&
1124
+ @cloudfunctions_log_match['execution_id']
1125
+ end
1069
1126
 
1070
- # GKE containers.
1071
- if group_resource.type == CONTAINER_CONSTANTS[:resource_type]
1127
+ # GKE container.
1128
+ when GKE_CONSTANTS[:resource_type]
1072
1129
  # Move the stdout/stderr annotation from the record into a label.
1073
1130
  common_labels.merge!(
1074
1131
  delete_and_extract_labels(
1075
- record, 'stream' => "#{CONTAINER_CONSTANTS[:service]}/stream"))
1132
+ record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"))
1076
1133
 
1077
1134
  # If the record has been annotated by the kubernetes_metadata_filter
1078
1135
  # plugin, then use that metadata. Otherwise, rely on commonLabels
1079
- # populated at the grouped_entries level from the group's tag.
1136
+ # populated from the group's tag.
1080
1137
  if record.key?('kubernetes')
1081
- resource_labels.merge!(
1138
+ resource.labels.merge!(
1082
1139
  delete_and_extract_labels(
1083
- record['kubernetes'], CONTAINER_CONSTANTS[:extra_resource_labels]
1140
+ record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
1084
1141
  .map { |l| [l, l] }.to_h))
1085
1142
  common_labels.merge!(
1086
1143
  delete_and_extract_labels(
1087
- record['kubernetes'], CONTAINER_CONSTANTS[:extra_common_labels]
1088
- .map { |l| [l, "#{CONTAINER_CONSTANTS[:service]}/#{l}"] }.to_h))
1144
+ record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
1145
+ .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
1089
1146
  # Prepend label/ to all user-defined labels' keys.
1090
1147
  if record['kubernetes'].key?('labels')
1091
1148
  common_labels.merge!(
@@ -1111,14 +1168,56 @@ module Fluent
1111
1168
  # Report them as monitored resource labels instead of common labels.
1112
1169
  # e.g. "dataflow.googleapis.com/job_id" => "job_id"
1113
1170
  [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
1114
- next unless group_resource.type == service_constants[:resource_type]
1115
- resource_labels.merge!(
1171
+ next unless resource.type == service_constants[:resource_type]
1172
+ resource.labels.merge!(
1116
1173
  delete_and_extract_labels(
1117
1174
  common_labels, service_constants[:extra_common_labels]
1118
1175
  .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h))
1119
1176
  end
1120
1177
 
1121
- [resource_labels, common_labels]
1178
+ [resource, common_labels]
1179
+ end
1180
+
1181
+ # Call Metadata Agent to get monitored resource information and parse
1182
+ # response to Google::Api::MonitoredResource.
1183
+ def query_metadata_agent_for_monitored_resource(local_resource_id)
1184
+ response = query_metadata_agent("monitoredResource/#{local_resource_id}")
1185
+ return nil if response.nil?
1186
+ begin
1187
+ resource = Google::Api::MonitoredResource.decode_json(response.to_json)
1188
+ rescue Google::Protobuf::ParseError, ArgumentError => e
1189
+ @log.error 'Error paring monitored resource from Metadata Agent. ' \
1190
+ "response: #{response.inspect}", error: e
1191
+ return nil
1192
+ end
1193
+
1194
+ # TODO(qingling128): Use Google::Api::MonitoredResource directly after we
1195
+ # upgrade gRPC version to include the fix for the protobuf map
1196
+ # corruption issue.
1197
+ Google::Apis::LoggingV2beta1::MonitoredResource.new(
1198
+ type: resource.type,
1199
+ labels: resource.labels.to_h
1200
+ )
1201
+ end
1202
+
1203
+ # Issue a request to the Metadata Agent's local API and parse the response
1204
+ # to JSON. Return nil in case of failure.
1205
+ def query_metadata_agent(path)
1206
+ url = "#{@metadata_agent_url}/#{path}"
1207
+ @log.debug("Calling Metadata Agent: #{url}")
1208
+ open(url) do |f|
1209
+ response = f.read
1210
+ parsed_hash = parse_json_or_nil(response)
1211
+ if parsed_hash.nil?
1212
+ @log.error 'Response from Metadata Agent is not in valid json ' \
1213
+ "format: '#{response.inspect}'."
1214
+ return nil
1215
+ end
1216
+ @log.debug "Response from Metadata Agent: #{parsed_hash}"
1217
+ return parsed_hash
1218
+ end
1219
+ rescue StandardError => e
1220
+ @log.error 'Error calling Metadata Agent.', error: e
1122
1221
  end
1123
1222
 
1124
1223
  # TODO: This functionality should eventually be available in another
@@ -1211,7 +1310,7 @@ module Fluent
1211
1310
  [ts_secs, ts_nanos]
1212
1311
  end
1213
1312
 
1214
- def compute_severity(resource_type, record, entry_common_labels)
1313
+ def compute_severity(resource_type, record, entry_level_common_labels)
1215
1314
  if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type]
1216
1315
  if @cloudfunctions_log_match && @cloudfunctions_log_match['severity']
1217
1316
  return parse_severity(@cloudfunctions_log_match['severity'])
@@ -1226,9 +1325,8 @@ module Fluent
1226
1325
  end
1227
1326
  elsif record.key?('severity')
1228
1327
  return parse_severity(record.delete('severity'))
1229
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
1230
- entry_common_labels.key?("#{CONTAINER_CONSTANTS[:service]}/stream")
1231
- stream = entry_common_labels["#{CONTAINER_CONSTANTS[:service]}/stream"]
1328
+ elsif resource_type == GKE_CONSTANTS[:resource_type]
1329
+ stream = entry_level_common_labels["#{GKE_CONSTANTS[:service]}/stream"]
1232
1330
  if stream == 'stdout'
1233
1331
  return 'INFO'
1234
1332
  elsif stream == 'stderr'
@@ -1452,9 +1550,8 @@ module Fluent
1452
1550
  hash.nil? || !hash.is_a?(Hash)
1453
1551
  label_map.each_with_object({}) \
1454
1552
  do |(original_label, new_label), extracted_labels|
1455
- extracted_labels[new_label] =
1456
- convert_to_utf8(hash.delete(original_label).to_s) if
1457
- hash.key?(original_label)
1553
+ value = hash.delete(original_label)
1554
+ extracted_labels[new_label] = convert_to_utf8(value.to_s) if value
1458
1555
  end
1459
1556
  end
1460
1557
 
@@ -1520,7 +1617,8 @@ module Fluent
1520
1617
  text_payload = record['log']
1521
1618
  elsif is_json
1522
1619
  json_payload = record
1523
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
1620
+ elsif [GKE_CONSTANTS[:resource_type],
1621
+ DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
1524
1622
  record.key?('log')
1525
1623
  text_payload = record['log']
1526
1624
  elsif record.size == 1 && record.key?('message')
@@ -1550,7 +1648,7 @@ module Fluent
1550
1648
  elsif resource.type == APPENGINE_CONSTANTS[:resource_type]
1551
1649
  # Add a prefix to Managed VM logs to prevent namespace collisions.
1552
1650
  tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
1553
- elsif resource.type == CONTAINER_CONSTANTS[:resource_type]
1651
+ elsif resource.type == GKE_CONSTANTS[:resource_type]
1554
1652
  # For Kubernetes logs, use just the container name as the log name
1555
1653
  # if we have it.
1556
1654
  if resource.labels && resource.labels.key?('container_name')