fluent-plugin-google-cloud 0.6.6 → 0.6.7.pre.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 1dc5ad48b790072401a4ed3d0b48e02e57a2a697
- data.tar.gz: e11801bebd08a12a47f7e84332fc72db19463772
+ metadata.gz: 4356aeba5dd3cdab64c30787a1dbc798f34f5182
+ data.tar.gz: 81ffb3cb3017fa43d42e3a45fce18cf8ddada913
  SHA512:
- metadata.gz: 2340a5b449123bbb88ba39ac155856f3561820e9a8468593aae5cc81a1b871b4602ce8a0143a7e3c909f10cedcf0515e5b297959da92ab23a3e0046025694026
- data.tar.gz: b66cc54e2372fbf79c2a2fd20828417a9dfef516e5ff5dce16b9b7af66644c7bf9a283dbcd17c9b1d54cfebaa5eb989695608f429db0f1db6869dfe385b00ec0
+ metadata.gz: faf46b39db9539a778d3c5144d8382e899e009ef17029334817959d316c979ab1419b91be0dcc61d146e94baf41e43b7a9e606699ac6bea1e595ade86dcd9595
+ data.tar.gz: 10f148519e9ff916b914e3e24020331d702f93878f54314476f907a63c0db081aee4ceda2f3b40d190a48e5b74cc58d314b62fd4d2901110613ae604850b1a1a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- fluent-plugin-google-cloud (0.6.6)
+ fluent-plugin-google-cloud (0.6.7.pre.1)
  fluentd (~> 0.10)
  google-api-client (~> 0.9.0)
  google-cloud-logging (= 0.24.1)
@@ -55,7 +55,7 @@ GEM
  googleauth (~> 0.5.1)
  grpc (~> 1.0)
  rly (~> 0.2.3)
- google-protobuf (3.4.0.2)
+ google-protobuf (3.4.0.2-x86_64-linux)
  googleapis-common-protos (1.3.5)
  google-protobuf (~> 3.2)
  grpc (~> 1.0)
@@ -67,7 +67,7 @@ GEM
  multi_json (~> 1.11)
  os (~> 0.9)
  signet (~> 0.7)
- grpc (1.2.5)
+ grpc (1.2.5-x86_64-linux)
  google-protobuf (~> 3.1)
  googleauth (~> 0.5.1)
  hashdiff (0.3.6)
@@ -88,7 +88,7 @@ GEM
  mocha (1.3.0)
  metaclass (~> 0.0.1)
  msgpack (1.1.0)
- multi_json (1.12.1)
+ multi_json (1.12.2)
  multipart-post (2.0.0)
  os (0.9.6)
  parser (2.4.0.0)
@@ -10,7 +10,7 @@ eos
  gem.homepage = \
  'https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud'
  gem.license = 'Apache-2.0'
- gem.version = '0.6.6'
+ gem.version = '0.6.7.pre.1'
  gem.authors = ['Todd Derr', 'Alex Robinson']
  gem.email = ['salty@google.com']
  gem.required_ruby_version = Gem::Requirement.new('>= 2.0')
@@ -38,12 +38,12 @@ end
  module Fluent
  # fluentd output plugin for the Stackdriver Logging API
  class GoogleCloudOutput < BufferedOutput
- # Constants for service names and resource types.
- module Constants
+ # Constants for service names, resource types and etc.
+ module ServiceConstants
  APPENGINE_CONSTANTS = {
  service: 'appengine.googleapis.com',
  resource_type: 'gae_app',
- metadata_attributes: %w(gae_backend_name gae_backend_version).to_set
+ metadata_attributes: %w(gae_backend_name gae_backend_version)
  }
  CLOUDFUNCTIONS_CONSTANTS = {
  service: 'cloudfunctions.googleapis.com',
@@ -53,12 +53,16 @@ module Fluent
  service: 'compute.googleapis.com',
  resource_type: 'gce_instance'
  }
- CONTAINER_CONSTANTS = {
+ GKE_CONSTANTS = {
  service: 'container.googleapis.com',
  resource_type: 'container',
  extra_resource_labels: %w(namespace_id pod_id container_name),
  extra_common_labels: %w(namespace_name pod_name),
- metadata_attributes: %w(kube-env).to_set
+ metadata_attributes: %w(kube-env)
+ }
+ DOCKER_CONSTANTS = {
+ service: 'docker.googleapis.com',
+ resource_type: 'docker_container'
  }
  DATAFLOW_CONSTANTS = {
  service: 'dataflow.googleapis.com',
@@ -68,8 +72,7 @@ module Fluent
  DATAPROC_CONSTANTS = {
  service: 'cluster.dataproc.googleapis.com',
  resource_type: 'cloud_dataproc_cluster',
- metadata_attributes:
- %w(dataproc-cluster-uuid dataproc-cluster-name).to_set
+ metadata_attributes: %w(dataproc-cluster-uuid dataproc-cluster-name)
  }
  EC2_CONSTANTS = {
  service: 'ec2.amazonaws.com',
@@ -83,7 +86,7 @@ module Fluent

  # The map between a subservice name and a resource type.
  SUBSERVICE_MAP = \
- [APPENGINE_CONSTANTS, CONTAINER_CONSTANTS, DATAFLOW_CONSTANTS,
+ [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAFLOW_CONSTANTS,
  DATAPROC_CONSTANTS, ML_CONSTANTS]
  .map { |consts| [consts[:service], consts[:resource_type]] }.to_h
  # Default back to GCE if invalid value is detected.
@@ -91,18 +94,35 @@ module Fluent

  # The map between a resource type and expected subservice attributes.
  SUBSERVICE_METADATA_ATTRIBUTES = \
- [APPENGINE_CONSTANTS, CONTAINER_CONSTANTS, DATAPROC_CONSTANTS]
- .map { |consts| [consts[:resource_type], consts[:metadata_attributes]] }
- .to_h
+ [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAPROC_CONSTANTS]
+ .map do |consts|
+ [consts[:resource_type], consts[:metadata_attributes].to_set]
+ end.to_h
+ end

- # Default values for JSON payload keys to set the "trace",
- # "sourceLocation", "operation" and "labels" fields in the LogEntry.
- DEFAULT_PAYLOAD_KEY_PREFIX = 'logging.googleapis.com'
+ # Constants for configuration.
+ module ConfigConstants
+ # Default values for JSON payload keys to set the "httpRequest",
+ # "operation", "sourceLocation", "trace" fields in the LogEntry.
  DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'
- DEFAULT_OPERATION_KEY = "#{DEFAULT_PAYLOAD_KEY_PREFIX}/operation"
- DEFAULT_SOURCE_LOCATION_KEY =
- "#{DEFAULT_PAYLOAD_KEY_PREFIX}/sourceLocation"
- DEFAULT_TRACE_KEY = "#{DEFAULT_PAYLOAD_KEY_PREFIX}/trace"
+ DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'
+ DEFAULT_SOURCE_LOCATION_KEY = 'logging.googleapis.com/sourceLocation'
+ DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'
+
+ DEFAULT_METADATA_AGENT_URL =
+ 'http://local-metadata-agent.stackdriver.com:8000'
+ end
+
+ # Constants for log entry field extraction.
+ module InternalConstants
+ # Use empty string as request path when the local_resource_id of monitored
+ # resource can be implicitly inferred by Metadata Agent.
+ IMPLICIT_LOCAL_RESOURCE_ID = ''
+
+ # The label name of local_resource_id in the json payload. When a record
+ # has this field in the payload, we will use the value to retrieve
+ # monitored resource from Stackdriver Metadata agent.
+ LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'

  # Map from each field name under LogEntry to corresponding variables
  # required to perform field value extraction from the log record.
@@ -155,12 +175,14 @@ module Fluent
  }
  end

- include self::Constants
+ include self::ServiceConstants
+ include self::ConfigConstants
+ include self::InternalConstants

  Fluent::Plugin.register_output('google_cloud', self)

  PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'
- PLUGIN_VERSION = '0.6.6'
+ PLUGIN_VERSION = '0.6.7.pre.1'

  # Name of the the Google cloud logging write scope.
  LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'
@@ -198,6 +220,11 @@ module Fluent
  DEFAULT_SOURCE_LOCATION_KEY
  config_param :trace_key, :string, :default => DEFAULT_TRACE_KEY

+ # Whether to try to detect if the record is a text log entry with JSON
+ # content that needs to be parsed.
+ config_param :detect_json, :bool, :default => false
+ # TODO(igorpeshansky): Add a parameter for the text field in the payload.
+
  # Whether to try to detect if the VM is owned by a "subservice" such as App
  # Engine of Kubernetes, rather than just associating the logs with the
  # compute service of the platform. This currently only has any effect when
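Editor's note: the new detect_json option above gates the JSON re-parsing that the reworked write method performs further down in this diff. When enabled, a record whose only field is 'log', 'message' or 'msg' and whose value is valid JSON is exported as a structured payload. A minimal standalone sketch of that decision, using a hypothetical record and Ruby's stock JSON parser in place of the plugin's parse_json_or_nil helper (the method name detect_json_payload is illustrative, not from the plugin):

require 'json'

# Hedged illustration only: the plugin additionally saves and restores the
# 'time' and 'severity' fields around this step.
def detect_json_payload(record, detect_json)
  return record unless detect_json && record.length == 1
  %w(log message msg).each do |field|
    next unless record.key?(field)
    parsed = JSON.parse(record[field]) rescue nil
    return parsed if parsed.is_a?(Hash)
  end
  record
end

detect_json_payload({ 'log' => '{"level":"info","msg":"ready"}' }, true)
# => {"level"=>"info", "msg"=>"ready"}
detect_json_payload({ 'log' => 'plain text line' }, true)
# => {"log"=>"plain text line"}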
@@ -290,6 +317,11 @@ module Fluent
  config_param :monitoring_type, :string,
  :default => Monitoring::PrometheusMonitoringRegistry.name

+ # Whether to call metadata agent to retrieve monitored resource.
+ config_param :enable_metadata_agent, :bool, :default => false
+ config_param :metadata_agent_url, :string,
+ :default => DEFAULT_METADATA_AGENT_URL
+
  # rubocop:enable Style/HashSyntax

  # TODO: Add a log_name config option rather than just using the tag?
@@ -349,28 +381,30 @@ module Fluent

  @platform = detect_platform

- # Set required variables: @project_id, @vm_id, @vm_name and @zone by
- # making some requests to metadata server.
- #
- # Note: Once we support metadata injection at Logging API side, we might
- # no longer need to require all these metadata in logging agent. But for
- # now, they are still required.
- #
- # TODO(qingling128): After Metadata Agent support is added, try extracting
- # these info from responses from Metadata Agent first.
+ # Set agent-level monitored resource. This monitored resource is initiated
+ # as the logging agent starts up. It will be inherited by all log entries
+ # processed by this agent. First try to retrieve it via Metadata Agent.
+ if @enable_metadata_agent
+ # The local_resource_id for this should be the instance id. Since this
+ # can be implicitly inferred by Metadata Agent, we do not need to
+ # explicitly send the key.
+ @resource = call_metadata_agent_for_monitored_resource(
+ IMPLICIT_LOCAL_RESOURCE_ID)
+ end
+
+ # Set required variables: @project_id, @vm_id, @vm_name and @zone.
  set_required_metadata_variables

  # Retrieve monitored resource.
- #
- # TODO(qingling128): After Metadata Agent support is added, try retrieving
- # the monitored resource from Metadata Agent first.
- @resource = determine_agent_level_monitored_resource_via_legacy
+ # Fail over to retrieve monitored resource via the legacy path if we fail
+ # to get it from Metadata Agent.
+ @resource ||= determine_agent_level_monitored_resource_via_legacy

  # Set regexp that we should match tags against later on. Using a list
  # instead of a map to ensure order. For example, tags will be matched
  # against Cloud Functions first, then GKE.
  @tag_regexp_list = []
- if @resource.type == CONTAINER_CONSTANTS[:resource_type]
+ if @resource.type == GKE_CONSTANTS[:resource_type]
  # We only support Cloud Functions logs for GKE right now.
  if fetch_gce_metadata('instance/attributes/'
  ).split.include?('gcf_region')
@@ -383,7 +417,7 @@ module Fluent
  ]
  end
  @tag_regexp_list << [
- CONTAINER_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
+ GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
  ]
  end

@@ -399,7 +433,7 @@ module Fluent

  # Log an informational message containing the Logs viewer URL
  @log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
- "viewer?project=#{@project_id}&resource=#{@resource_type}/",
+ "viewer?project=#{@project_id}&resource=#{@resource.type}/",
  "instance_id/#{@vm_id}"
  end

@@ -415,62 +449,52 @@ module Fluent
  end

  def write(chunk)
- # Group the entries since we have to make one call per tag.
- grouped_entries = {}
- chunk.msgpack_each do |tag, *arr|
- sanitized_tag = sanitize_tag(tag)
- if sanitized_tag.nil?
- @log.warn "Dropping log entries with invalid tag: '#{tag}'. " \
- 'A tag should be a string with utf8 characters.'
- next
- end
- grouped_entries[sanitized_tag] ||= []
- grouped_entries[sanitized_tag].push(arr)
- end
+ grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)

- grouped_entries.each do |tag, arr|
+ grouped_entries.each do |(tag, local_resource_id), arr|
  entries = []
- group_resource, group_common_labels =
- determine_group_level_monitored_resource_and_labels(tag)
+ group_level_resource, group_level_common_labels =
+ determine_group_level_monitored_resource_and_labels(
+ tag, local_resource_id)

  arr.each do |time, record|
- next unless record.is_a?(Hash)
-
- extracted_resource_labels, extracted_common_labels = \
- determine_entry_level_labels(group_resource, record)
- entry_resource = group_resource.dup
- entry_resource.labels.merge!(extracted_resource_labels)
- entry_common_labels = \
- group_common_labels.merge(extracted_common_labels)
-
- if entry_resource.type == CONTAINER_CONSTANTS[:resource_type]
- # Save the timestamp if available, then clear it out to allow for
- # determining whether we should parse the log or message field.
- timestamp = record.key?('time') ? record['time'] : nil
- record.delete('time')
+ entry_level_resource, entry_level_common_labels =
+ determine_entry_level_monitored_resource_and_labels(
+ group_level_resource, group_level_common_labels, record)
+
+ is_json = false
+ if @detect_json
+ # Save the timestamp and severity if available, then clear it out to
+ # allow for determining whether we should parse the log or message
+ # field.
+ timestamp = record.delete('time')
+ severity = record.delete('severity')
+
  # If the log is json, we want to export it as a structured log
  # unless there is additional metadata that would be lost.
- is_json = false
- if record.length == 1 && record.key?('log')
- record_json = parse_json_or_nil(record['log'])
- end
- if record.length == 1 && record.key?('message')
- record_json = parse_json_or_nil(record['message'])
+ record_json = nil
+ if record.length == 1
+ %w(log message msg).each do |field|
+ if record.key?(field)
+ record_json = parse_json_or_nil(record[field])
+ end
+ end
  end
  unless record_json.nil?
  record = record_json
  is_json = true
  end
- # Restore timestamp if necessary.
- unless record.key?('time') || timestamp.nil?
- record['time'] = timestamp
- end
+ # Restore timestamp and severity if necessary. Note that the nested
+ # json might also has 'time' and 'severity' fields. If that is the
+ # case, we do not want to override the value.
+ record['time'] ||= timestamp if timestamp
+ record['severity'] ||= severity if severity
  end

  ts_secs, ts_nanos = compute_timestamp(
- entry_resource.type, record, time)
+ entry_level_resource.type, record, time)
  severity = compute_severity(
- entry_resource.type, record, entry_common_labels)
+ entry_level_resource.type, record, entry_level_common_labels)

  ts_secs = begin
  Integer ts_secs
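Editor's note: the write loop above now iterates over entries grouped by a (tag, local_resource_id) pair rather than by tag alone; the grouping helper itself is added further down in this diff. A hedged sketch of the structure it hands back, with hypothetical tags, ids and records:

# Keys are frozen [tag, local_resource_id] pairs; a nil id means the record
# carried no 'logging.googleapis.com/local_resource_id' field and the legacy
# resource detection applies. Values are [time, record] pairs.
grouped_entries = {
  ['kubernetes.mypod_default_nginx', nil].freeze =>
    [[1_504_000_000, { 'log' => 'line one' }]],
  ['docker.nginx', 'container.8c8259cbeecb'].freeze =>
    [[1_504_000_001, { 'log' => 'line two' }]]
}
grouped_entries.each do |(tag, local_resource_id), arr|
  # One group-level monitored resource lookup and one Logging API write
  # request per key.
end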
@@ -482,12 +506,13 @@ module Fluent
  rescue ArgumentError, TypeError
  ts_nanos
  end
+
  if @use_grpc
  entry = Google::Logging::V2::LogEntry.new(
- labels: entry_common_labels,
+ labels: entry_level_common_labels,
  resource: Google::Api::MonitoredResource.new(
- type: entry_resource.type,
- labels: entry_resource.labels.to_h
+ type: entry_level_resource.type,
+ labels: entry_level_resource.labels.to_h
  ),
  severity: grpc_severity(severity)
  )
@@ -504,10 +529,11 @@ module Fluent
  end
  else
  # Remove the labels if we didn't populate them with anything.
- entry_resource.labels = nil if entry_resource.labels.empty?
+ entry_level_resource.labels = nil if
+ entry_level_resource.labels.empty?
  entry = Google::Apis::LoggingV2beta1::LogEntry.new(
- labels: entry_common_labels,
- resource: entry_resource,
+ labels: entry_level_common_labels,
+ resource: entry_level_resource,
  severity: severity,
  timestamp: {
  seconds: ts_secs,
@@ -516,17 +542,12 @@ module Fluent
  )
  end

- # Get fully-qualified trace id for LogEntry "trace" field per config.
+ # Get fully-qualified trace id for LogEntry "trace" field.
  fq_trace_id = record.delete(@trace_key)
  entry.trace = fq_trace_id if fq_trace_id

  set_log_entry_fields(record, entry)
-
- if @use_grpc
- set_payload_grpc(entry_resource.type, record, entry, is_json)
- else
- set_payload(entry_resource.type, record, entry, is_json)
- end
+ set_payload(entry_level_resource.type, record, entry, is_json)

  entries.push(entry)
  end
@@ -534,21 +555,21 @@ module Fluent
  next if entries.empty?

  log_name = "projects/#{@project_id}/logs/#{log_name(
- tag, group_resource)}"
+ tag, group_level_resource)}"

  # Does the actual write to the cloud logging api.
  client = api_client
  if @use_grpc
  begin
- labels_utf8_pairs = group_common_labels.map do |k, v|
+ labels_utf8_pairs = group_level_common_labels.map do |k, v|
  [k.encode('utf-8'), convert_to_utf8(v)]
  end

  write_request = Google::Logging::V2::WriteLogEntriesRequest.new(
  log_name: log_name,
  resource: Google::Api::MonitoredResource.new(
- type: group_resource.type,
- labels: group_resource.labels.to_h
+ type: group_level_resource.type,
+ labels: group_level_resource.labels.to_h
  ),
  labels: labels_utf8_pairs.to_h,
  entries: entries
@@ -558,8 +579,8 @@ module Fluent
  increment_successful_requests_count
  increment_ingested_entries_count(entries.length)

- # Let the user explicitly know when the first call succeeded,
- # to aid with verification and troubleshooting.
+ # Let the user explicitly know when the first call succeeded, to aid
+ # with verification and troubleshooting.
  unless @successful_call
  @successful_call = true
  @log.info 'Successfully sent gRPC to Stackdriver Logging API.'
@@ -598,8 +619,8 @@ module Fluent
  @log.warn "Dropping #{dropped} log message(s)",
  error: error.to_s, error_code: error.code.to_s
  else
- # Assume this is a problem with the request itself
- # and don't retry.
+ # Assume this is a problem with the request itself and don't
+ # retry.
  dropped = entries.length
  increment_dropped_entries_count(dropped)
  @log.error "Unknown response code #{error.code} from the "\
@@ -612,8 +633,8 @@ module Fluent
  write_request = \
  Google::Apis::LoggingV2beta1::WriteLogEntriesRequest.new(
  log_name: log_name,
- resource: group_resource,
- labels: group_common_labels,
+ resource: group_level_resource,
+ labels: group_level_common_labels,
  entries: entries)

  # TODO: RequestOptions
@@ -626,8 +647,8 @@ module Fluent
  increment_successful_requests_count
  increment_ingested_entries_count(entries.length)

- # Let the user explicitly know when the first call succeeded,
- # to aid with verification and troubleshooting.
+ # Let the user explicitly know when the first call succeeded, to aid
+ # with verification and troubleshooting.
  unless @successful_call
  @successful_call = true
  @log.info 'Successfully sent to Stackdriver Logging API.'
@@ -818,8 +839,9 @@ module Fluent

  # Retrieve monitored resource via the legacy way.
  #
- # TODO(qingling128): Use this as only a fallback plan after Metadata Agent
- # support is added.
+ # Note: This is just a failover plan if we fail to get metadata from
+ # Metadata Agent. Thus it should be equivalent to what Metadata Agent
+ # returns.
  def determine_agent_level_monitored_resource_via_legacy
  resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
  labels: {})
@@ -881,7 +903,7 @@ module Fluent
  }

  # GKE container.
- when CONTAINER_CONSTANTS[:resource_type]
+ when GKE_CONSTANTS[:resource_type]
  raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
  kube_env = YAML.load(raw_kube_env)
  return {
@@ -916,7 +938,7 @@ module Fluent
  rescue StandardError => e
  @log.error "Failed to set monitored resource labels for #{type}: ",
  error: e
- return {}
+ {}
  end

  # Determine the common labels that should be added to all log entries
@@ -940,7 +962,7 @@ module Fluent

  # GCE instance and GKE container.
  when COMPUTE_CONSTANTS[:resource_type],
- CONTAINER_CONSTANTS[:resource_type]
+ GKE_CONSTANTS[:resource_type]
  labels.merge!(
  "#{COMPUTE_CONSTANTS[:service]}/resource_name" => @vm_name)

@@ -952,138 +974,175 @@ module Fluent
  labels
  end

- # Determine the group level monitored resource and common labels shared by a
- # collection of entries.
- def determine_group_level_monitored_resource_and_labels(tag)
- # Determine group level monitored resource type. For certain types,
- # extract useful info from the tag and store those in
- # matched_regex_group.
- group_resource_type, matched_regex_group =
- determine_group_level_monitored_resource_type(tag)
-
- # Determine group level monitored resource labels and common labels.
- group_resource_labels, group_common_labels =
- determine_group_level_labels(group_resource_type, matched_regex_group)
-
- group_resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
- type: group_resource_type,
- labels: group_resource_labels.to_h
- )
-
- # Freeze the per-request state. Any further changes must be made on a
- # per-entry basis.
- group_resource.freeze
- group_resource.labels.freeze
- group_common_labels.freeze
-
- [group_resource, group_common_labels]
+ # Group the log entries by tag and local_resource_id pairs.
+ def group_log_entries_by_tag_and_local_resource_id(chunk)
+ groups = {}
+ chunk.msgpack_each do |tag, time, record|
+ unless record.is_a?(Hash)
+ @log.warn 'Dropping log entries with malformed record: ' \
+ "'#{record.inspect}'. " \
+ 'A log record should be in JSON format.'
+ next
+ end
+ sanitized_tag = sanitize_tag(tag)
+ if sanitized_tag.nil?
+ @log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
+ ' A tag should be a string with utf8 characters.'
+ next
+ end
+ local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
+ # A nil local_resource_id means "fall back to legacy".
+ hash_key = [sanitized_tag, local_resource_id].freeze
+ groups[hash_key] ||= []
+ groups[hash_key].push([time, record])
+ end
+ groups
  end

- # Determine group level monitored resource type shared by a collection of
- # entries.
- # Return the resource type and tag regexp matched groups. The matched groups
- # only apply to some resource types. Return nil if not applicable or if
- # there is no match.
- def determine_group_level_monitored_resource_type(tag)
+ # Determine the group level monitored resource and common labels shared by a
+ # collection of entries.
+ def determine_group_level_monitored_resource_and_labels(tag,
+ local_resource_id)
+ resource = @resource.dup
+ resource.labels = @resource.labels.dup
+ common_labels = @common_labels.dup
+
+ # Change the resource type and set matched_regexp_group if the tag matches
+ # certain regexp.
+ matched_regexp_group = nil # @tag_regexp_list can be an empty list.
  @tag_regexp_list.each do |derived_type, tag_regexp|
- matched_regex_group = tag_regexp.match(tag)
- return [derived_type, matched_regex_group] if
- matched_regex_group
+ matched_regexp_group = tag_regexp.match(tag)
+ if matched_regexp_group
+ resource.type = derived_type
+ break
+ end
  end
- [@resource.type, nil]
- end

- # Determine group level monitored resource labels and common labels. These
- # labels will be shared by a collection of entries.
- def determine_group_level_labels(group_resource_type, matched_regex_group)
- group_resource_labels = @resource.labels.dup
- group_common_labels = @common_labels.dup
+ # Determine the monitored resource based on the local_resource_id.
+ # Different monitored resource types have unique ids in different format.
+ # We will query Metadata Agent for the monitored resource. Return the
+ # legacy monitored resource (either the instance resource or the resource
+ # inferred from the tag) if failed to get a monitored resource from
+ # Metadata Agent with this key.
+ #
+ # Docker container:
+ # "container.<container_id>"
+ # "containerName.<container_name>"
+ # GKE container:
+ # "gke_containerName.<namespace_id>.<pod_name>.<container_name>"
+ if @enable_metadata_agent && local_resource_id
+ @log.debug 'Calling metadata agent with local_resource_id: ' \
+ "#{local_resource_id}."
+ retrieved_resource = call_metadata_agent_for_monitored_resource(
+ local_resource_id)
+ @log.debug 'Retrieved monitored resource from metadata agent: ' \
+ "#{retrieved_resource.inspect}."
+ unless retrieved_resource.nil?
+ # TODO(qingling128): Fix this temporary renaming from 'gke_container'
+ # to 'container'.
+ retrieved_resource.type = 'container' if
+ retrieved_resource.type == 'gke_container'
+ resource = retrieved_resource
+ end
+ end

- case group_resource_type
+ # Once the resource type is settled down, determine the labels.
+ case resource.type
  # Cloud Functions.
  when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
- group_resource_labels.merge!(
+ resource.labels.merge!(
  'region' => @gcf_region,
  'function_name' => decode_cloudfunctions_function_name(
- matched_regex_group['encoded_function_name'])
+ matched_regexp_group['encoded_function_name'])
  )
-
- instance_id = group_resource_labels.delete('instance_id')
- group_common_labels.merge!(
- "#{CONTAINER_CONSTANTS[:service]}/instance_id" => instance_id,
+ instance_id = resource.labels.delete('instance_id')
+ common_labels.merge!(
+ "#{GKE_CONSTANTS[:service]}/instance_id" => instance_id,
  "#{COMPUTE_CONSTANTS[:service]}/resource_id" => instance_id,
- "#{CONTAINER_CONSTANTS[:service]}/cluster_name" =>
- group_resource_labels.delete('cluster_name'),
+ "#{GKE_CONSTANTS[:service]}/cluster_name" =>
+ resource.labels.delete('cluster_name'),
  "#{COMPUTE_CONSTANTS[:service]}/zone" =>
- group_resource_labels.delete('zone')
+ resource.labels.delete('zone')
  )

  # GKE container.
- when CONTAINER_CONSTANTS[:resource_type]
- if matched_regex_group
+ when GKE_CONSTANTS[:resource_type]
+ if matched_regexp_group
  # We only expect one occurrence of each key in the match group.
  resource_labels_candidates =
- matched_regex_group.names.zip(matched_regex_group.captures).to_h
- common_labels_candidates =
- resource_labels_candidates.dup
- group_resource_labels.merge!(
+ matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
+ common_labels_candidates = resource_labels_candidates.dup
+ resource.labels.merge!(
  delete_and_extract_labels(
  resource_labels_candidates,
  # The kubernetes_tag_regexp is poorly named. 'namespace_name' is
  # in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
  # TODO(qingling128): Figure out how to put this map into
- # constants like CONTAINER_CONSTANTS[:extra_resource_labels].
+ # constants like GKE_CONSTANTS[:extra_resource_labels].
  'container_name' => 'container_name',
  'namespace_name' => 'namespace_id',
  'pod_name' => 'pod_id'))

- group_common_labels.merge!(
+ common_labels.merge!(
  delete_and_extract_labels(
  common_labels_candidates,
- CONTAINER_CONSTANTS[:extra_common_labels]
- .map { |l| [l, "#{CONTAINER_CONSTANTS[:service]}/#{l}"] }.to_h))
+ GKE_CONSTANTS[:extra_common_labels]
+ .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
  end
+
+ # Docker container.
+ # TODO(qingling128): Remove this logic once the resource is retrieved at a
+ # proper time (b/65175256).
+ when DOCKER_CONSTANTS[:resource_type]
+ common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")
  end

- [group_resource_labels, group_common_labels]
+ resource.freeze
+ resource.labels.freeze
+ common_labels.freeze
+
+ [resource, common_labels]
  end

- # Extract entry resource and common labels that should be applied to
- # individual entries from the group resource.
- def determine_entry_level_labels(group_resource, record)
- resource_labels = {}
- common_labels = {}
+ # Extract entry level monitored resource and common labels that should be
+ # applied to individual entries.
+ def determine_entry_level_monitored_resource_and_labels(
+ group_level_resource, group_level_common_labels, record)
+ resource = group_level_resource.dup
+ resource.labels = group_level_resource.labels.dup
+ common_labels = group_level_common_labels.dup

+ case resource.type
  # Cloud Functions.
- if group_resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
- record.key?('log')
- @cloudfunctions_log_match =
- @compiled_cloudfunctions_log_regexp.match(record['log'])
- common_labels['execution_id'] =
- @cloudfunctions_log_match['execution_id'] if \
- @cloudfunctions_log_match &&
- @cloudfunctions_log_match['execution_id']
- end
+ when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
+ if record.key?('log')
+ @cloudfunctions_log_match =
+ @compiled_cloudfunctions_log_regexp.match(record['log'])
+ common_labels['execution_id'] =
+ @cloudfunctions_log_match['execution_id'] if
+ @cloudfunctions_log_match &&
+ @cloudfunctions_log_match['execution_id']
+ end

- # GKE containers.
- if group_resource.type == CONTAINER_CONSTANTS[:resource_type]
+ # GKE container.
+ when GKE_CONSTANTS[:resource_type]
  # Move the stdout/stderr annotation from the record into a label.
  common_labels.merge!(
  delete_and_extract_labels(
- record, 'stream' => "#{CONTAINER_CONSTANTS[:service]}/stream"))
+ record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"))

  # If the record has been annotated by the kubernetes_metadata_filter
  # plugin, then use that metadata. Otherwise, rely on commonLabels
- # populated at the grouped_entries level from the group's tag.
+ # populated from the group's tag.
  if record.key?('kubernetes')
- resource_labels.merge!(
+ resource.labels.merge!(
  delete_and_extract_labels(
- record['kubernetes'], CONTAINER_CONSTANTS[:extra_resource_labels]
+ record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
  .map { |l| [l, l] }.to_h))
  common_labels.merge!(
  delete_and_extract_labels(
- record['kubernetes'], CONTAINER_CONSTANTS[:extra_common_labels]
- .map { |l| [l, "#{CONTAINER_CONSTANTS[:service]}/#{l}"] }.to_h))
+ record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
+ .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
  # Prepend label/ to all user-defined labels' keys.
  if record['kubernetes'].key?('labels')
  common_labels.merge!(
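Editor's note: the comment block in the hunk above lists the local_resource_id formats the plugin expects. A hedged illustration of a record carrying a GKE-style id, with hypothetical namespace, pod and container names (whatever produces the field upstream is outside this diff):

# Hypothetical input record; the plugin only consumes the key and strips it
# before the entry is written.
record = {
  'log' => 'GET / 200',
  'logging.googleapis.com/local_resource_id' =>
    'gke_containerName.default.nginx-1234.nginx'
}
# group_log_entries_by_tag_and_local_resource_id deletes the key and, when
# enable_metadata_agent is on, the id is used to look up the monitored
# resource from the Metadata Agent.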
@@ -1109,14 +1168,56 @@ module Fluent
  # Report them as monitored resource labels instead of common labels.
  # e.g. "dataflow.googleapis.com/job_id" => "job_id"
  [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
- next unless group_resource.type == service_constants[:resource_type]
- resource_labels.merge!(
+ next unless resource.type == service_constants[:resource_type]
+ resource.labels.merge!(
  delete_and_extract_labels(
  common_labels, service_constants[:extra_common_labels]
  .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h))
  end

- [resource_labels, common_labels]
+ [resource, common_labels]
+ end
+
+ # Call Metadata Agent to get monitored resource information and parse
+ # response to Google::Api::MonitoredResource.
+ def call_metadata_agent_for_monitored_resource(local_resource_id)
+ response = query_metadata_agent("monitoredResource/#{local_resource_id}")
+ return nil if response.nil?
+ begin
+ resource = Google::Api::MonitoredResource.decode_json(response.to_json)
+ rescue Google::Protobuf::ParseError, ArgumentError => e
+ @log.error 'Error paring monitored resource from Metadata Agent. ' \
+ "response: #{response.inspect}", error: e
+ return nil
+ end
+
+ # TODO(qingling128): Use Google::Api::MonitoredResource directly after we
+ # upgrade gRPC version to include the fix for the protobuf map
+ # corruption issue.
+ Google::Apis::LoggingV2beta1::MonitoredResource.new(
+ type: resource.type,
+ labels: resource.labels.to_h
+ )
+ end
+
+ # Issue a request to the Metadata Agent's local API and parse the response
+ # to JSON. Return nil in case of failure.
+ def query_metadata_agent(path)
+ url = "#{@metadata_agent_url}/#{path}"
+ @log.debug("Calling Metadata Agent: #{url}")
+ open(url) do |f|
+ response = f.read
+ parsed_hash = parse_json_or_nil(response)
+ if parsed_hash.nil?
+ @log.error 'Response from Metadata Agent is not in valid json ' \
+ "format: '#{response.inspect}'."
+ return nil
+ end
+ @log.debug "Response from Metadata Agent: #{parsed_hash}"
+ return parsed_hash
+ end
+ rescue StandardError => e
+ @log.error 'Error calling Metadata Agent.', error: e
  end

  # TODO: This functionality should eventually be available in another
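Editor's note: query_metadata_agent above issues a plain HTTP GET (via open(url)) against the configured metadata_agent_url and expects a JSON body that can be decoded into a Google::Api::MonitoredResource. A hedged sketch of the round trip; the example id, label names and values are assumptions, not part of this diff:

# Assuming an agent listens at the default URL, the lookup for a Docker
# container id would be roughly:
#
#   GET http://local-metadata-agent.stackdriver.com:8000/monitoredResource/container.8c8259cbeecb
#
# with a body along the lines of (labels illustrative only):
#
#   { "type": "docker_container",
#     "labels": { "location": "us-central1-a", "container_id": "8c8259cbeecb" } }
#
# call_metadata_agent_for_monitored_resource then re-encodes that hash into a
# Google::Apis::LoggingV2beta1::MonitoredResource with the same type and labels.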
@@ -1209,7 +1310,7 @@ module Fluent
  [ts_secs, ts_nanos]
  end

- def compute_severity(resource_type, record, entry_common_labels)
+ def compute_severity(resource_type, record, entry_level_common_labels)
  if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type]
  if @cloudfunctions_log_match && @cloudfunctions_log_match['severity']
  return parse_severity(@cloudfunctions_log_match['severity'])
@@ -1224,9 +1325,8 @@ module Fluent
  end
  elsif record.key?('severity')
  return parse_severity(record.delete('severity'))
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
- entry_common_labels.key?("#{CONTAINER_CONSTANTS[:service]}/stream")
- stream = entry_common_labels["#{CONTAINER_CONSTANTS[:service]}/stream"]
+ elsif resource_type == GKE_CONSTANTS[:resource_type]
+ stream = entry_level_common_labels["#{GKE_CONSTANTS[:service]}/stream"]
  if stream == 'stdout'
  return 'INFO'
  elsif stream == 'stderr'
@@ -1296,6 +1396,9 @@ module Fluent
  'FINE' => 'DEBUG',
  'FINER' => 'DEBUG',
  'FINEST' => 'DEBUG',
+ # java.util.logging levels (only missing ones from above listed).
+ 'SEVERE' => 'ERROR',
+ 'CONFIG' => 'DEBUG',
  # nginx levels (only missing ones from above listed).
  'CRIT' => 'CRITICAL',
  'EMERG' => 'EMERGENCY',
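Editor's note: the additions above extend the severity table with the remaining java.util.logging level names. Assuming the plugin's existing parse_severity helper (shown here out of context), the effect on a record's severity field would be:

parse_severity('SEVERE')  # => 'ERROR'
parse_severity('CONFIG')  # => 'DEBUG'
parse_severity('WARNING') # => 'WARNING' (already a valid Logging severity)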
@@ -1447,34 +1550,8 @@ module Fluent
  hash.nil? || !hash.is_a?(Hash)
  label_map.each_with_object({}) \
  do |(original_label, new_label), extracted_labels|
- extracted_labels[new_label] =
- convert_to_utf8(hash.delete(original_label).to_s) if
- hash.key?(original_label)
- end
- end
-
- def set_payload(resource_type, record, entry, is_json)
- # If this is a Cloud Functions log that matched the expected regexp,
- # use text payload. Otherwise, use JSON if we found valid JSON, or text
- # payload in the following cases:
- # 1. This is a Cloud Functions log and the 'log' key is available
- # 2. This is an unstructured Container log and the 'log' key is available
- # 3. The only remaining key is 'message'
- if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
- @cloudfunctions_log_match
- entry.text_payload = @cloudfunctions_log_match['text']
- elsif resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
- record.key?('log')
- entry.text_payload = record['log']
- elsif is_json
- entry.json_payload = record
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
- record.key?('log')
- entry.text_payload = record['log']
- elsif record.size == 1 && record.key?('message')
- entry.text_payload = record['message']
- else
- entry.json_payload = record
+ value = hash.delete(original_label)
+ extracted_labels[new_label] = convert_to_utf8(value.to_s) if value
  end
  end

@@ -1522,7 +1599,10 @@ module Fluent
  ret
  end

- def set_payload_grpc(resource_type, record, entry, is_json)
+ def set_payload(resource_type, record, entry, is_json)
+ # Only one of {text_payload, json_payload} will be set.
+ text_payload = nil
+ json_payload = nil
  # If this is a Cloud Functions log that matched the expected regexp,
  # use text payload. Otherwise, use JSON if we found valid JSON, or text
  # payload in the following cases:
@@ -1531,20 +1611,34 @@ module Fluent
  # 3. The only remaining key is 'message'
  if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
  @cloudfunctions_log_match
- entry.text_payload = convert_to_utf8(
- @cloudfunctions_log_match['text'])
+ text_payload = @cloudfunctions_log_match['text']
  elsif resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
  record.key?('log')
- entry.text_payload = convert_to_utf8(record['log'])
+ text_payload = record['log']
  elsif is_json
- entry.json_payload = struct_from_ruby(record)
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
+ json_payload = record
+ elsif [GKE_CONSTANTS[:resource_type],
+ DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
  record.key?('log')
- entry.text_payload = convert_to_utf8(record['log'])
+ text_payload = record['log']
  elsif record.size == 1 && record.key?('message')
- entry.text_payload = convert_to_utf8(record['message'])
+ text_payload = record['message']
  else
- entry.json_payload = struct_from_ruby(record)
+ json_payload = record
+ end
+
+ if json_payload
+ entry.json_payload = if @use_grpc
+ struct_from_ruby(json_payload)
+ else
+ json_payload
+ end
+ elsif text_payload
+ entry.text_payload = if @use_grpc
+ convert_to_utf8(text_payload)
+ else
+ text_payload
+ end
  end
  end

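Editor's note: the old set_payload/set_payload_grpc pair is collapsed above into a single set_payload that first chooses between a text and a JSON payload and only then applies the gRPC-specific conversions. A hedged summary of the resulting behavior for a few hypothetical records (assuming the record was not already detected as JSON):

# For a GKE or Docker resource type:
#   { 'log' => 'plain text line' }   -> entry.text_payload = 'plain text line'
# For any resource type:
#   { 'message' => 'only field' }    -> entry.text_payload = 'only field'
#   { 'a' => 1, 'b' => 2 }           -> entry.json_payload = the record itself
# With @use_grpc, the same choice is made, but text goes through
# convert_to_utf8 and the hash through struct_from_ruby.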
@@ -1554,7 +1648,7 @@ module Fluent
  elsif resource.type == APPENGINE_CONSTANTS[:resource_type]
  # Add a prefix to Managed VM logs to prevent namespace collisions.
  tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
- elsif resource.type == CONTAINER_CONSTANTS[:resource_type]
+ elsif resource.type == GKE_CONSTANTS[:resource_type]
  # For Kubernetes logs, use just the container name as the log name
  # if we have it.
  if resource.labels && resource.labels.key?('container_name')