fluent-plugin-google-cloud 0.6.6 → 0.6.7.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 1dc5ad48b790072401a4ed3d0b48e02e57a2a697
- data.tar.gz: e11801bebd08a12a47f7e84332fc72db19463772
+ metadata.gz: 4356aeba5dd3cdab64c30787a1dbc798f34f5182
+ data.tar.gz: 81ffb3cb3017fa43d42e3a45fce18cf8ddada913
  SHA512:
- metadata.gz: 2340a5b449123bbb88ba39ac155856f3561820e9a8468593aae5cc81a1b871b4602ce8a0143a7e3c909f10cedcf0515e5b297959da92ab23a3e0046025694026
- data.tar.gz: b66cc54e2372fbf79c2a2fd20828417a9dfef516e5ff5dce16b9b7af66644c7bf9a283dbcd17c9b1d54cfebaa5eb989695608f429db0f1db6869dfe385b00ec0
+ metadata.gz: faf46b39db9539a778d3c5144d8382e899e009ef17029334817959d316c979ab1419b91be0dcc61d146e94baf41e43b7a9e606699ac6bea1e595ade86dcd9595
+ data.tar.gz: 10f148519e9ff916b914e3e24020331d702f93878f54314476f907a63c0db081aee4ceda2f3b40d190a48e5b74cc58d314b62fd4d2901110613ae604850b1a1a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- fluent-plugin-google-cloud (0.6.6)
+ fluent-plugin-google-cloud (0.6.7.pre.1)
  fluentd (~> 0.10)
  google-api-client (~> 0.9.0)
  google-cloud-logging (= 0.24.1)
@@ -55,7 +55,7 @@ GEM
  googleauth (~> 0.5.1)
  grpc (~> 1.0)
  rly (~> 0.2.3)
- google-protobuf (3.4.0.2)
+ google-protobuf (3.4.0.2-x86_64-linux)
  googleapis-common-protos (1.3.5)
  google-protobuf (~> 3.2)
  grpc (~> 1.0)
@@ -67,7 +67,7 @@ GEM
  multi_json (~> 1.11)
  os (~> 0.9)
  signet (~> 0.7)
- grpc (1.2.5)
+ grpc (1.2.5-x86_64-linux)
  google-protobuf (~> 3.1)
  googleauth (~> 0.5.1)
  hashdiff (0.3.6)
@@ -88,7 +88,7 @@ GEM
  mocha (1.3.0)
  metaclass (~> 0.0.1)
  msgpack (1.1.0)
- multi_json (1.12.1)
+ multi_json (1.12.2)
  multipart-post (2.0.0)
  os (0.9.6)
  parser (2.4.0.0)
@@ -10,7 +10,7 @@ eos
  gem.homepage = \
  'https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud'
  gem.license = 'Apache-2.0'
- gem.version = '0.6.6'
+ gem.version = '0.6.7.pre.1'
  gem.authors = ['Todd Derr', 'Alex Robinson']
  gem.email = ['salty@google.com']
  gem.required_ruby_version = Gem::Requirement.new('>= 2.0')
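
The gemspec now declares a prerelease version. A quick illustration of how RubyGems orders that version string relative to the releases around it (standard Gem::Version semantics, not code from this package):

require 'rubygems'

Gem::Version.new('0.6.7.pre.1') > Gem::Version.new('0.6.6')  # => true
Gem::Version.new('0.6.7.pre.1') < Gem::Version.new('0.6.7')  # => true, prereleases sort before the final release
Gem::Version.new('0.6.7.pre.1').prerelease?                  # => true
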
@@ -38,12 +38,12 @@ end
  module Fluent
  # fluentd output plugin for the Stackdriver Logging API
  class GoogleCloudOutput < BufferedOutput
- # Constants for service names and resource types.
- module Constants
+ # Constants for service names, resource types, etc.
+ module ServiceConstants
  APPENGINE_CONSTANTS = {
  service: 'appengine.googleapis.com',
  resource_type: 'gae_app',
- metadata_attributes: %w(gae_backend_name gae_backend_version).to_set
+ metadata_attributes: %w(gae_backend_name gae_backend_version)
  }
  CLOUDFUNCTIONS_CONSTANTS = {
  service: 'cloudfunctions.googleapis.com',
@@ -53,12 +53,16 @@ module Fluent
  service: 'compute.googleapis.com',
  resource_type: 'gce_instance'
  }
- CONTAINER_CONSTANTS = {
+ GKE_CONSTANTS = {
  service: 'container.googleapis.com',
  resource_type: 'container',
  extra_resource_labels: %w(namespace_id pod_id container_name),
  extra_common_labels: %w(namespace_name pod_name),
- metadata_attributes: %w(kube-env).to_set
+ metadata_attributes: %w(kube-env)
+ }
+ DOCKER_CONSTANTS = {
+ service: 'docker.googleapis.com',
+ resource_type: 'docker_container'
  }
  DATAFLOW_CONSTANTS = {
  service: 'dataflow.googleapis.com',
@@ -68,8 +72,7 @@ module Fluent
  DATAPROC_CONSTANTS = {
  service: 'cluster.dataproc.googleapis.com',
  resource_type: 'cloud_dataproc_cluster',
- metadata_attributes:
- %w(dataproc-cluster-uuid dataproc-cluster-name).to_set
+ metadata_attributes: %w(dataproc-cluster-uuid dataproc-cluster-name)
  }
  EC2_CONSTANTS = {
  service: 'ec2.amazonaws.com',
@@ -83,7 +86,7 @@ module Fluent

  # The map between a subservice name and a resource type.
  SUBSERVICE_MAP = \
- [APPENGINE_CONSTANTS, CONTAINER_CONSTANTS, DATAFLOW_CONSTANTS,
+ [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAFLOW_CONSTANTS,
  DATAPROC_CONSTANTS, ML_CONSTANTS]
  .map { |consts| [consts[:service], consts[:resource_type]] }.to_h
  # Default back to GCE if invalid value is detected.
@@ -91,18 +94,35 @@ module Fluent

  # The map between a resource type and expected subservice attributes.
  SUBSERVICE_METADATA_ATTRIBUTES = \
- [APPENGINE_CONSTANTS, CONTAINER_CONSTANTS, DATAPROC_CONSTANTS]
- .map { |consts| [consts[:resource_type], consts[:metadata_attributes]] }
- .to_h
+ [APPENGINE_CONSTANTS, GKE_CONSTANTS, DATAPROC_CONSTANTS]
+ .map do |consts|
+ [consts[:resource_type], consts[:metadata_attributes].to_set]
+ end.to_h
+ end

- # Default values for JSON payload keys to set the "trace",
- # "sourceLocation", "operation" and "labels" fields in the LogEntry.
- DEFAULT_PAYLOAD_KEY_PREFIX = 'logging.googleapis.com'
+ # Constants for configuration.
+ module ConfigConstants
+ # Default values for JSON payload keys to set the "httpRequest",
+ # "operation", "sourceLocation", "trace" fields in the LogEntry.
  DEFAULT_HTTP_REQUEST_KEY = 'httpRequest'
- DEFAULT_OPERATION_KEY = "#{DEFAULT_PAYLOAD_KEY_PREFIX}/operation"
- DEFAULT_SOURCE_LOCATION_KEY =
- "#{DEFAULT_PAYLOAD_KEY_PREFIX}/sourceLocation"
- DEFAULT_TRACE_KEY = "#{DEFAULT_PAYLOAD_KEY_PREFIX}/trace"
+ DEFAULT_OPERATION_KEY = 'logging.googleapis.com/operation'
+ DEFAULT_SOURCE_LOCATION_KEY = 'logging.googleapis.com/sourceLocation'
+ DEFAULT_TRACE_KEY = 'logging.googleapis.com/trace'
+
+ DEFAULT_METADATA_AGENT_URL =
+ 'http://local-metadata-agent.stackdriver.com:8000'
+ end
+
+ # Constants for log entry field extraction.
+ module InternalConstants
+ # Use empty string as request path when the local_resource_id of monitored
+ # resource can be implicitly inferred by Metadata Agent.
+ IMPLICIT_LOCAL_RESOURCE_ID = ''
+
+ # The label name of local_resource_id in the json payload. When a record
+ # has this field in the payload, we will use the value to retrieve
+ # monitored resource from Stackdriver Metadata agent.
+ LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'

  # Map from each field name under LogEntry to corresponding variables
  # required to perform field value extraction from the log record.
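
A minimal sketch of what the reworked SUBSERVICE_METADATA_ATTRIBUTES construction above evaluates to, using only APPENGINE_CONSTANTS from this diff; the .to_set conversion now happens when the map is built rather than being stored inside each *_CONSTANTS hash:

require 'set'

APPENGINE_CONSTANTS = {
  service: 'appengine.googleapis.com',
  resource_type: 'gae_app',
  metadata_attributes: %w(gae_backend_name gae_backend_version)
}

SUBSERVICE_METADATA_ATTRIBUTES =
  [APPENGINE_CONSTANTS]
  .map { |consts| [consts[:resource_type], consts[:metadata_attributes].to_set] }
  .to_h

p SUBSERVICE_METADATA_ATTRIBUTES
# => {"gae_app"=>#<Set: {"gae_backend_name", "gae_backend_version"}>}
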
@@ -155,12 +175,14 @@ module Fluent
  }
  end

- include self::Constants
+ include self::ServiceConstants
+ include self::ConfigConstants
+ include self::InternalConstants

  Fluent::Plugin.register_output('google_cloud', self)

  PLUGIN_NAME = 'Fluentd Google Cloud Logging plugin'
- PLUGIN_VERSION = '0.6.6'
+ PLUGIN_VERSION = '0.6.7.pre.1'

  # Name of the Google cloud logging write scope.
  LOGGING_SCOPE = 'https://www.googleapis.com/auth/logging.write'
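
The constants now live in three nested modules that are pulled into the class via `include self::...`. A standalone sketch of that Ruby pattern with hypothetical names (ExampleOutput, ExampleConstants and EXAMPLE_SERVICE are illustrative, not from the plugin):

class ExampleOutput
  # Nested module holding constants, mirroring ServiceConstants above.
  module ExampleConstants
    EXAMPLE_SERVICE = 'example.googleapis.com'
  end

  # self is the class here, so self::ExampleConstants resolves the nested
  # module; including it makes its constants usable without a prefix.
  include self::ExampleConstants

  def service_name
    EXAMPLE_SERVICE
  end
end

puts ExampleOutput.new.service_name  # => example.googleapis.com
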
@@ -198,6 +220,11 @@ module Fluent
  DEFAULT_SOURCE_LOCATION_KEY
  config_param :trace_key, :string, :default => DEFAULT_TRACE_KEY

+ # Whether to try to detect if the record is a text log entry with JSON
+ # content that needs to be parsed.
+ config_param :detect_json, :bool, :default => false
+ # TODO(igorpeshansky): Add a parameter for the text field in the payload.
+
  # Whether to try to detect if the VM is owned by a "subservice" such as App
  # Engine or Kubernetes, rather than just associating the logs with the
  # compute service of the platform. This currently only has any effect when
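
A minimal sketch of what the new `detect_json` option enables in write() further down this diff: when a record has a single `log`, `message`, or `msg` field, the plugin attempts to parse it as JSON and, on success, promotes the parsed hash to the record. The `parse_json_or_nil` helper below is a stand-in for the plugin's method of the same name, assumed here to return a Hash on success and nil otherwise:

require 'json'

# Stand-in for the plugin's parse_json_or_nil (assumption: Hash on success, nil otherwise).
def parse_json_or_nil(text)
  parsed = JSON.parse(text)
  parsed.is_a?(Hash) ? parsed : nil
rescue JSON::ParserError, TypeError
  nil
end

record = { 'log' => '{"severity":"ERROR","msg":"boom"}' }

record_json = nil
if record.length == 1
  %w(log message msg).each do |field|
    record_json = parse_json_or_nil(record[field]) if record.key?(field)
  end
end
record = record_json unless record_json.nil?

p record  # => {"severity"=>"ERROR", "msg"=>"boom"}
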
@@ -290,6 +317,11 @@ module Fluent
  config_param :monitoring_type, :string,
  :default => Monitoring::PrometheusMonitoringRegistry.name

+ # Whether to call metadata agent to retrieve monitored resource.
+ config_param :enable_metadata_agent, :bool, :default => false
+ config_param :metadata_agent_url, :string,
+ :default => DEFAULT_METADATA_AGENT_URL
+
  # rubocop:enable Style/HashSyntax

  # TODO: Add a log_name config option rather than just using the tag?
@@ -349,28 +381,30 @@ module Fluent

  @platform = detect_platform

- # Set required variables: @project_id, @vm_id, @vm_name and @zone by
- # making some requests to metadata server.
- #
- # Note: Once we support metadata injection at Logging API side, we might
- # no longer need to require all these metadata in logging agent. But for
- # now, they are still required.
- #
- # TODO(qingling128): After Metadata Agent support is added, try extracting
- # these info from responses from Metadata Agent first.
+ # Set agent-level monitored resource. This monitored resource is initiated
+ # as the logging agent starts up. It will be inherited by all log entries
+ # processed by this agent. First try to retrieve it via Metadata Agent.
+ if @enable_metadata_agent
+ # The local_resource_id for this should be the instance id. Since this
+ # can be implicitly inferred by Metadata Agent, we do not need to
+ # explicitly send the key.
+ @resource = call_metadata_agent_for_monitored_resource(
+ IMPLICIT_LOCAL_RESOURCE_ID)
+ end
+
+ # Set required variables: @project_id, @vm_id, @vm_name and @zone.
  set_required_metadata_variables

  # Retrieve monitored resource.
- #
- # TODO(qingling128): After Metadata Agent support is added, try retrieving
- # the monitored resource from Metadata Agent first.
- @resource = determine_agent_level_monitored_resource_via_legacy
+ # Fail over to retrieve monitored resource via the legacy path if we fail
+ # to get it from Metadata Agent.
+ @resource ||= determine_agent_level_monitored_resource_via_legacy

  # Set regexp that we should match tags against later on. Using a list
  # instead of a map to ensure order. For example, tags will be matched
  # against Cloud Functions first, then GKE.
  @tag_regexp_list = []
- if @resource.type == CONTAINER_CONSTANTS[:resource_type]
+ if @resource.type == GKE_CONSTANTS[:resource_type]
  # We only support Cloud Functions logs for GKE right now.
  if fetch_gce_metadata('instance/attributes/'
  ).split.include?('gcf_region')
@@ -383,7 +417,7 @@ module Fluent
  ]
  end
  @tag_regexp_list << [
- CONTAINER_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
+ GKE_CONSTANTS[:resource_type], @compiled_kubernetes_tag_regexp
  ]
  end

@@ -399,7 +433,7 @@ module Fluent

  # Log an informational message containing the Logs viewer URL
  @log.info 'Logs viewer address: https://console.cloud.google.com/logs/',
- "viewer?project=#{@project_id}&resource=#{@resource_type}/",
+ "viewer?project=#{@project_id}&resource=#{@resource.type}/",
  "instance_id/#{@vm_id}"
  end

@@ -415,62 +449,52 @@ module Fluent
  end

  def write(chunk)
- # Group the entries since we have to make one call per tag.
- grouped_entries = {}
- chunk.msgpack_each do |tag, *arr|
- sanitized_tag = sanitize_tag(tag)
- if sanitized_tag.nil?
- @log.warn "Dropping log entries with invalid tag: '#{tag}'. " \
- 'A tag should be a string with utf8 characters.'
- next
- end
- grouped_entries[sanitized_tag] ||= []
- grouped_entries[sanitized_tag].push(arr)
- end
+ grouped_entries = group_log_entries_by_tag_and_local_resource_id(chunk)

- grouped_entries.each do |tag, arr|
+ grouped_entries.each do |(tag, local_resource_id), arr|
  entries = []
- group_resource, group_common_labels =
- determine_group_level_monitored_resource_and_labels(tag)
+ group_level_resource, group_level_common_labels =
+ determine_group_level_monitored_resource_and_labels(
+ tag, local_resource_id)

  arr.each do |time, record|
- next unless record.is_a?(Hash)
-
- extracted_resource_labels, extracted_common_labels = \
- determine_entry_level_labels(group_resource, record)
- entry_resource = group_resource.dup
- entry_resource.labels.merge!(extracted_resource_labels)
- entry_common_labels = \
- group_common_labels.merge(extracted_common_labels)
-
- if entry_resource.type == CONTAINER_CONSTANTS[:resource_type]
- # Save the timestamp if available, then clear it out to allow for
- # determining whether we should parse the log or message field.
- timestamp = record.key?('time') ? record['time'] : nil
- record.delete('time')
+ entry_level_resource, entry_level_common_labels =
+ determine_entry_level_monitored_resource_and_labels(
+ group_level_resource, group_level_common_labels, record)
+
+ is_json = false
+ if @detect_json
+ # Save the timestamp and severity if available, then clear it out to
+ # allow for determining whether we should parse the log or message
+ # field.
+ timestamp = record.delete('time')
+ severity = record.delete('severity')
+
  # If the log is json, we want to export it as a structured log
  # unless there is additional metadata that would be lost.
- is_json = false
- if record.length == 1 && record.key?('log')
- record_json = parse_json_or_nil(record['log'])
- end
- if record.length == 1 && record.key?('message')
- record_json = parse_json_or_nil(record['message'])
+ record_json = nil
+ if record.length == 1
+ %w(log message msg).each do |field|
+ if record.key?(field)
+ record_json = parse_json_or_nil(record[field])
+ end
+ end
  end
  unless record_json.nil?
  record = record_json
  is_json = true
  end
- # Restore timestamp if necessary.
- unless record.key?('time') || timestamp.nil?
- record['time'] = timestamp
- end
+ # Restore timestamp and severity if necessary. Note that the nested
+ # json might also have 'time' and 'severity' fields. If that is the
+ # case, we do not want to override the value.
+ record['time'] ||= timestamp if timestamp
+ record['severity'] ||= severity if severity
  end

  ts_secs, ts_nanos = compute_timestamp(
- entry_resource.type, record, time)
+ entry_level_resource.type, record, time)
  severity = compute_severity(
- entry_resource.type, record, entry_common_labels)
+ entry_level_resource.type, record, entry_level_common_labels)

  ts_secs = begin
  Integer ts_secs
@@ -482,12 +506,13 @@ module Fluent
  rescue ArgumentError, TypeError
  ts_nanos
  end
+
  if @use_grpc
  entry = Google::Logging::V2::LogEntry.new(
- labels: entry_common_labels,
+ labels: entry_level_common_labels,
  resource: Google::Api::MonitoredResource.new(
- type: entry_resource.type,
- labels: entry_resource.labels.to_h
+ type: entry_level_resource.type,
+ labels: entry_level_resource.labels.to_h
  ),
  severity: grpc_severity(severity)
  )
@@ -504,10 +529,11 @@ module Fluent
  end
  else
  # Remove the labels if we didn't populate them with anything.
- entry_resource.labels = nil if entry_resource.labels.empty?
+ entry_level_resource.labels = nil if
+ entry_level_resource.labels.empty?
  entry = Google::Apis::LoggingV2beta1::LogEntry.new(
- labels: entry_common_labels,
- resource: entry_resource,
+ labels: entry_level_common_labels,
+ resource: entry_level_resource,
  severity: severity,
  timestamp: {
  seconds: ts_secs,
@@ -516,17 +542,12 @@ module Fluent
  )
  end

- # Get fully-qualified trace id for LogEntry "trace" field per config.
+ # Get fully-qualified trace id for LogEntry "trace" field.
  fq_trace_id = record.delete(@trace_key)
  entry.trace = fq_trace_id if fq_trace_id

  set_log_entry_fields(record, entry)
-
- if @use_grpc
- set_payload_grpc(entry_resource.type, record, entry, is_json)
- else
- set_payload(entry_resource.type, record, entry, is_json)
- end
+ set_payload(entry_level_resource.type, record, entry, is_json)

  entries.push(entry)
  end
@@ -534,21 +555,21 @@ module Fluent
  next if entries.empty?

  log_name = "projects/#{@project_id}/logs/#{log_name(
- tag, group_resource)}"
+ tag, group_level_resource)}"

  # Does the actual write to the cloud logging api.
  client = api_client
  if @use_grpc
  begin
- labels_utf8_pairs = group_common_labels.map do |k, v|
+ labels_utf8_pairs = group_level_common_labels.map do |k, v|
  [k.encode('utf-8'), convert_to_utf8(v)]
  end

  write_request = Google::Logging::V2::WriteLogEntriesRequest.new(
  log_name: log_name,
  resource: Google::Api::MonitoredResource.new(
- type: group_resource.type,
- labels: group_resource.labels.to_h
+ type: group_level_resource.type,
+ labels: group_level_resource.labels.to_h
  ),
  labels: labels_utf8_pairs.to_h,
  entries: entries
@@ -558,8 +579,8 @@ module Fluent
  increment_successful_requests_count
  increment_ingested_entries_count(entries.length)

- # Let the user explicitly know when the first call succeeded,
- # to aid with verification and troubleshooting.
+ # Let the user explicitly know when the first call succeeded, to aid
+ # with verification and troubleshooting.
  unless @successful_call
  @successful_call = true
  @log.info 'Successfully sent gRPC to Stackdriver Logging API.'
@@ -598,8 +619,8 @@ module Fluent
  @log.warn "Dropping #{dropped} log message(s)",
  error: error.to_s, error_code: error.code.to_s
  else
- # Assume this is a problem with the request itself
- # and don't retry.
+ # Assume this is a problem with the request itself and don't
+ # retry.
  dropped = entries.length
  increment_dropped_entries_count(dropped)
  @log.error "Unknown response code #{error.code} from the "\
@@ -612,8 +633,8 @@ module Fluent
  write_request = \
  Google::Apis::LoggingV2beta1::WriteLogEntriesRequest.new(
  log_name: log_name,
- resource: group_resource,
- labels: group_common_labels,
+ resource: group_level_resource,
+ labels: group_level_common_labels,
  entries: entries)

  # TODO: RequestOptions
@@ -626,8 +647,8 @@ module Fluent
  increment_successful_requests_count
  increment_ingested_entries_count(entries.length)

- # Let the user explicitly know when the first call succeeded,
- # to aid with verification and troubleshooting.
+ # Let the user explicitly know when the first call succeeded, to aid
+ # with verification and troubleshooting.
  unless @successful_call
  @successful_call = true
  @log.info 'Successfully sent to Stackdriver Logging API.'
@@ -818,8 +839,9 @@ module Fluent

  # Retrieve monitored resource via the legacy way.
  #
- # TODO(qingling128): Use this as only a fallback plan after Metadata Agent
- # support is added.
+ # Note: This is just a failover plan if we fail to get metadata from
+ # Metadata Agent. Thus it should be equivalent to what Metadata Agent
+ # returns.
  def determine_agent_level_monitored_resource_via_legacy
  resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
  labels: {})
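
A tiny sketch of the failover described in the comment above, with hypothetical stand-ins for the two lookup paths: the Metadata Agent result is taken when present, and `||=` lets the legacy path fill in only when that result is nil.

# Hypothetical stand-ins, illustrative only.
def resource_from_metadata_agent
  nil                       # e.g. agent disabled, unreachable, or returned bad JSON
end

def resource_via_legacy
  { type: 'gce_instance' }  # legacy metadata-server path
end

enable_metadata_agent = true

resource = resource_from_metadata_agent if enable_metadata_agent
resource ||= resource_via_legacy  # only runs when the agent gave us nothing

p resource  # => {:type=>"gce_instance"}
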
@@ -881,7 +903,7 @@ module Fluent
  }

  # GKE container.
- when CONTAINER_CONSTANTS[:resource_type]
+ when GKE_CONSTANTS[:resource_type]
  raw_kube_env = fetch_gce_metadata('instance/attributes/kube-env')
  kube_env = YAML.load(raw_kube_env)
  return {
@@ -916,7 +938,7 @@ module Fluent
  rescue StandardError => e
  @log.error "Failed to set monitored resource labels for #{type}: ",
  error: e
- return {}
+ {}
  end

  # Determine the common labels that should be added to all log entries
@@ -940,7 +962,7 @@ module Fluent

  # GCE instance and GKE container.
  when COMPUTE_CONSTANTS[:resource_type],
- CONTAINER_CONSTANTS[:resource_type]
+ GKE_CONSTANTS[:resource_type]
  labels.merge!(
  "#{COMPUTE_CONSTANTS[:service]}/resource_name" => @vm_name)

@@ -952,138 +974,175 @@ module Fluent
  labels
  end

- # Determine the group level monitored resource and common labels shared by a
- # collection of entries.
- def determine_group_level_monitored_resource_and_labels(tag)
- # Determine group level monitored resource type. For certain types,
- # extract useful info from the tag and store those in
- # matched_regex_group.
- group_resource_type, matched_regex_group =
- determine_group_level_monitored_resource_type(tag)
-
- # Determine group level monitored resource labels and common labels.
- group_resource_labels, group_common_labels =
- determine_group_level_labels(group_resource_type, matched_regex_group)
-
- group_resource = Google::Apis::LoggingV2beta1::MonitoredResource.new(
- type: group_resource_type,
- labels: group_resource_labels.to_h
- )
-
- # Freeze the per-request state. Any further changes must be made on a
- # per-entry basis.
- group_resource.freeze
- group_resource.labels.freeze
- group_common_labels.freeze
-
- [group_resource, group_common_labels]
+ # Group the log entries by tag and local_resource_id pairs.
+ def group_log_entries_by_tag_and_local_resource_id(chunk)
+ groups = {}
+ chunk.msgpack_each do |tag, time, record|
+ unless record.is_a?(Hash)
+ @log.warn 'Dropping log entries with malformed record: ' \
+ "'#{record.inspect}'. " \
+ 'A log record should be in JSON format.'
+ next
+ end
+ sanitized_tag = sanitize_tag(tag)
+ if sanitized_tag.nil?
+ @log.warn "Dropping log entries with invalid tag: '#{tag.inspect}'." \
+ ' A tag should be a string with utf8 characters.'
+ next
+ end
+ local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
+ # A nil local_resource_id means "fall back to legacy".
+ hash_key = [sanitized_tag, local_resource_id].freeze
+ groups[hash_key] ||= []
+ groups[hash_key].push([time, record])
+ end
+ groups
  end

- # Determine group level monitored resource type shared by a collection of
- # entries.
- # Return the resource type and tag regexp matched groups. The matched groups
- # only apply to some resource types. Return nil if not applicable or if
- # there is no match.
- def determine_group_level_monitored_resource_type(tag)
+ # Determine the group level monitored resource and common labels shared by a
+ # collection of entries.
+ def determine_group_level_monitored_resource_and_labels(tag,
+ local_resource_id)
+ resource = @resource.dup
+ resource.labels = @resource.labels.dup
+ common_labels = @common_labels.dup
+
+ # Change the resource type and set matched_regexp_group if the tag matches
+ # certain regexp.
+ matched_regexp_group = nil # @tag_regexp_list can be an empty list.
  @tag_regexp_list.each do |derived_type, tag_regexp|
- matched_regex_group = tag_regexp.match(tag)
- return [derived_type, matched_regex_group] if
- matched_regex_group
+ matched_regexp_group = tag_regexp.match(tag)
+ if matched_regexp_group
+ resource.type = derived_type
+ break
+ end
  end
- [@resource.type, nil]
- end

- # Determine group level monitored resource labels and common labels. These
- # labels will be shared by a collection of entries.
- def determine_group_level_labels(group_resource_type, matched_regex_group)
- group_resource_labels = @resource.labels.dup
- group_common_labels = @common_labels.dup
+ # Determine the monitored resource based on the local_resource_id.
+ # Different monitored resource types have unique ids in different format.
+ # We will query Metadata Agent for the monitored resource. Return the
+ # legacy monitored resource (either the instance resource or the resource
+ # inferred from the tag) if failed to get a monitored resource from
+ # Metadata Agent with this key.
+ #
+ # Docker container:
+ # "container.<container_id>"
+ # "containerName.<container_name>"
+ # GKE container:
+ # "gke_containerName.<namespace_id>.<pod_name>.<container_name>"
+ if @enable_metadata_agent && local_resource_id
+ @log.debug 'Calling metadata agent with local_resource_id: ' \
+ "#{local_resource_id}."
+ retrieved_resource = call_metadata_agent_for_monitored_resource(
+ local_resource_id)
+ @log.debug 'Retrieved monitored resource from metadata agent: ' \
+ "#{retrieved_resource.inspect}."
+ unless retrieved_resource.nil?
+ # TODO(qingling128): Fix this temporary renaming from 'gke_container'
+ # to 'container'.
+ retrieved_resource.type = 'container' if
+ retrieved_resource.type == 'gke_container'
+ resource = retrieved_resource
+ end
+ end

- case group_resource_type
+ # Once the resource type is settled down, determine the labels.
+ case resource.type
  # Cloud Functions.
  when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
- group_resource_labels.merge!(
+ resource.labels.merge!(
  'region' => @gcf_region,
  'function_name' => decode_cloudfunctions_function_name(
- matched_regex_group['encoded_function_name'])
+ matched_regexp_group['encoded_function_name'])
  )
-
- instance_id = group_resource_labels.delete('instance_id')
- group_common_labels.merge!(
- "#{CONTAINER_CONSTANTS[:service]}/instance_id" => instance_id,
+ instance_id = resource.labels.delete('instance_id')
+ common_labels.merge!(
+ "#{GKE_CONSTANTS[:service]}/instance_id" => instance_id,
  "#{COMPUTE_CONSTANTS[:service]}/resource_id" => instance_id,
- "#{CONTAINER_CONSTANTS[:service]}/cluster_name" =>
- group_resource_labels.delete('cluster_name'),
+ "#{GKE_CONSTANTS[:service]}/cluster_name" =>
+ resource.labels.delete('cluster_name'),
  "#{COMPUTE_CONSTANTS[:service]}/zone" =>
- group_resource_labels.delete('zone')
+ resource.labels.delete('zone')
  )

  # GKE container.
- when CONTAINER_CONSTANTS[:resource_type]
- if matched_regex_group
+ when GKE_CONSTANTS[:resource_type]
+ if matched_regexp_group
  # We only expect one occurrence of each key in the match group.
  resource_labels_candidates =
- matched_regex_group.names.zip(matched_regex_group.captures).to_h
- common_labels_candidates =
- resource_labels_candidates.dup
- group_resource_labels.merge!(
+ matched_regexp_group.names.zip(matched_regexp_group.captures).to_h
+ common_labels_candidates = resource_labels_candidates.dup
+ resource.labels.merge!(
  delete_and_extract_labels(
  resource_labels_candidates,
  # The kubernetes_tag_regexp is poorly named. 'namespace_name' is
  # in fact 'namespace_id'. 'pod_name' is in fact 'pod_id'.
  # TODO(qingling128): Figure out how to put this map into
- # constants like CONTAINER_CONSTANTS[:extra_resource_labels].
+ # constants like GKE_CONSTANTS[:extra_resource_labels].
  'container_name' => 'container_name',
  'namespace_name' => 'namespace_id',
  'pod_name' => 'pod_id'))

- group_common_labels.merge!(
+ common_labels.merge!(
  delete_and_extract_labels(
  common_labels_candidates,
- CONTAINER_CONSTANTS[:extra_common_labels]
- .map { |l| [l, "#{CONTAINER_CONSTANTS[:service]}/#{l}"] }.to_h))
+ GKE_CONSTANTS[:extra_common_labels]
+ .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
  end
+
+ # Docker container.
+ # TODO(qingling128): Remove this logic once the resource is retrieved at a
+ # proper time (b/65175256).
+ when DOCKER_CONSTANTS[:resource_type]
+ common_labels.delete("#{COMPUTE_CONSTANTS[:service]}/resource_name")
  end

- [group_resource_labels, group_common_labels]
+ resource.freeze
+ resource.labels.freeze
+ common_labels.freeze
+
+ [resource, common_labels]
  end

- # Extract entry resource and common labels that should be applied to
- # individual entries from the group resource.
- def determine_entry_level_labels(group_resource, record)
- resource_labels = {}
- common_labels = {}
+ # Extract entry level monitored resource and common labels that should be
+ # applied to individual entries.
+ def determine_entry_level_monitored_resource_and_labels(
+ group_level_resource, group_level_common_labels, record)
+ resource = group_level_resource.dup
+ resource.labels = group_level_resource.labels.dup
+ common_labels = group_level_common_labels.dup

+ case resource.type
  # Cloud Functions.
- if group_resource.type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
- record.key?('log')
- @cloudfunctions_log_match =
- @compiled_cloudfunctions_log_regexp.match(record['log'])
- common_labels['execution_id'] =
- @cloudfunctions_log_match['execution_id'] if \
- @cloudfunctions_log_match &&
- @cloudfunctions_log_match['execution_id']
- end
+ when CLOUDFUNCTIONS_CONSTANTS[:resource_type]
+ if record.key?('log')
+ @cloudfunctions_log_match =
+ @compiled_cloudfunctions_log_regexp.match(record['log'])
+ common_labels['execution_id'] =
+ @cloudfunctions_log_match['execution_id'] if
+ @cloudfunctions_log_match &&
+ @cloudfunctions_log_match['execution_id']
+ end

- # GKE containers.
- if group_resource.type == CONTAINER_CONSTANTS[:resource_type]
+ # GKE container.
+ when GKE_CONSTANTS[:resource_type]
  # Move the stdout/stderr annotation from the record into a label.
  common_labels.merge!(
  delete_and_extract_labels(
- record, 'stream' => "#{CONTAINER_CONSTANTS[:service]}/stream"))
+ record, 'stream' => "#{GKE_CONSTANTS[:service]}/stream"))

  # If the record has been annotated by the kubernetes_metadata_filter
  # plugin, then use that metadata. Otherwise, rely on commonLabels
- # populated at the grouped_entries level from the group's tag.
+ # populated from the group's tag.
  if record.key?('kubernetes')
- resource_labels.merge!(
+ resource.labels.merge!(
  delete_and_extract_labels(
- record['kubernetes'], CONTAINER_CONSTANTS[:extra_resource_labels]
+ record['kubernetes'], GKE_CONSTANTS[:extra_resource_labels]
  .map { |l| [l, l] }.to_h))
  common_labels.merge!(
  delete_and_extract_labels(
- record['kubernetes'], CONTAINER_CONSTANTS[:extra_common_labels]
- .map { |l| [l, "#{CONTAINER_CONSTANTS[:service]}/#{l}"] }.to_h))
+ record['kubernetes'], GKE_CONSTANTS[:extra_common_labels]
+ .map { |l| [l, "#{GKE_CONSTANTS[:service]}/#{l}"] }.to_h))
  # Prepend label/ to all user-defined labels' keys.
  if record['kubernetes'].key?('labels')
  common_labels.merge!(
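
A minimal sketch of the grouping that `group_log_entries_by_tag_and_local_resource_id` above produces, with the msgpack chunk simulated by a plain array of (tag, time, record) tuples; the tag and container ids are illustrative:

LOCAL_RESOURCE_ID_KEY = 'logging.googleapis.com/local_resource_id'

chunk = [
  ['docker.logs', 1_500_000_000, { 'log' => 'a', LOCAL_RESOURCE_ID_KEY => 'container.abc123' }],
  ['docker.logs', 1_500_000_001, { 'log' => 'b', LOCAL_RESOURCE_ID_KEY => 'container.def456' }],
  ['docker.logs', 1_500_000_002, { 'log' => 'c' }]  # no id: falls back to the legacy resource
]

groups = {}
chunk.each do |tag, time, record|
  local_resource_id = record.delete(LOCAL_RESOURCE_ID_KEY)
  key = [tag, local_resource_id].freeze
  (groups[key] ||= []) << [time, record]
end

p groups.keys
# => [["docker.logs", "container.abc123"],
#     ["docker.logs", "container.def456"],
#     ["docker.logs", nil]]
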
@@ -1109,14 +1168,56 @@ module Fluent
  # Report them as monitored resource labels instead of common labels.
  # e.g. "dataflow.googleapis.com/job_id" => "job_id"
  [DATAFLOW_CONSTANTS, ML_CONSTANTS].each do |service_constants|
- next unless group_resource.type == service_constants[:resource_type]
- resource_labels.merge!(
+ next unless resource.type == service_constants[:resource_type]
+ resource.labels.merge!(
  delete_and_extract_labels(
  common_labels, service_constants[:extra_common_labels]
  .map { |l| ["#{service_constants[:service]}/#{l}", l] }.to_h))
  end

- [resource_labels, common_labels]
+ [resource, common_labels]
+ end
+
+ # Call Metadata Agent to get monitored resource information and parse
+ # response to Google::Api::MonitoredResource.
+ def call_metadata_agent_for_monitored_resource(local_resource_id)
+ response = query_metadata_agent("monitoredResource/#{local_resource_id}")
+ return nil if response.nil?
+ begin
+ resource = Google::Api::MonitoredResource.decode_json(response.to_json)
+ rescue Google::Protobuf::ParseError, ArgumentError => e
+ @log.error 'Error parsing monitored resource from Metadata Agent. ' \
+ "response: #{response.inspect}", error: e
+ return nil
+ end
+
+ # TODO(qingling128): Use Google::Api::MonitoredResource directly after we
+ # upgrade gRPC version to include the fix for the protobuf map
+ # corruption issue.
+ Google::Apis::LoggingV2beta1::MonitoredResource.new(
+ type: resource.type,
+ labels: resource.labels.to_h
+ )
+ end
+
+ # Issue a request to the Metadata Agent's local API and parse the response
+ # to JSON. Return nil in case of failure.
+ def query_metadata_agent(path)
+ url = "#{@metadata_agent_url}/#{path}"
+ @log.debug("Calling Metadata Agent: #{url}")
+ open(url) do |f|
+ response = f.read
+ parsed_hash = parse_json_or_nil(response)
+ if parsed_hash.nil?
+ @log.error 'Response from Metadata Agent is not in valid json ' \
+ "format: '#{response.inspect}'."
+ return nil
+ end
+ @log.debug "Response from Metadata Agent: #{parsed_hash}"
+ return parsed_hash
+ end
+ rescue StandardError => e
+ @log.error 'Error calling Metadata Agent.', error: e
  end

  # TODO: This functionality should eventually be available in another
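
A standalone sketch of the request/response flow that `query_metadata_agent` and `call_metadata_agent_for_monitored_resource` above implement: GET `<metadata_agent_url>/monitoredResource/<local_resource_id>`, treat any failure or non-JSON body as nil, and let the caller fall back to the legacy path. The local_resource_id and the response shape shown in the comment are illustrative assumptions, not captured output.

require 'json'
require 'open-uri'

metadata_agent_url = 'http://local-metadata-agent.stackdriver.com:8000'  # default from this diff
local_resource_id  = 'container.abc123'                                  # illustrative id

begin
  body = URI.open("#{metadata_agent_url}/monitoredResource/#{local_resource_id}").read
  resource = JSON.parse(body)  # e.g. {"type"=>"docker_container", "labels"=>{...}}
rescue StandardError => e
  resource = nil               # mirrors the plugin: any failure means "use the legacy resource"
  warn "Metadata Agent lookup failed: #{e.message}"
end
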
@@ -1209,7 +1310,7 @@ module Fluent
  [ts_secs, ts_nanos]
  end

- def compute_severity(resource_type, record, entry_common_labels)
+ def compute_severity(resource_type, record, entry_level_common_labels)
  if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type]
  if @cloudfunctions_log_match && @cloudfunctions_log_match['severity']
  return parse_severity(@cloudfunctions_log_match['severity'])
@@ -1224,9 +1325,8 @@ module Fluent
  end
  elsif record.key?('severity')
  return parse_severity(record.delete('severity'))
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
- entry_common_labels.key?("#{CONTAINER_CONSTANTS[:service]}/stream")
- stream = entry_common_labels["#{CONTAINER_CONSTANTS[:service]}/stream"]
+ elsif resource_type == GKE_CONSTANTS[:resource_type]
+ stream = entry_level_common_labels["#{GKE_CONSTANTS[:service]}/stream"]
  if stream == 'stdout'
  return 'INFO'
  elsif stream == 'stderr'
@@ -1296,6 +1396,9 @@ module Fluent
  'FINE' => 'DEBUG',
  'FINER' => 'DEBUG',
  'FINEST' => 'DEBUG',
+ # java.util.logging levels (only missing ones from above listed).
+ 'SEVERE' => 'ERROR',
+ 'CONFIG' => 'DEBUG',
  # nginx levels (only missing ones from above listed).
  'CRIT' => 'CRITICAL',
  'EMERG' => 'EMERGENCY',
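
For reference, the two new aliases slot into the severity-normalisation table like this; the hash below holds only the levels visible in this hunk, not the plugin's full map:

SEVERITY_ALIASES = {
  # java.util.logging levels.
  'FINE'   => 'DEBUG',
  'FINER'  => 'DEBUG',
  'FINEST' => 'DEBUG',
  'SEVERE' => 'ERROR',    # new in this version
  'CONFIG' => 'DEBUG',    # new in this version
  # nginx levels.
  'CRIT'   => 'CRITICAL',
  'EMERG'  => 'EMERGENCY'
}

puts SEVERITY_ALIASES['SEVERE']  # => ERROR
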
@@ -1447,34 +1550,8 @@ module Fluent
  hash.nil? || !hash.is_a?(Hash)
  label_map.each_with_object({}) \
  do |(original_label, new_label), extracted_labels|
- extracted_labels[new_label] =
- convert_to_utf8(hash.delete(original_label).to_s) if
- hash.key?(original_label)
- end
- end
-
- def set_payload(resource_type, record, entry, is_json)
- # If this is a Cloud Functions log that matched the expected regexp,
- # use text payload. Otherwise, use JSON if we found valid JSON, or text
- # payload in the following cases:
- # 1. This is a Cloud Functions log and the 'log' key is available
- # 2. This is an unstructured Container log and the 'log' key is available
- # 3. The only remaining key is 'message'
- if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
- @cloudfunctions_log_match
- entry.text_payload = @cloudfunctions_log_match['text']
- elsif resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
- record.key?('log')
- entry.text_payload = record['log']
- elsif is_json
- entry.json_payload = record
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
- record.key?('log')
- entry.text_payload = record['log']
- elsif record.size == 1 && record.key?('message')
- entry.text_payload = record['message']
- else
- entry.json_payload = record
+ value = hash.delete(original_label)
+ extracted_labels[new_label] = convert_to_utf8(value.to_s) if value
  end
  end
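
A minimal sketch of the behavioural change in `delete_and_extract_labels` above: the old code copied a label whenever the key existed, while the new code copies it only when the deleted value is truthy, so an explicit nil is now dropped instead of becoming an empty string. `convert_to_utf8` is stubbed as a pass-through purely for illustration.

# Stub for the plugin's convert_to_utf8 (pass-through here, for illustration only).
def convert_to_utf8(str)
  str
end

label_map = { 'namespace_name' => 'namespace_id' }
hash = { 'namespace_name' => nil }

extracted = label_map.each_with_object({}) do |(original_label, new_label), acc|
  value = hash.delete(original_label)
  acc[new_label] = convert_to_utf8(value.to_s) if value
end

p extracted  # => {}   (the old code would have produced {"namespace_id"=>""})
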
@@ -1522,7 +1599,10 @@ module Fluent
  ret
  end

- def set_payload_grpc(resource_type, record, entry, is_json)
+ def set_payload(resource_type, record, entry, is_json)
+ # Only one of {text_payload, json_payload} will be set.
+ text_payload = nil
+ json_payload = nil
  # If this is a Cloud Functions log that matched the expected regexp,
  # use text payload. Otherwise, use JSON if we found valid JSON, or text
  # payload in the following cases:
@@ -1531,20 +1611,34 @@ module Fluent
  # 3. The only remaining key is 'message'
  if resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
  @cloudfunctions_log_match
- entry.text_payload = convert_to_utf8(
- @cloudfunctions_log_match['text'])
+ text_payload = @cloudfunctions_log_match['text']
  elsif resource_type == CLOUDFUNCTIONS_CONSTANTS[:resource_type] &&
  record.key?('log')
- entry.text_payload = convert_to_utf8(record['log'])
+ text_payload = record['log']
  elsif is_json
- entry.json_payload = struct_from_ruby(record)
- elsif resource_type == CONTAINER_CONSTANTS[:resource_type] &&
+ json_payload = record
+ elsif [GKE_CONSTANTS[:resource_type],
+ DOCKER_CONSTANTS[:resource_type]].include?(resource_type) &&
  record.key?('log')
- entry.text_payload = convert_to_utf8(record['log'])
+ text_payload = record['log']
  elsif record.size == 1 && record.key?('message')
- entry.text_payload = convert_to_utf8(record['message'])
+ text_payload = record['message']
  else
- entry.json_payload = struct_from_ruby(record)
+ json_payload = record
+ end
+
+ if json_payload
+ entry.json_payload = if @use_grpc
+ struct_from_ruby(json_payload)
+ else
+ json_payload
+ end
+ elsif text_payload
+ entry.text_payload = if @use_grpc
+ convert_to_utf8(text_payload)
+ else
+ text_payload
+ end
  end
  end
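
A condensed sketch of the payload decision the unified `set_payload` above makes once the Cloud Functions cases are out of the way; a plain return value stands in for assigning to the LogEntry, and 'container' / 'docker_container' are the GKE and Docker resource types from this diff:

def choose_payload(resource_type, record, is_json)
  # Mirrors the branch order above, minus the Cloud Functions cases.
  if is_json
    [:json_payload, record]
  elsif %w(container docker_container).include?(resource_type) && record.key?('log')
    [:text_payload, record['log']]
  elsif record.size == 1 && record.key?('message')
    [:text_payload, record['message']]
  else
    [:json_payload, record]
  end
end

p choose_payload('docker_container', { 'log' => 'plain text line' }, false)
# => [:text_payload, "plain text line"]
p choose_payload('gce_instance', { 'message' => 'hi', 'user' => 'a' }, false)
# => [:json_payload, {"message"=>"hi", "user"=>"a"}]
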
@@ -1554,7 +1648,7 @@ module Fluent
  elsif resource.type == APPENGINE_CONSTANTS[:resource_type]
  # Add a prefix to Managed VM logs to prevent namespace collisions.
  tag = "#{APPENGINE_CONSTANTS[:service]}/#{tag}"
- elsif resource.type == CONTAINER_CONSTANTS[:resource_type]
+ elsif resource.type == GKE_CONSTANTS[:resource_type]
  # For Kubernetes logs, use just the container name as the log name
  # if we have it.
  if resource.labels && resource.labels.key?('container_name')