logstash-output-google_bigquery 4.0.0-java

Files changed (53)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +71 -0
  3. data/CONTRIBUTORS +15 -0
  4. data/Gemfile +11 -0
  5. data/LICENSE +13 -0
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +100 -0
  8. data/docs/index.asciidoc +348 -0
  9. data/lib/logstash-output-google_bigquery_jars.rb +38 -0
  10. data/lib/logstash/outputs/bigquery/batcher.rb +82 -0
  11. data/lib/logstash/outputs/bigquery/schema.rb +93 -0
  12. data/lib/logstash/outputs/bigquery/streamclient.rb +120 -0
  13. data/lib/logstash/outputs/google_bigquery.rb +280 -0
  14. data/logstash-output-google_bigquery.gemspec +31 -0
  15. data/spec/outputs/bigquery/batcher_spec.rb +110 -0
  16. data/spec/outputs/bigquery/schema_spec.rb +101 -0
  17. data/spec/outputs/google_bigquery_spec.rb +154 -0
  18. data/vendor/jar-dependencies/com/fasterxml/jackson/core/jackson-core/2.1.3/jackson-core-2.1.3.jar +0 -0
  19. data/vendor/jar-dependencies/com/google/api-client/google-api-client/1.23.0/google-api-client-1.23.0.jar +0 -0
  20. data/vendor/jar-dependencies/com/google/api/api-common/1.5.0/api-common-1.5.0.jar +0 -0
  21. data/vendor/jar-dependencies/com/google/api/gax-httpjson/0.40.0/gax-httpjson-0.40.0.jar +0 -0
  22. data/vendor/jar-dependencies/com/google/api/gax/1.23.0/gax-1.23.0.jar +0 -0
  23. data/vendor/jar-dependencies/com/google/api/grpc/proto-google-common-protos/1.7.0/proto-google-common-protos-1.7.0.jar +0 -0
  24. data/vendor/jar-dependencies/com/google/api/grpc/proto-google-iam-v1/0.8.0/proto-google-iam-v1-0.8.0.jar +0 -0
  25. data/vendor/jar-dependencies/com/google/apis/google-api-services-bigquery/v2-rev377-1.23.0/google-api-services-bigquery-v2-rev377-1.23.0.jar +0 -0
  26. data/vendor/jar-dependencies/com/google/auth/google-auth-library-credentials/0.9.0/google-auth-library-credentials-0.9.0.jar +0 -0
  27. data/vendor/jar-dependencies/com/google/auth/google-auth-library-oauth2-http/0.9.0/google-auth-library-oauth2-http-0.9.0.jar +0 -0
  28. data/vendor/jar-dependencies/com/google/auto/value/auto-value/1.4/auto-value-1.4.jar +0 -0
  29. data/vendor/jar-dependencies/com/google/cloud/google-cloud-bigquery/1.24.1/google-cloud-bigquery-1.24.1.jar +0 -0
  30. data/vendor/jar-dependencies/com/google/cloud/google-cloud-core-http/1.24.1/google-cloud-core-http-1.24.1.jar +0 -0
  31. data/vendor/jar-dependencies/com/google/cloud/google-cloud-core/1.24.1/google-cloud-core-1.24.1.jar +0 -0
  32. data/vendor/jar-dependencies/com/google/code/findbugs/jsr305/3.0.1/jsr305-3.0.1.jar +0 -0
  33. data/vendor/jar-dependencies/com/google/code/gson/gson/2.7/gson-2.7.jar +0 -0
  34. data/vendor/jar-dependencies/com/google/errorprone/error_prone_annotations/2.2.0/error_prone_annotations-2.2.0.jar +0 -0
  35. data/vendor/jar-dependencies/com/google/guava/guava/20.0/guava-20.0.jar +0 -0
  36. data/vendor/jar-dependencies/com/google/http-client/google-http-client-appengine/1.23.0/google-http-client-appengine-1.23.0.jar +0 -0
  37. data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson/1.23.0/google-http-client-jackson-1.23.0.jar +0 -0
  38. data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson2/1.23.0/google-http-client-jackson2-1.23.0.jar +0 -0
  39. data/vendor/jar-dependencies/com/google/http-client/google-http-client/1.23.0/google-http-client-1.23.0.jar +0 -0
  40. data/vendor/jar-dependencies/com/google/oauth-client/google-oauth-client/1.23.0/google-oauth-client-1.23.0.jar +0 -0
  41. data/vendor/jar-dependencies/com/google/protobuf/protobuf-java-util/3.5.1/protobuf-java-util-3.5.1.jar +0 -0
  42. data/vendor/jar-dependencies/com/google/protobuf/protobuf-java/3.5.1/protobuf-java-3.5.1.jar +0 -0
  43. data/vendor/jar-dependencies/commons-codec/commons-codec/1.9/commons-codec-1.9.jar +0 -0
  44. data/vendor/jar-dependencies/commons-logging/commons-logging/1.2/commons-logging-1.2.jar +0 -0
  45. data/vendor/jar-dependencies/io/grpc/grpc-context/1.9.0/grpc-context-1.9.0.jar +0 -0
  46. data/vendor/jar-dependencies/io/opencensus/opencensus-api/0.11.1/opencensus-api-0.11.1.jar +0 -0
  47. data/vendor/jar-dependencies/io/opencensus/opencensus-contrib-http-util/0.11.1/opencensus-contrib-http-util-0.11.1.jar +0 -0
  48. data/vendor/jar-dependencies/joda-time/joda-time/2.9.2/joda-time-2.9.2.jar +0 -0
  49. data/vendor/jar-dependencies/org/apache/httpcomponents/httpclient/4.5.2/httpclient-4.5.2.jar +0 -0
  50. data/vendor/jar-dependencies/org/apache/httpcomponents/httpcore/4.4.4/httpcore-4.4.4.jar +0 -0
  51. data/vendor/jar-dependencies/org/codehaus/jackson/jackson-core-asl/1.9.11/jackson-core-asl-1.9.11.jar +0 -0
  52. data/vendor/jar-dependencies/org/threeten/threetenbp/1.3.3/threetenbp-1.3.3.jar +0 -0
  53. metadata +178 -0
data/lib/logstash-output-google_bigquery_jars.rb
@@ -0,0 +1,38 @@
+ # AUTOGENERATED BY THE GRADLE SCRIPT. DO NOT EDIT.
+
+ require 'jar_dependencies'
+ require_jar('com.google.cloud', 'google-cloud-bigquery', '1.24.1')
+ require_jar('com.fasterxml.jackson.core', 'jackson-core', '2.1.3')
+ require_jar('com.google.api', 'api-common', '1.5.0')
+ require_jar('com.google.api-client', 'google-api-client', '1.23.0')
+ require_jar('com.google.api', 'gax', '1.23.0')
+ require_jar('com.google.api', 'gax-httpjson', '0.40.0')
+ require_jar('com.google.api.grpc', 'proto-google-common-protos', '1.7.0')
+ require_jar('com.google.api.grpc', 'proto-google-iam-v1', '0.8.0')
+ require_jar('com.google.apis', 'google-api-services-bigquery', 'v2-rev377-1.23.0')
+ require_jar('com.google.auth', 'google-auth-library-credentials', '0.9.0')
+ require_jar('com.google.auth', 'google-auth-library-oauth2-http', '0.9.0')
+ require_jar('com.google.auto.value', 'auto-value', '1.4')
+ require_jar('com.google.cloud', 'google-cloud-core', '1.24.1')
+ require_jar('com.google.cloud', 'google-cloud-core-http', '1.24.1')
+ require_jar('com.google.code.findbugs', 'jsr305', '3.0.1')
+ require_jar('com.google.code.gson', 'gson', '2.7')
+ require_jar('com.google.errorprone', 'error_prone_annotations', '2.2.0')
+ require_jar('com.google.guava', 'guava', '20.0')
+ require_jar('com.google.http-client', 'google-http-client', '1.23.0')
+ require_jar('com.google.http-client', 'google-http-client-appengine', '1.23.0')
+ require_jar('com.google.http-client', 'google-http-client-jackson', '1.23.0')
+ require_jar('com.google.http-client', 'google-http-client-jackson2', '1.23.0')
+ require_jar('com.google.oauth-client', 'google-oauth-client', '1.23.0')
+ require_jar('com.google.protobuf', 'protobuf-java', '3.5.1')
+ require_jar('com.google.protobuf', 'protobuf-java-util', '3.5.1')
+ require_jar('commons-codec', 'commons-codec', '1.9')
+ require_jar('commons-logging', 'commons-logging', '1.2')
+ require_jar('io.grpc', 'grpc-context', '1.9.0')
+ require_jar('io.opencensus', 'opencensus-api', '0.11.1')
+ require_jar('io.opencensus', 'opencensus-contrib-http-util', '0.11.1')
+ require_jar('joda-time', 'joda-time', '2.9.2')
+ require_jar('org.apache.httpcomponents', 'httpclient', '4.5.2')
+ require_jar('org.apache.httpcomponents', 'httpcore', '4.4.4')
+ require_jar('org.codehaus.jackson', 'jackson-core-asl', '1.9.11')
+ require_jar('org.threeten', 'threetenbp', '1.3.3')
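
The file above only registers the vendored jars with `jar_dependencies`; once it is required, the Google Cloud Java classes are on the JVM classpath and can be called directly from JRuby. A minimal sketch (assumes a JRuby runtime with this gem installed and Application Default Credentials available; the imported class is the same one the stream client below uses):

[source,ruby]
--------------------------
# Loading the jars file makes the BigQuery Java client available to JRuby.
require 'logstash-output-google_bigquery_jars.rb'

java_import 'com.google.cloud.bigquery.BigQueryOptions'

# Builds a client from Application Default Credentials, as the plugin does
# when no json_key_file is configured.
bigquery = BigQueryOptions.getDefaultInstance.getService
--------------------------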
data/lib/logstash/outputs/bigquery/batcher.rb
@@ -0,0 +1,82 @@
+ require 'thread'
+ require 'java'
+ require 'logstash-output-google_bigquery_jars.rb'
+
+ module LogStash
+   module Outputs
+     module BigQuery
+       # Batcher is a queue that bundles messages in batches based on their
+       # size in bytes or count. It's used to provide guarantees around
+       # maximum data loss due to a fault while maintaining good upload
+       # throughput.
+       class Batcher
+         include_package 'java.util.concurrent.locks'
+
+         def initialize(max_length, max_bytes)
+           @lock = ReentrantReadWriteLock.new
+           @max_length = max_length
+           @max_bytes = max_bytes
+
+           clear
+         end
+
+         # enqueue_push calls enqueue and if a batch is ready to go pushes it to
+         # the provided queue.
+         def enqueue_push(message, queue)
+           batch = enqueue message
+
+           queue << batch unless batch.nil?
+         end
+
+         # enqueue adds a message to the batch. If the batch is ready to be sent
+         # out the internal state is reset and the array of messages is both
+         # yielded and returned.
+         # Otherwise nil is returned.
+         def enqueue(message)
+           @lock.write_lock.lock
+
+           begin
+             is_flush_request = message.nil?
+
+             unless is_flush_request
+               @batch_size_bytes += message.length
+               @batch << message
+             end
+
+             length_met = @batch.length >= @max_length
+             size_met = @batch_size_bytes >= @max_bytes
+
+             if is_flush_request || length_met || size_met
+               orig = @batch
+               clear
+
+               yield(orig) if block_given?
+               return orig
+             end
+
+             nil
+           ensure
+             @lock.write_lock.unlock
+           end
+         end
+
+         # removes all elements from the batch
+         def clear
+           @lock.write_lock.lock
+           @batch = []
+           @batch_size_bytes = 0
+           @lock.write_lock.unlock
+         end
+
+         def empty?
+           @lock.read_lock.lock
+           begin
+             @batch.empty? && @batch_size_bytes.zero?
+           ensure
+             @lock.read_lock.unlock
+           end
+         end
+       end
+     end
+   end
+ end
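
Because `enqueue` only hands back a batch when a count, byte-size, or flush condition is met, calling code never has to track batch state itself. A minimal usage sketch (illustrative values; assumes a JRuby runtime with the plugin on the load path):

[source,ruby]
--------------------------
require 'logstash/outputs/bigquery/batcher'

# Flush after 2 messages or ~1 MB, whichever comes first.
batcher = LogStash::Outputs::BigQuery::Batcher.new(2, 1_000_000)

batcher.enqueue('{"status":200}') # => nil, batch still open
batcher.enqueue('{"status":500}') # => ['{"status":200}', '{"status":500}'], length limit hit

# A nil message forces a flush of whatever is buffered, which is exactly
# what the plugin's flush thread does every flush_interval_secs.
batcher.enqueue('{"status":404}') # => nil
batcher.enqueue(nil)              # => ['{"status":404}']
--------------------------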
data/lib/logstash/outputs/bigquery/schema.rb
@@ -0,0 +1,93 @@
+ require 'java'
+ require 'logstash-output-google_bigquery_jars.rb'
+ require 'logstash/json'
+
+ module LogStash
+   module Outputs
+     module BigQuery
+       class Schema
+         include_package 'com.google.cloud.bigquery'
+
+         # Converts a CSV schema or JSON schema into a BigQuery Java Schema.
+         def self.parse_csv_or_json(csv_schema, json_schema)
+           csv_blank = csv_schema.nil? || csv_schema.empty?
+           json_blank = json_schema.nil? || json_schema.empty?
+
+           unless csv_blank ^ json_blank
+             raise ArgumentError.new("You must provide either json_schema OR csv_schema. csv: #{csv_schema}, json: #{json_schema}")
+           end
+
+           if csv_blank
+             schema = json_schema
+           else
+             schema = parse_csv_schema csv_schema
+           end
+
+           self.hash_to_java_schema schema
+         end
+
+         # Converts a CSV of field:type pairs into the JSON style schema.
+         def self.parse_csv_schema(csv_schema)
+           require 'csv'
+
+           fields = []
+
+           CSV.parse(csv_schema.gsub('\"', '""')).flatten.each do |field|
+             raise ArgumentError.new('csv_schema must follow the format <field-name>:<field-type>') if field.nil?
+
+             temp = field.strip.split(':')
+
+             if temp.length != 2
+               raise ArgumentError.new('csv_schema must follow the format <field-name>:<field-type>')
+             end
+
+             fields << { 'name' => temp[0], 'type' => temp[1] }
+           end
+
+           # Check that we have at least one field in the schema
+           raise ArgumentError.new('csv_schema must contain at least one field') if fields.empty?
+
+           { 'fields' => fields }
+         end
+
+         # Converts the Ruby hash style schema into a BigQuery Java schema
+         def self.hash_to_java_schema(schema_hash)
+           field_list = self.parse_field_list schema_hash['fields']
+           com.google.cloud.bigquery.Schema.of field_list
+         end
+
+         # Converts a list of fields into a BigQuery Java FieldList
+         def self.parse_field_list(fields)
+           fieldslist = fields.map {|field| self.parse_field field}
+
+           FieldList.of fieldslist
+         end
+
+         # Converts a single field definition into a BigQuery Java Field object.
+         # This includes any nested fields as well.
+         def self.parse_field(field)
+           type = LegacySQLTypeName.valueOfStrict(field['type'])
+           name = field['name']
+
+           if field.has_key? 'fields'
+             sub_fields = self.parse_field_list field['fields']
+             builder = Field.newBuilder(name, type, sub_fields)
+           else
+             builder = Field.newBuilder(name, type)
+           end
+
+           if field.has_key? 'description'
+             builder = builder.setDescription(field['description'])
+           end
+
+           if field.has_key? 'mode'
+             mode = Field::Mode.valueOf field['mode']
+             builder = builder.setMode(mode)
+           end
+
+           builder.build
+         end
+       end
+     end
+   end
+ end
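
For reference, the intermediate hash built from a `csv_schema` string looks like the JSON-style schema below; `parse_csv_or_json` then turns it into a `com.google.cloud.bigquery.Schema`. A sketch with illustrative values:

[source,ruby]
--------------------------
require 'logstash/outputs/bigquery/schema'

LogStash::Outputs::BigQuery::Schema.parse_csv_schema('path:STRING,status:INTEGER,score:FLOAT')
# => {
#      'fields' => [
#        { 'name' => 'path',   'type' => 'STRING'  },
#        { 'name' => 'status', 'type' => 'INTEGER' },
#        { 'name' => 'score',  'type' => 'FLOAT'   }
#      ]
#    }
--------------------------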
data/lib/logstash/outputs/bigquery/streamclient.rb
@@ -0,0 +1,120 @@
+ require 'java'
+ require 'openssl'
+ require 'logstash-output-google_bigquery_jars.rb'
+
+ module LogStash
+   module Outputs
+     module BigQuery
+       # NOTE: This file uses _a lot_ of Java. Please keep the Java looking
+       # java-y so it's easy to tell the languages apart.
+
+       include_package 'com.google.cloud.bigquery'
+
+       # StreamingClient supports shipping data to BigQuery using streams.
+       class StreamingClient
+         def initialize(json_key_file, project_id, logger)
+           @logger = logger
+
+           @bigquery = initialize_google_client json_key_file, project_id
+         end
+
+         def table_exists?(dataset, table)
+           api_debug('Checking if table exists', dataset, table)
+           tbl = @bigquery.getTable dataset, table
+
+           !tbl.nil?
+         end
+
+         # Creates a table with the given name in the given dataset
+         def create_table(dataset, table, schema)
+           api_debug('Creating table', dataset, table)
+           table_id = com.google.cloud.bigquery.TableId.of dataset, table
+
+           table_defn = com.google.cloud.bigquery.StandardTableDefinition.of schema
+           table_info = com.google.cloud.bigquery.TableInfo.newBuilder(table_id, table_defn).build()
+
+           @bigquery.create table_info
+         end
+
+         def append(dataset, table, rows, ignore_unknown)
+           api_debug("Appending #{rows.length} rows", dataset, table)
+
+           request = build_append_request dataset, table, rows, ignore_unknown
+
+           response = @bigquery.insertAll request
+           return true unless response.hasErrors
+
+           response.getInsertErrors().entrySet().each{ |entry|
+             key = entry.getKey
+             errors = entry.getValue
+
+             errors.each{|bqError|
+               @logger.warn('Error while inserting',
+                            key: key,
+                            location: bqError.getLocation,
+                            message: bqError.getMessage,
+                            reason: bqError.getReason)
+             }
+           }
+
+           false
+         end
+
+         def build_append_request(dataset, table, rows, ignore_unknown)
+           request = com.google.cloud.bigquery.InsertAllRequest.newBuilder dataset, table
+           request.setIgnoreUnknownValues ignore_unknown
+
+           rows.each { |serialized_row|
+             # deserialize rows into Java maps
+             deserialized = LogStash::Json.load serialized_row
+             request.addRow deserialized
+           }
+
+           request.build
+         end
+
+         # Returns an error message if the key file is invalid, nil otherwise.
+         def get_key_file_error(json_key_file)
+           return nil if json_key_file.nil? || json_key_file == ''
+
+           abs = ::File.absolute_path json_key_file
+           unless abs == json_key_file
+             return "json_key_file must be an absolute path: #{json_key_file}"
+           end
+
+           unless ::File.exist? json_key_file
+             return "json_key_file does not exist: #{json_key_file}"
+           end
+
+           nil
+         end
+
+         def initialize_google_client(json_key_file, project_id)
+           @logger.info("Initializing Google API client #{project_id} key: #{json_key_file}")
+           err = get_key_file_error json_key_file
+           raise err unless err.nil?
+
+           if json_key_file.nil? || json_key_file.empty?
+             return com.google.cloud.bigquery.BigQueryOptions.getDefaultInstance().getService()
+           end
+
+           # TODO: set User-Agent
+
+           key_file = java.io.FileInputStream.new json_key_file
+           credentials = com.google.auth.oauth2.ServiceAccountCredentials.fromStream key_file
+           return com.google.cloud.bigquery.BigQueryOptions.newBuilder()
+                     .setCredentials(credentials)
+                     .setProjectId(project_id)
+                     .build()
+                     .getService()
+         end
+
+         private
+
+         def api_debug(message, dataset, table)
+           @logger.debug(message, dataset: dataset, table: table)
+         end
+       end
+     end
+   end
+ end
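
Putting the client and the schema helper together, a rough sketch of the calls the plugin makes per batch (dataset, table, key path, and logger are illustrative; assumes a JRuby runtime, valid credentials, and an existing dataset):

[source,ruby]
--------------------------
# Any logger responding to debug/info/warn with structured fields works here.
logger = LogStash::Logging::Logger.new('bigquery.example')

schema = LogStash::Outputs::BigQuery::Schema.parse_csv_or_json('path:STRING,status:INTEGER', nil)
client = LogStash::Outputs::BigQuery::StreamingClient.new('/etc/keys/bq-key.json', 'my-project', logger)

table = 'logstash_2018_05_01T12_00'
client.create_table('logs', table, schema) unless client.table_exists?('logs', table)

# Rows are JSON strings; they are deserialized into Java maps before upload.
ok = client.append('logs', table, ['{"path":"/","status":200}'], false)
logger.warn('Some rows were rejected; see the insert errors above') unless ok
--------------------------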
data/lib/logstash/outputs/google_bigquery.rb
@@ -0,0 +1,280 @@
+ require 'logstash/outputs/base'
+ require 'logstash/namespace'
+ require 'logstash/json'
+ require 'logstash/outputs/bigquery/streamclient'
+ require 'logstash/outputs/bigquery/batcher'
+ require 'logstash/outputs/bigquery/schema'
+
+ require 'time'
+ require 'fileutils'
+
+ #
+ # === Summary
+ #
+ # This plugin uploads events to Google BigQuery using the streaming API
+ # so data can become available nearly immediately.
+ #
+ # You can configure it to flush periodically, after N events or after
+ # a certain amount of data is ingested.
+ #
+ # === Environment Configuration
+ #
+ # You must enable BigQuery on your Google Cloud project and create a dataset to
+ # hold the tables this plugin generates.
+ #
+ # You must also grant the service account this plugin uses access to
+ # the dataset.
+ #
+ # You can use https://www.elastic.co/guide/en/logstash/current/event-dependent-configuration.html[Logstash conditionals]
+ # and multiple configuration blocks to upload events with different structures.
+ #
+ # === Usage
+ # This is an example of logstash config:
+ #
+ # [source,ruby]
+ # --------------------------
+ # output {
+ #   google_bigquery {
+ #     project_id => "folkloric-guru-278" (required)
+ #     dataset => "logs" (required)
+ #     csv_schema => "path:STRING,status:INTEGER,score:FLOAT" (required) <1>
+ #     json_key_file => "/path/to/key.json" (optional) <2>
+ #     error_directory => "/tmp/bigquery-errors" (required)
+ #     date_pattern => "%Y-%m-%dT%H:00" (optional)
+ #     flush_interval_secs => 30 (optional)
+ #   }
+ # }
+ # --------------------------
+ #
+ # <1> Specify either a csv_schema or a json_schema.
+ #
+ # <2> If the key is not used, then the plugin tries to find
+ # https://cloud.google.com/docs/authentication/production[Application Default Credentials]
+ #
+ # === Considerations
+ #
+ # * There is a small fee to insert data into BigQuery using the streaming API
+ # * This plugin buffers events in-memory, so make sure the flush configurations are appropriate
+ #   for your use-case and consider using
+ #   https://www.elastic.co/guide/en/logstash/current/persistent-queues.html[Logstash Persistent Queues]
+ #
+ # === Additional Resources
+ #
+ # * https://cloud.google.com/bigquery/[BigQuery Introduction]
+ # * https://cloud.google.com/bigquery/docs/schemas[BigQuery Schema Formats and Types]
+ # * https://cloud.google.com/bigquery/pricing[Pricing Information]
+ #
+ class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
+   config_name 'google_bigquery'
+
+   concurrency :single
+
+   # Google Cloud Project ID (number, not Project Name!).
+   config :project_id, validate: :string, required: true
+
+   # The BigQuery dataset the tables for the events will be added to.
+   config :dataset, validate: :string, required: true
+
+   # BigQuery table ID prefix to be used when creating new tables for log data.
+   # Table name will be `<table_prefix><table_separator><date>`
+   config :table_prefix, validate: :string, default: 'logstash'
+
+   # BigQuery table separator to be added between the table_prefix and the
+   # date suffix.
+   config :table_separator, validate: :string, default: '_'
+
+   # Schema for log data. It must follow the format `name1:type1(,name2:type2)*`.
+   # For example, `path:STRING,status:INTEGER,score:FLOAT`.
+   config :csv_schema, validate: :string, required: false, default: nil
+
+   # Schema for log data as a hash.
+   # These can include nested records, descriptions, and modes.
+   #
+   # Example:
+   # [source,ruby]
+   # --------------------------
+   # json_schema => {
+   #   fields => [{
+   #     name => "endpoint"
+   #     type => "STRING"
+   #     description => "Request route"
+   #   }, {
+   #     name => "status"
+   #     type => "INTEGER"
+   #     mode => "NULLABLE"
+   #   }, {
+   #     name => "params"
+   #     type => "RECORD"
+   #     mode => "REPEATED"
+   #     fields => [{
+   #       name => "key"
+   #       type => "STRING"
+   #     }, {
+   #       name => "value"
+   #       type => "STRING"
+   #     }]
+   #   }]
+   # }
+   # --------------------------
+   config :json_schema, validate: :hash, required: false, default: nil
+
+   # Indicates if BigQuery should ignore values that are not represented in the table schema.
+   # If true, the extra values are discarded.
+   # If false, BigQuery will reject the records with extra fields and the job will fail.
+   # The default value is false.
+   #
+   # NOTE: You may want to add a Logstash filter like the following to remove the common fields Logstash adds:
+   # [source,ruby]
+   # ----------------------------------
+   # mutate {
+   #   remove_field => ["@version","@timestamp","path","host","type", "message"]
+   # }
+   # ----------------------------------
+   config :ignore_unknown_values, validate: :boolean, default: false
+
+   # Time pattern for BigQuery table, defaults to hourly tables.
+   # Must be a Time.strftime pattern: www.ruby-doc.org/core-2.0/Time.html#method-i-strftime
+   config :date_pattern, validate: :string, default: '%Y-%m-%dT%H:00'
+
+   # If logstash is running within Google Compute Engine, the plugin will use
+   # GCE's Application Default Credentials. Outside of GCE, you will need to
+   # specify a Service Account JSON key file.
+   config :json_key_file, validate: :string, required: false
+
+   # The number of messages to upload at a single time. (< 1000, default: 128)
+   config :batch_size, validate: :number, required: true, default: 128
+
+   # An approximate number of bytes to upload as part of a batch. Default: 1MB
+   config :batch_size_bytes, validate: :number, required: true, default: 1_000_000
+
+   # Uploads all data this often even if other upload criteria aren't met. Default: 5s
+   config :flush_interval_secs, validate: :number, required: true, default: 5
+
+   # The location to store events that could not be uploaded due to errors.
+   # Consider using an additional Logstash input to pipe the contents of
+   # these to an alert platform so you can manually fix the events.
+   #
+   # Or use https://cloud.google.com/storage/docs/gcs-fuse[GCS FUSE] to
+   # transparently upload to a GCS bucket.
+   #
+   # File names follow the pattern `[table name]-[UNIX timestamp].log`
+   config :error_directory, validate: :string, required: true, default: '/tmp/bigquery_errors'
+
+   # The following configuration options still exist to alert users that are using them
+   config :uploader_interval_secs, validate: :number, deprecated: 'No longer used.'
+   config :deleter_interval_secs, validate: :number, deprecated: 'No longer used.'
+   config :key_path, validate: :string, obsolete: 'Use json_key_file or ADC instead.'
+   config :key_password, validate: :string, deprecated: 'No longer needed with json_key_file or ADC.'
+   config :service_account, validate: :string, deprecated: 'No longer needed with json_key_file or ADC.'
+   config :temp_file_prefix, validate: :string, deprecated: 'No longer used.'
+   config :temp_directory, validate: :string, deprecated: 'No longer used.'
+
+   public
+
+   def register
+     @logger.debug('Registering plugin')
+
+     @schema = LogStash::Outputs::BigQuery::Schema.parse_csv_or_json @csv_schema, @json_schema
+     @bq_client = LogStash::Outputs::BigQuery::StreamingClient.new @json_key_file, @project_id, @logger
+     @batcher = LogStash::Outputs::BigQuery::Batcher.new @batch_size, @batch_size_bytes
+
+     init_batcher_flush_thread
+   end
+
+   # Method called for each log event. It adds the event to the in-memory
+   # batch, which is published once a count, size, or flush-interval condition is met.
+   def receive(event)
+     @logger.debug('BQ: receive method called', event: event)
+
+     # Property names MUST NOT have @ in them
+     message = replace_at_keys event.to_hash
+
+     # Message must be written as json
+     encoded_message = LogStash::Json.dump message
+
+     @batcher.enqueue(encoded_message) { |batch| publish(batch) }
+   end
+
+   def get_table_name(time=nil)
+     time ||= Time.now
+
+     str_time = time.strftime(@date_pattern)
+     table_id = @table_prefix + @table_separator + str_time
+
+     # BQ does not accept anything other than alphanumeric and _
+     # Ref: https://developers.google.com/bigquery/browser-tool-quickstart?hl=en
+     table_id.tr!(':-', '_')
+
+     table_id
+   end
+
+   # Remove @ symbols in hash keys
+   def replace_at_keys(event)
+     return event unless event.is_a? Hash
+
+     out = {}
+
+     event.each do |key, value|
+       new_key = key.to_s.delete '@'
+       out[new_key] = replace_at_keys value
+     end
+
+     out
+   end
+
+   # publish sends messages to a BigQuery table immediately
+   def publish(messages)
+     begin
+       return if messages.nil? || messages.empty?
+
+       table = get_table_name
+       @logger.info("Publishing #{messages.length} messages to #{table}")
+
+       create_table_if_not_exists table
+
+       successful = @bq_client.append @dataset, table, messages, @ignore_unknown_values
+       write_to_errors_file(messages, table) unless successful
+     rescue StandardError => e
+       @logger.error 'Error uploading data.', :exception => e
+
+       write_to_errors_file(messages, table)
+     end
+   end
+
+   def create_table_if_not_exists table
+     begin
+       return nil if @bq_client.table_exists? @dataset, table
+       @bq_client.create_table(@dataset, table, @schema)
+
+     rescue StandardError => e
+       @logger.error 'Error creating table.', :exception => e
+     end
+   end
+
+   def write_to_errors_file(messages, table)
+     begin
+       FileUtils.mkdir_p @error_directory
+
+       t = Time.new
+       error_file_name = "#{table}-#{t.to_i}.log"
+       error_file_path = ::File.join(@error_directory, error_file_name)
+       @logger.info "Problem data is being stored in: #{error_file_path}"
+
+       File.open(error_file_path, 'w') do |f|
+         messages.each { |message| f.puts message }
+       end
+     rescue StandardError => e
+       @logger.error 'Error creating error file.', :exception => e, :messages => messages, :table => table
+     end
+   end
+
+   def init_batcher_flush_thread
+     @flush_thread = Thread.new do
+       loop do
+         sleep @flush_interval_secs
+
+         @batcher.enqueue(nil) { |batch| publish(batch) }
+       end
+     end
+   end
+ end
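
To make the data path concrete, here is what `replace_at_keys` and `get_table_name` produce for a typical event with the default `table_prefix`, `table_separator`, and `date_pattern` (a worked sketch with illustrative values):

[source,ruby]
--------------------------
event = { '@timestamp' => '2018-05-01T12:34:56Z', 'status' => 200, 'tags' => { '@version' => '1' } }

# replace_at_keys strips '@' from every key, recursively:
# => { 'timestamp' => '2018-05-01T12:34:56Z', 'status' => 200, 'tags' => { 'version' => '1' } }

# get_table_name(Time.utc(2018, 5, 1, 12)) with the defaults
# ('logstash', '_', '%Y-%m-%dT%H:00') first builds 'logstash_2018-05-01T12:00'
# and then replaces ':' and '-' with '_':
# => 'logstash_2018_05_01T12_00'
--------------------------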