logstash-output-google_bigquery 4.0.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +71 -0
  3. data/CONTRIBUTORS +15 -0
  4. data/Gemfile +11 -0
  5. data/LICENSE +13 -0
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +100 -0
  8. data/docs/index.asciidoc +348 -0
  9. data/lib/logstash-output-google_bigquery_jars.rb +38 -0
  10. data/lib/logstash/outputs/bigquery/batcher.rb +82 -0
  11. data/lib/logstash/outputs/bigquery/schema.rb +93 -0
  12. data/lib/logstash/outputs/bigquery/streamclient.rb +120 -0
  13. data/lib/logstash/outputs/google_bigquery.rb +280 -0
  14. data/logstash-output-google_bigquery.gemspec +31 -0
  15. data/spec/outputs/bigquery/batcher_spec.rb +110 -0
  16. data/spec/outputs/bigquery/schema_spec.rb +101 -0
  17. data/spec/outputs/google_bigquery_spec.rb +154 -0
  18. data/vendor/jar-dependencies/com/fasterxml/jackson/core/jackson-core/2.1.3/jackson-core-2.1.3.jar +0 -0
  19. data/vendor/jar-dependencies/com/google/api-client/google-api-client/1.23.0/google-api-client-1.23.0.jar +0 -0
  20. data/vendor/jar-dependencies/com/google/api/api-common/1.5.0/api-common-1.5.0.jar +0 -0
  21. data/vendor/jar-dependencies/com/google/api/gax-httpjson/0.40.0/gax-httpjson-0.40.0.jar +0 -0
  22. data/vendor/jar-dependencies/com/google/api/gax/1.23.0/gax-1.23.0.jar +0 -0
  23. data/vendor/jar-dependencies/com/google/api/grpc/proto-google-common-protos/1.7.0/proto-google-common-protos-1.7.0.jar +0 -0
  24. data/vendor/jar-dependencies/com/google/api/grpc/proto-google-iam-v1/0.8.0/proto-google-iam-v1-0.8.0.jar +0 -0
  25. data/vendor/jar-dependencies/com/google/apis/google-api-services-bigquery/v2-rev377-1.23.0/google-api-services-bigquery-v2-rev377-1.23.0.jar +0 -0
  26. data/vendor/jar-dependencies/com/google/auth/google-auth-library-credentials/0.9.0/google-auth-library-credentials-0.9.0.jar +0 -0
  27. data/vendor/jar-dependencies/com/google/auth/google-auth-library-oauth2-http/0.9.0/google-auth-library-oauth2-http-0.9.0.jar +0 -0
  28. data/vendor/jar-dependencies/com/google/auto/value/auto-value/1.4/auto-value-1.4.jar +0 -0
  29. data/vendor/jar-dependencies/com/google/cloud/google-cloud-bigquery/1.24.1/google-cloud-bigquery-1.24.1.jar +0 -0
  30. data/vendor/jar-dependencies/com/google/cloud/google-cloud-core-http/1.24.1/google-cloud-core-http-1.24.1.jar +0 -0
  31. data/vendor/jar-dependencies/com/google/cloud/google-cloud-core/1.24.1/google-cloud-core-1.24.1.jar +0 -0
  32. data/vendor/jar-dependencies/com/google/code/findbugs/jsr305/3.0.1/jsr305-3.0.1.jar +0 -0
  33. data/vendor/jar-dependencies/com/google/code/gson/gson/2.7/gson-2.7.jar +0 -0
  34. data/vendor/jar-dependencies/com/google/errorprone/error_prone_annotations/2.2.0/error_prone_annotations-2.2.0.jar +0 -0
  35. data/vendor/jar-dependencies/com/google/guava/guava/20.0/guava-20.0.jar +0 -0
  36. data/vendor/jar-dependencies/com/google/http-client/google-http-client-appengine/1.23.0/google-http-client-appengine-1.23.0.jar +0 -0
  37. data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson/1.23.0/google-http-client-jackson-1.23.0.jar +0 -0
  38. data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson2/1.23.0/google-http-client-jackson2-1.23.0.jar +0 -0
  39. data/vendor/jar-dependencies/com/google/http-client/google-http-client/1.23.0/google-http-client-1.23.0.jar +0 -0
  40. data/vendor/jar-dependencies/com/google/oauth-client/google-oauth-client/1.23.0/google-oauth-client-1.23.0.jar +0 -0
  41. data/vendor/jar-dependencies/com/google/protobuf/protobuf-java-util/3.5.1/protobuf-java-util-3.5.1.jar +0 -0
  42. data/vendor/jar-dependencies/com/google/protobuf/protobuf-java/3.5.1/protobuf-java-3.5.1.jar +0 -0
  43. data/vendor/jar-dependencies/commons-codec/commons-codec/1.9/commons-codec-1.9.jar +0 -0
  44. data/vendor/jar-dependencies/commons-logging/commons-logging/1.2/commons-logging-1.2.jar +0 -0
  45. data/vendor/jar-dependencies/io/grpc/grpc-context/1.9.0/grpc-context-1.9.0.jar +0 -0
  46. data/vendor/jar-dependencies/io/opencensus/opencensus-api/0.11.1/opencensus-api-0.11.1.jar +0 -0
  47. data/vendor/jar-dependencies/io/opencensus/opencensus-contrib-http-util/0.11.1/opencensus-contrib-http-util-0.11.1.jar +0 -0
  48. data/vendor/jar-dependencies/joda-time/joda-time/2.9.2/joda-time-2.9.2.jar +0 -0
  49. data/vendor/jar-dependencies/org/apache/httpcomponents/httpclient/4.5.2/httpclient-4.5.2.jar +0 -0
  50. data/vendor/jar-dependencies/org/apache/httpcomponents/httpcore/4.4.4/httpcore-4.4.4.jar +0 -0
  51. data/vendor/jar-dependencies/org/codehaus/jackson/jackson-core-asl/1.9.11/jackson-core-asl-1.9.11.jar +0 -0
  52. data/vendor/jar-dependencies/org/threeten/threetenbp/1.3.3/threetenbp-1.3.3.jar +0 -0
  53. metadata +178 -0
# Gem packaging manifest for the logstash-output-google_bigquery plugin.
# Declares metadata, the shipped file set (including vendored JARs), and
# the runtime/development dependency constraints.
Gem::Specification.new do |s|
  s.name     = 'logstash-output-google_bigquery'
  s.version  = '4.0.0'
  s.platform = 'java'
  s.licenses = ['Apache License (2.0)']

  s.summary     = "Writes events to Google BigQuery"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"

  s.authors  = ["Elastic"]
  s.email    = 'info@elastic.co'
  s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html"

  # vendor/jar-dependencies is on the load path so the bundled Java
  # client libraries can be required at runtime.
  s.require_paths = ["lib", "vendor/jar-dependencies"]

  # Everything shipped in the gem: Ruby sources, specs, docs, and the
  # vendored Google Cloud JARs.
  s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash-codec-plain'
  s.add_runtime_dependency 'mime-types', '~> 2' # last version compatible with ruby 2.x
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"

  s.add_development_dependency 'logstash-devutils'
  s.add_development_dependency 'jar-dependencies', '~> 0.3.4'
end
# Unit tests for the count/size-based message batcher used by the BigQuery
# output to group serialized events before a streaming insert.
#
# Fixes over the original:
#  - added the spec_helper require for consistency with the sibling spec files
#  - removed the unused `logger` spy (it was never referenced by any example)
require 'logstash/devutils/rspec/spec_helper'
require 'logstash/outputs/bigquery/batcher'
require 'thread'

describe LogStash::Outputs::BigQuery::Batcher do
  # Batcher flushes after 2 messages or 1,000 bytes, whichever comes first.
  let(:batcher) { LogStash::Outputs::BigQuery::Batcher.new(2, 1_000) }
  let(:one_b_message) { 'a' }
  let(:one_k_message) { 'a' * 1000 }
  let(:batch_queue) { Queue.new }

  describe '#enqueue' do
    it 'returns nil if no reason to flush' do
      batch = batcher.enqueue one_b_message

      expect(batch).to be_nil
    end

    # Passing nil is the explicit "flush now" signal.
    it 'returns a batch if passed nil' do
      batch = batcher.enqueue nil

      expect(batch).to_not be_nil
    end

    it 'returns a batch if the message count overflows' do
      batch = batcher.enqueue one_b_message
      expect(batch).to be_nil

      batch = batcher.enqueue one_b_message
      expect(batch).to_not be_nil
    end

    it 'returns a batch if the message size overflows' do
      batch = batcher.enqueue one_b_message
      expect(batch).to be_nil

      batch = batcher.enqueue one_k_message
      expect(batch).to_not be_nil
    end

    it 'clears internal state after returning a batch' do
      batch = batcher.enqueue one_k_message

      expect(batch).to_not be_nil
      expect(batcher.empty?).to be_truthy
    end

    it 'does not yield a batch if there is no reason to flush' do
      batch = nil
      batcher.enqueue(one_b_message) { |b| batch = b }

      expect(batch).to be_nil
    end

    it 'yields a batch on flush' do
      batch = nil
      batcher.enqueue(nil) { |b| batch = b }

      expect(batch).to_not be_nil
      expect(batch.length).to eq 0
    end

    it 'yields a batch on overflow' do
      batch = nil
      batcher.enqueue(one_k_message) { |b| batch = b }

      expect(batch).to_not be_nil
      expect(batch.length).to eq 1
    end
  end

  describe '#enqueue_push' do
    it 'enqueues nothing nil if no reason to flush' do
      batcher.enqueue_push one_b_message, batch_queue

      expect(batch_queue.length).to eq 0
    end

    it 'enqueues a batch if passed nil' do
      batcher.enqueue_push nil, batch_queue

      expect(batch_queue.length).to eq 1
    end

    it 'enqueues a batch if the message count overflows' do
      batcher.enqueue_push one_b_message, batch_queue
      expect(batch_queue.length).to eq 0

      batcher.enqueue_push one_b_message, batch_queue
      expect(batch_queue.length).to eq 1
    end

    it 'enqueues a batch if the message size overflows' do
      batcher.enqueue_push one_b_message, batch_queue
      expect(batch_queue.length).to eq 0

      batcher.enqueue_push one_k_message, batch_queue
      expect(batch_queue.length).to eq 1
    end
  end

  describe '#clear' do
    it 'removes any existing messages' do
      batcher.enqueue one_b_message
      expect(batcher.empty?).to be_falsey

      batcher.clear
      expect(batcher.empty?).to be_truthy
    end
  end
end
# Unit tests for the schema translation helpers that turn CSV/Hash schema
# descriptions into Java com.google.cloud.bigquery.Schema objects.
require 'logstash/devutils/rspec/spec_helper'
require 'logstash/outputs/bigquery/schema'
require 'thread'
require 'java'

describe LogStash::Outputs::BigQuery::Schema do
  # Fixtures: a leaf field, a nested RECORD field, and a full schema hash,
  # all with string keys (mirroring parsed JSON config).
  let(:simple_field)       { keys_to_strs({name: 'foo', type: 'STRING'}) }
  let(:complex_field)      { keys_to_strs({name: 'params', type: 'RECORD', mode: 'REPEATED', description: 'desc', fields: [simple_field]}) }
  let(:example_field_list) { [simple_field, complex_field] }
  let(:example_schema)     { keys_to_strs({fields: example_field_list}) }

  describe '#parse_csv_or_json' do
    it 'ensures CSV and JSON are not both nil' do
      expect { LogStash::Outputs::BigQuery::Schema.parse_csv_or_json nil, nil }.to raise_error ArgumentError
    end

    it 'ensures CSV and JSON are not both defined' do
      expect { LogStash::Outputs::BigQuery::Schema.parse_csv_or_json "", {} }.to raise_error ArgumentError
    end

    it 'parses a CSV schema if it exists' do
      result = LogStash::Outputs::BigQuery::Schema.parse_csv_or_json "name:STRING", nil
      expect(result).not_to be_nil
    end

    it 'converts the resulting schema into a Java one' do
      result = LogStash::Outputs::BigQuery::Schema.parse_csv_or_json "name:STRING", nil
      expect(result.getClass().getName()).to eq('com.google.cloud.bigquery.Schema')
    end
  end

  describe '#parse_csv_schema' do
    it 'splits a CSV into name->type structures' do
      expected = {fields: [keys_to_strs({name:'foo', type:'STRING'}), keys_to_strs({name:'bar', type:'FLOAT'})]}
      result   = LogStash::Outputs::BigQuery::Schema.parse_csv_schema "foo:STRING,bar:FLOAT"

      expect(result).to eq(keys_to_strs(expected))
    end

    # Entries must be exactly name:type; anything else is rejected.
    it 'fails on a malformed CSV' do
      expect { LogStash::Outputs::BigQuery::Schema.parse_csv_schema "foo:bar:bazz" }.to raise_error ArgumentError
      expect { LogStash::Outputs::BigQuery::Schema.parse_csv_schema "foo:bar,,bar:bazz" }.to raise_error ArgumentError
      expect { LogStash::Outputs::BigQuery::Schema.parse_csv_schema "foo:bar," }.to raise_error ArgumentError
    end
  end

  describe '#hash_to_java_schema' do
    subject { LogStash::Outputs::BigQuery::Schema.hash_to_java_schema(example_schema) }

    it 'parses the field list from the fields key' do
      expect(subject.getFields().size()).to eq(2)
    end

    it 'returns a BigQuery Schema object' do
      expect(subject.getClass().getName()).to eq('com.google.cloud.bigquery.Schema')
    end
  end

  describe '#parse_field_list' do
    subject { LogStash::Outputs::BigQuery::Schema.parse_field_list(example_field_list) }

    it 'returns a Java FieldList object' do
      expect(subject.getClass().getName()).to eq('com.google.cloud.bigquery.FieldList')
    end
  end

  describe '#parse_field' do
    subject { LogStash::Outputs::BigQuery::Schema.parse_field(complex_field) }

    it 'sets the correct name and type' do
      expect(subject.getName()).to eq('params')
      expect(subject.getType().toString()).to eq('RECORD')
    end

    it 'sets a description and mode if present' do
      expect(subject.getDescription()).to eq('desc')
    end

    it 'sets sub-fields if present' do
      expect(subject.getSubFields().size()).to eq(1)
    end

    it 'returns a Java Field object' do
      expect(subject.getClass().getName()).to eq('com.google.cloud.bigquery.Field')
    end
  end

  # Recursively converts all Hash keys to strings; non-Hash values pass
  # through untouched.
  def keys_to_strs(event)
    return event unless event.is_a?(Hash)

    event.each_with_object({}) do |(key, value), out|
      out[key.to_s] = keys_to_strs(value)
    end
  end
end
# Unit tests for the Google BigQuery output plugin: table naming, key
# sanitization, publishing, lazy table creation, and the error-file fallback.
#
# Fix over the original: in "#publish / does nothing if there are no
# messages" the `not_to receive` expectation was declared AFTER the calls
# under test. RSpec message expectations only watch calls made after they
# are set, so the original assertion was vacuous; it is now set up front.
require "logstash/devutils/rspec/spec_helper"
require 'logstash/outputs/google_bigquery'
require 'logstash/outputs/bigquery/streamclient'

describe LogStash::Outputs::GoogleBigQuery do

  let(:config) { { 'project_id' => 'project', 'dataset' => 'dataset', 'csv_schema' => 'path:STRING,status:INTEGER,score:FLOAT' } }
  let(:sample_event) { LogStash::Event.new }
  let(:bq_client) { double('streaming-client') }
  let(:errors_file) { double('errors file') }

  subject { LogStash::Outputs::GoogleBigQuery.new(config) }

  before(:each) do
    # Stub the real streaming client and the background flush thread so
    # #register runs without credentials, network access, or new threads.
    allow(LogStash::Outputs::BigQuery::StreamingClient).to receive(:new).and_return(bq_client)
    expect(LogStash::Outputs::BigQuery::StreamingClient).to receive(:new)

    allow(subject).to receive(:init_batcher_flush_thread).and_return(nil)
    expect(subject).to receive(:init_batcher_flush_thread)

    subject.register
  end

  describe '#get_table_name' do
    it 'does not crash if no time is given' do
      subject.get_table_name
    end

    # Minutes are truncated to the hour in the generated table id.
    it 'formats the table name correctly' do
      table_id = subject.get_table_name Time.new(2012,9,8,7,6)
      expect(table_id).to eq('logstash_2012_09_08T07_00')
    end
  end

  describe '#replace_at_keys' do
    it 'removes @ in keys' do
      nested = {'@foo' => 'bar'}
      expected = {foo: 'bar'}

      out = subject.replace_at_keys nested

      expect(out).to eq(keys_to_strs(expected))
    end

    it 'does not remove @ in values' do
      nested = {foo: '@bar'}

      out = subject.replace_at_keys nested

      expect(out).to eq(keys_to_strs(nested))
    end

    it 'removes @ in nested keys' do
      nested = {foo: {'@bar' => 'bazz'}}
      expected = {foo: {bar: 'bazz'}}

      out = subject.replace_at_keys nested

      expect(out).to eq(keys_to_strs(expected))
    end
  end

  describe '#publish' do
    it 'does nothing if there are no messages' do
      allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
      # Must be declared BEFORE the action; the original set it after the
      # publish calls, so the check could never fail.
      expect(subject).not_to receive(:create_table_if_not_exists)

      subject.publish nil
      subject.publish []
    end

    it 'creates a table if it does not exist' do
      allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
      allow(bq_client).to receive(:append).and_return(true)
      allow(subject).to receive(:write_to_errors_file).and_return(nil)
      expect(subject).to receive(:create_table_if_not_exists)

      subject.publish ['{"foo":"bar"}']
    end

    it 'writes rows to a file on failed insert' do
      allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
      allow(bq_client).to receive(:append).and_return(false)
      allow(subject).to receive(:write_to_errors_file).and_return(nil)
      expect(subject).to receive(:write_to_errors_file)

      subject.publish ['{"foo":"bar"}']
    end

    it 'writes rows to a file if insert threw an exception' do
      allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
      allow(bq_client).to receive(:append).and_raise('expected insert error')
      allow(subject).to receive(:write_to_errors_file).and_return(nil)
      expect(subject).to receive(:write_to_errors_file)

      subject.publish ['{"foo":"bar"}']
    end
  end

  describe '#create_table_if_not_exists' do
    it 'checks if a table exists' do
      allow(bq_client).to receive(:table_exists?).and_return(true)
      expect(bq_client).to receive(:table_exists?)

      subject.create_table_if_not_exists 'foo'
    end

    it 'creates a table if it does not exist' do
      allow(bq_client).to receive(:table_exists?).and_return(false)
      allow(bq_client).to receive(:create_table).and_return(nil)
      expect(bq_client).to receive(:table_exists?)
      expect(bq_client).to receive(:create_table)

      subject.create_table_if_not_exists 'foo'
    end
  end

  describe '#write_to_errors_file' do
    it 'creates missing directories' do
      allow(File).to receive(:open).and_return(errors_file)
      allow(FileUtils).to receive(:mkdir_p)
      expect(FileUtils).to receive(:mkdir_p)

      subject.write_to_errors_file(['a','b'], 'table_name')
    end

    # Error logging is best-effort: failures must not propagate.
    it 'does not fail on exception' do
      allow(FileUtils).to receive(:mkdir_p).and_raise("exception creating directories")
      expect{subject.write_to_errors_file([], 'table_name')}.to_not raise_error
    end
  end

  # Recursively converts all Hash keys to strings; non-Hash values pass
  # through untouched.
  def keys_to_strs(event)
    return event unless event.is_a? Hash

    out = {}

    event.each do |key, value|
      out[key.to_s] = keys_to_strs value
    end

    out
  end

  # NOTE(review): this custom matcher is defined but unused in this file;
  # kept in case shared examples elsewhere rely on it — confirm and remove.
  RSpec::Matchers.define :starts_with do |x|
    match { |actual| actual.start_with? x}
  end

end