logstash-output-google_bigquery 4.0.0-java

Files changed (53)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +71 -0
  3. data/CONTRIBUTORS +15 -0
  4. data/Gemfile +11 -0
  5. data/LICENSE +13 -0
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +100 -0
  8. data/docs/index.asciidoc +348 -0
  9. data/lib/logstash-output-google_bigquery_jars.rb +38 -0
  10. data/lib/logstash/outputs/bigquery/batcher.rb +82 -0
  11. data/lib/logstash/outputs/bigquery/schema.rb +93 -0
  12. data/lib/logstash/outputs/bigquery/streamclient.rb +120 -0
  13. data/lib/logstash/outputs/google_bigquery.rb +280 -0
  14. data/logstash-output-google_bigquery.gemspec +31 -0
  15. data/spec/outputs/bigquery/batcher_spec.rb +110 -0
  16. data/spec/outputs/bigquery/schema_spec.rb +101 -0
  17. data/spec/outputs/google_bigquery_spec.rb +154 -0
  18. data/vendor/jar-dependencies/com/fasterxml/jackson/core/jackson-core/2.1.3/jackson-core-2.1.3.jar +0 -0
  19. data/vendor/jar-dependencies/com/google/api-client/google-api-client/1.23.0/google-api-client-1.23.0.jar +0 -0
  20. data/vendor/jar-dependencies/com/google/api/api-common/1.5.0/api-common-1.5.0.jar +0 -0
  21. data/vendor/jar-dependencies/com/google/api/gax-httpjson/0.40.0/gax-httpjson-0.40.0.jar +0 -0
  22. data/vendor/jar-dependencies/com/google/api/gax/1.23.0/gax-1.23.0.jar +0 -0
  23. data/vendor/jar-dependencies/com/google/api/grpc/proto-google-common-protos/1.7.0/proto-google-common-protos-1.7.0.jar +0 -0
  24. data/vendor/jar-dependencies/com/google/api/grpc/proto-google-iam-v1/0.8.0/proto-google-iam-v1-0.8.0.jar +0 -0
  25. data/vendor/jar-dependencies/com/google/apis/google-api-services-bigquery/v2-rev377-1.23.0/google-api-services-bigquery-v2-rev377-1.23.0.jar +0 -0
  26. data/vendor/jar-dependencies/com/google/auth/google-auth-library-credentials/0.9.0/google-auth-library-credentials-0.9.0.jar +0 -0
  27. data/vendor/jar-dependencies/com/google/auth/google-auth-library-oauth2-http/0.9.0/google-auth-library-oauth2-http-0.9.0.jar +0 -0
  28. data/vendor/jar-dependencies/com/google/auto/value/auto-value/1.4/auto-value-1.4.jar +0 -0
  29. data/vendor/jar-dependencies/com/google/cloud/google-cloud-bigquery/1.24.1/google-cloud-bigquery-1.24.1.jar +0 -0
  30. data/vendor/jar-dependencies/com/google/cloud/google-cloud-core-http/1.24.1/google-cloud-core-http-1.24.1.jar +0 -0
  31. data/vendor/jar-dependencies/com/google/cloud/google-cloud-core/1.24.1/google-cloud-core-1.24.1.jar +0 -0
  32. data/vendor/jar-dependencies/com/google/code/findbugs/jsr305/3.0.1/jsr305-3.0.1.jar +0 -0
  33. data/vendor/jar-dependencies/com/google/code/gson/gson/2.7/gson-2.7.jar +0 -0
  34. data/vendor/jar-dependencies/com/google/errorprone/error_prone_annotations/2.2.0/error_prone_annotations-2.2.0.jar +0 -0
  35. data/vendor/jar-dependencies/com/google/guava/guava/20.0/guava-20.0.jar +0 -0
  36. data/vendor/jar-dependencies/com/google/http-client/google-http-client-appengine/1.23.0/google-http-client-appengine-1.23.0.jar +0 -0
  37. data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson/1.23.0/google-http-client-jackson-1.23.0.jar +0 -0
  38. data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson2/1.23.0/google-http-client-jackson2-1.23.0.jar +0 -0
  39. data/vendor/jar-dependencies/com/google/http-client/google-http-client/1.23.0/google-http-client-1.23.0.jar +0 -0
  40. data/vendor/jar-dependencies/com/google/oauth-client/google-oauth-client/1.23.0/google-oauth-client-1.23.0.jar +0 -0
  41. data/vendor/jar-dependencies/com/google/protobuf/protobuf-java-util/3.5.1/protobuf-java-util-3.5.1.jar +0 -0
  42. data/vendor/jar-dependencies/com/google/protobuf/protobuf-java/3.5.1/protobuf-java-3.5.1.jar +0 -0
  43. data/vendor/jar-dependencies/commons-codec/commons-codec/1.9/commons-codec-1.9.jar +0 -0
  44. data/vendor/jar-dependencies/commons-logging/commons-logging/1.2/commons-logging-1.2.jar +0 -0
  45. data/vendor/jar-dependencies/io/grpc/grpc-context/1.9.0/grpc-context-1.9.0.jar +0 -0
  46. data/vendor/jar-dependencies/io/opencensus/opencensus-api/0.11.1/opencensus-api-0.11.1.jar +0 -0
  47. data/vendor/jar-dependencies/io/opencensus/opencensus-contrib-http-util/0.11.1/opencensus-contrib-http-util-0.11.1.jar +0 -0
  48. data/vendor/jar-dependencies/joda-time/joda-time/2.9.2/joda-time-2.9.2.jar +0 -0
  49. data/vendor/jar-dependencies/org/apache/httpcomponents/httpclient/4.5.2/httpclient-4.5.2.jar +0 -0
  50. data/vendor/jar-dependencies/org/apache/httpcomponents/httpcore/4.4.4/httpcore-4.4.4.jar +0 -0
  51. data/vendor/jar-dependencies/org/codehaus/jackson/jackson-core-asl/1.9.11/jackson-core-asl-1.9.11.jar +0 -0
  52. data/vendor/jar-dependencies/org/threeten/threetenbp/1.3.3/threetenbp-1.3.3.jar +0 -0
  53. metadata +178 -0
data/logstash-output-google_bigquery.gemspec
@@ -0,0 +1,31 @@
+ Gem::Specification.new do |s|
+ s.name = 'logstash-output-google_bigquery'
+ s.version = '4.0.0'
+ s.licenses = ['Apache License (2.0)']
+ s.summary = "Writes events to Google BigQuery"
+ s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
+ s.authors = ["Elastic"]
+ s.email = 'info@elastic.co'
+ s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html"
+ s.require_paths = ["lib", "vendor/jar-dependencies"]
+
+ # Files
+ s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
+
+ # Tests
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+ # Special flag to let us know this is actually a logstash plugin
+ s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }
+
+ # Gem dependencies
+ s.add_runtime_dependency 'logstash-codec-plain'
+ s.add_runtime_dependency 'mime-types', '~> 2' # last version compatible with ruby 2.x
+ s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
+
+ s.add_development_dependency 'logstash-devutils'
+ s.add_development_dependency 'jar-dependencies', '~> 0.3.4'
+
+ s.platform = 'java'
+ end
+
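As the gemspec's description notes, this is not a stand-alone program: it is installed into an existing Logstash with `$LS_HOME/bin/logstash-plugin install logstash-output-google_bigquery`. Note the second entry in require_paths: putting vendor/jar-dependencies on the load path is what lets this JRuby plugin load the vendored Google Cloud client jars listed in the file table above.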
data/spec/outputs/bigquery/batcher_spec.rb
@@ -0,0 +1,110 @@
+ require 'logstash/outputs/bigquery/batcher'
+ require 'thread'
+
+ describe LogStash::Outputs::BigQuery::Batcher do
+ let(:logger) { spy(:logger) }
+ let(:batcher) { LogStash::Outputs::BigQuery::Batcher.new(2, 1_000) }
+ let(:one_b_message) { 'a' }
+ let(:one_k_message) { 'a' * 1000 }
+ let(:batch_queue) { Queue.new }
+
+ describe '#enqueue' do
+ it 'returns nil if no reason to flush' do
+ batch = batcher.enqueue one_b_message
+
+ expect(batch).to be_nil
+ end
+
+ it 'returns a batch if passed nil' do
+ batch = batcher.enqueue nil
+
+ expect(batch).to_not be_nil
+ end
+
+ it 'returns a batch if the message count overflows' do
+ batch = batcher.enqueue one_b_message
+ expect(batch).to be_nil
+
+ batch = batcher.enqueue one_b_message
+ expect(batch).to_not be_nil
+ end
+
+ it 'returns a batch if the message size overflows' do
+ batch = batcher.enqueue one_b_message
+ expect(batch).to be_nil
+
+ batch = batcher.enqueue one_k_message
+ expect(batch).to_not be_nil
+ end
+
+ it 'clears internal state after returning a batch' do
+ batch = batcher.enqueue one_k_message
+
+ expect(batch).to_not be_nil
+ expect(batcher.empty?).to be_truthy
+ end
+
+ it 'does not yield a batch if there is no reason to flush' do
+ batch = nil
+ batcher.enqueue(one_b_message) { |b| batch = b }
+
+ expect(batch).to be_nil
+ end
+
+ it 'yields a batch on flush' do
+ batch = nil
+ batcher.enqueue(nil) { |b| batch = b }
+
+ expect(batch).to_not be_nil
+ expect(batch.length).to eq 0
+ end
+
+ it 'yields a batch on overflow' do
+ batch = nil
+ batcher.enqueue(one_k_message) { |b| batch = b }
+
+ expect(batch).to_not be_nil
+ expect(batch.length).to eq 1
+ end
+ end
+
+ describe '#enqueue_push' do
+ it 'enqueues nothing if there is no reason to flush' do
+ batcher.enqueue_push one_b_message, batch_queue
+
+ expect(batch_queue.length).to eq 0
+ end
+
+ it 'enqueues a batch if passed nil' do
+ batcher.enqueue_push nil, batch_queue
+
+ expect(batch_queue.length).to eq 1
+ end
+
+ it 'enqueues a batch if the message count overflows' do
+ batcher.enqueue_push one_b_message, batch_queue
+ expect(batch_queue.length).to eq 0
+
+ batcher.enqueue_push one_b_message, batch_queue
+ expect(batch_queue.length).to eq 1
+ end
+
+ it 'enqueues a batch if the message size overflows' do
+ batcher.enqueue_push one_b_message, batch_queue
+ expect(batch_queue.length).to eq 0
+
+ batcher.enqueue_push one_k_message, batch_queue
+ expect(batch_queue.length).to eq 1
+ end
+ end
+
+ describe '#clear' do
+ it 'removes any existing messages' do
+ batcher.enqueue one_b_message
+ expect(batcher.empty?).to be_falsey
+
+ batcher.clear
+ expect(batcher.empty?).to be_truthy
+ end
+ end
+ end
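Read together, these cases pin down the Batcher contract: a batch is flushed when it is explicitly requested (a nil message), when the row count reaches the first constructor argument, or when the accumulated byte size reaches the second. Below is a minimal sketch that satisfies the cases above; its internals are assumptions read off the spec, and the shipped class in data/lib/logstash/outputs/bigquery/batcher.rb is the authoritative version (and likely adds thread safety and logging).

module LogStash
  module Outputs
    module BigQuery
      # Hypothetical minimal Batcher reconstructed from the spec.
      class Batcher
        def initialize(max_rows, max_bytes)
          @max_rows  = max_rows
          @max_bytes = max_bytes
          clear
        end

        # Accumulates a message. Returns (and yields) the pending batch
        # when message is nil (an explicit flush) or when either limit is
        # reached; otherwise returns nil.
        def enqueue(message)
          unless message.nil?
            @batch << message
            @bytes += message.bytesize
          end

          flush = message.nil? || @batch.length >= @max_rows || @bytes >= @max_bytes
          return nil unless flush

          batch = @batch
          clear
          yield batch if block_given?
          batch
        end

        # Same flush rules as #enqueue, but completed batches are pushed
        # onto the given queue instead of being returned.
        def enqueue_push(message, queue)
          enqueue(message) { |batch| queue << batch }
        end

        def clear
          @batch = []
          @bytes = 0
        end

        def empty?
          @batch.empty?
        end
      end
    end
  end
end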
data/spec/outputs/bigquery/schema_spec.rb
@@ -0,0 +1,101 @@
+ require 'logstash/devutils/rspec/spec_helper'
+ require 'logstash/outputs/bigquery/schema'
+ require 'thread'
+ require 'java'
+
+ describe LogStash::Outputs::BigQuery::Schema do
+
+ let(:simple_field) {keys_to_strs({name: 'foo', type: 'STRING'})}
+ let(:complex_field) {keys_to_strs({name: 'params', type: 'RECORD', mode: 'REPEATED', description: 'desc', fields: [simple_field]})}
+ let(:example_field_list) {[simple_field, complex_field]}
+ let(:example_schema) {keys_to_strs({fields:example_field_list})}
+
+ describe '#parse_csv_or_json' do
+ it 'ensures CSV and JSON are not both nil' do
+ expect{LogStash::Outputs::BigQuery::Schema.parse_csv_or_json nil, nil}.to raise_error ArgumentError
+ end
+
+ it 'ensures CSV and JSON are not both defined' do
+ expect{LogStash::Outputs::BigQuery::Schema.parse_csv_or_json "", {}}.to raise_error ArgumentError
+ end
+
+ it 'parses a CSV schema if it exists' do
+ result = LogStash::Outputs::BigQuery::Schema.parse_csv_or_json "name:STRING", nil
+ expect(result).to_not be_nil
+ end
+
+ it 'converts the resulting schema into a Java one' do
+ result = LogStash::Outputs::BigQuery::Schema.parse_csv_or_json "name:STRING", nil
+ expect(result.getClass().getName()).to eq('com.google.cloud.bigquery.Schema')
+ end
+ end
+
+ describe '#parse_csv_schema' do
+ it 'splits a CSV into name->type structures' do
+ expected = {fields: [keys_to_strs({name:'foo', type:'STRING'}), keys_to_strs({name:'bar', type:'FLOAT'})]}
+ result = LogStash::Outputs::BigQuery::Schema.parse_csv_schema "foo:STRING,bar:FLOAT"
+
+ expect(result).to eq(keys_to_strs(expected))
+ end
+
+ it 'fails on a malformed CSV' do
+ expect{LogStash::Outputs::BigQuery::Schema.parse_csv_schema "foo:bar:bazz"}.to raise_error ArgumentError
+ expect{LogStash::Outputs::BigQuery::Schema.parse_csv_schema "foo:bar,,bar:bazz"}.to raise_error ArgumentError
+ expect{LogStash::Outputs::BigQuery::Schema.parse_csv_schema "foo:bar,"}.to raise_error ArgumentError
+ end
+ end
+
+ describe '#hash_to_java_schema' do
+ subject{LogStash::Outputs::BigQuery::Schema.hash_to_java_schema(example_schema)}
+
+ it 'parses the field list from the fields key' do
+ expect(subject.getFields().size()).to eq(2)
+ end
+
+ it 'returns a BigQuery Schema object' do
+ expect(subject.getClass().getName()).to eq('com.google.cloud.bigquery.Schema')
+ end
+ end
+
+ describe '#parse_field_list' do
+ subject{LogStash::Outputs::BigQuery::Schema.parse_field_list(example_field_list)}
+
+ it 'returns a Java FieldList object' do
+ expect(subject.getClass().getName()).to eq('com.google.cloud.bigquery.FieldList')
+ end
+ end
+
+ describe '#parse_field' do
+ subject{LogStash::Outputs::BigQuery::Schema.parse_field(complex_field)}
+
+ it 'sets the correct name and type' do
+ expect(subject.getName()).to eq('params')
+ expect(subject.getType().toString()).to eq('RECORD')
+ end
+
+ it 'sets a description and mode if present' do
+ expect(subject.getDescription()).to eq('desc')
+ end
+
+ it 'sets sub-fields if present' do
+ expect(subject.getSubFields().size()).to eq(1)
+ end
+
+ it 'returns a Java Field object' do
+ expect(subject.getClass().getName()).to eq('com.google.cloud.bigquery.Field')
+ end
+ end
+
+ # Recursively converts a hash's symbol keys into strings.
+ def keys_to_strs(event)
+ return event unless event.is_a? Hash
+
+ out = {}
+
+ event.each do |key, value|
+ out[key.to_s] = keys_to_strs value
+ end
+
+ out
+ end
+ end
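The CSV cases fix the schema grammar: a comma-separated list of name:TYPE pairs, each with exactly one colon, parsed into a string-keyed hash before conversion to the Java Schema. A usage sketch of what the assertions above describe (return values shown as the tests expect them):

require 'logstash/outputs/bigquery/schema'

# Parses the flat CSV grammar the specs exercise; a field with more or
# fewer than one colon, or an empty field, raises ArgumentError.
hash = LogStash::Outputs::BigQuery::Schema.parse_csv_schema('path:STRING,status:INTEGER')
# => {"fields"=>[{"name"=>"path", "type"=>"STRING"},
#                {"name"=>"status", "type"=>"INTEGER"}]}

# The hash form also admits nested RECORD fields (see complex_field above)
# and is turned into a com.google.cloud.bigquery.Schema for the client:
java_schema = LogStash::Outputs::BigQuery::Schema.hash_to_java_schema(hash)
java_schema.getFields.size # => 2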
data/spec/outputs/google_bigquery_spec.rb
@@ -0,0 +1,154 @@
+ require "logstash/devutils/rspec/spec_helper"
+ require 'logstash/outputs/google_bigquery'
+ require 'logstash/outputs/bigquery/streamclient'
+
+ describe LogStash::Outputs::GoogleBigQuery do
+
+ let(:config) { { 'project_id' => 'project', 'dataset' => 'dataset', 'csv_schema' => 'path:STRING,status:INTEGER,score:FLOAT' } }
+ let(:sample_event) { LogStash::Event.new }
+ let(:bq_client) { double('streaming-client') }
+ let(:errors_file) { double('errors file') }
+
+ subject { LogStash::Outputs::GoogleBigQuery.new(config) }
+
+ before(:each) do
+ allow(LogStash::Outputs::BigQuery::StreamingClient).to receive(:new).and_return(bq_client)
+ expect(LogStash::Outputs::BigQuery::StreamingClient).to receive(:new)
+
+ allow(subject).to receive(:init_batcher_flush_thread).and_return(nil)
+ expect(subject).to receive(:init_batcher_flush_thread)
+
+ subject.register
+ end
+
+ describe '#get_table_name' do
+ it 'does not crash if no time is given' do
+ subject.get_table_name
+ end
+
+ it 'formats the table name correctly' do
+ table_id = subject.get_table_name Time.new(2012,9,8,7,6)
+ expect(table_id).to eq('logstash_2012_09_08T07_00')
+ end
+ end
+
+ describe '#replace_at_keys' do
+ it 'removes @ in keys' do
+ nested = {'@foo' => 'bar'}
+ expected = {foo: 'bar'}
+
+ out = subject.replace_at_keys nested
+
+ expect(out).to eq(keys_to_strs(expected))
+ end
+
+ it 'does not remove @ in values' do
+ nested = {foo: '@bar'}
+
+ out = subject.replace_at_keys nested
+
+ expect(out).to eq(keys_to_strs(nested))
+ end
+
+ it 'removes @ in nested keys' do
+ nested = {foo: {'@bar' => 'bazz'}}
+ expected = {foo: {bar: 'bazz'}}
+
+ out = subject.replace_at_keys nested
+
+ expect(out).to eq(keys_to_strs(expected))
+ end
+ end
+
+ describe '#publish' do
+ it 'does nothing if there are no messages' do
+ allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
+
+ subject.publish nil
+ subject.publish []
+
+ expect(subject).not_to receive(:create_table_if_not_exists)
+ end
+
+ it 'creates a table if it does not exist' do
+ allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
+ allow(bq_client).to receive(:append).and_return(true)
+ allow(subject).to receive(:write_to_errors_file).and_return(nil)
+ expect(subject).to receive(:create_table_if_not_exists)
+
+ subject.publish ['{"foo":"bar"}']
+ end
+
+ it 'writes rows to a file on failed insert' do
+ allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
+ allow(bq_client).to receive(:append).and_return(false)
+ allow(subject).to receive(:write_to_errors_file).and_return(nil)
+ expect(subject).to receive(:write_to_errors_file)
+
+ subject.publish ['{"foo":"bar"}']
+ end
+
+ it 'writes rows to a file if insert threw an exception' do
+ allow(subject).to receive(:create_table_if_not_exists).and_return(nil)
+ allow(bq_client).to receive(:append).and_raise('expected insert error')
+ allow(subject).to receive(:write_to_errors_file).and_return(nil)
+ expect(subject).to receive(:write_to_errors_file)
+
+ subject.publish ['{"foo":"bar"}']
+ end
+ end
+
+ describe '#create_table_if_not_exists' do
+ it 'checks if a table exists' do
+ allow(bq_client).to receive(:table_exists?).and_return(true)
+ expect(bq_client).to receive(:table_exists?)
+
+ subject.create_table_if_not_exists 'foo'
+ end
+
+ it 'creates a table if it does not exist' do
+ allow(bq_client).to receive(:table_exists?).and_return(false)
+ allow(bq_client).to receive(:create_table).and_return(nil)
+ expect(bq_client).to receive(:table_exists?)
+ expect(bq_client).to receive(:create_table)
+
+ subject.create_table_if_not_exists 'foo'
+ end
+ end
+
+ describe '#write_to_errors_file' do
+ it 'creates missing directories' do
+ allow(File).to receive(:open).and_return(errors_file)
+ allow(FileUtils).to receive(:mkdir_p)
+ expect(FileUtils).to receive(:mkdir_p)
+
+ subject.write_to_errors_file(['a','b'], 'table_name')
+ end
+
+ it 'does not fail on exception' do
+ allow(FileUtils).to receive(:mkdir_p).and_raise("exception creating directories")
+ expect{subject.write_to_errors_file([], 'table_name')}.to_not raise_error
+ end
+ end
+
+ # Recursively converts a hash's symbol keys into strings.
+ def keys_to_strs(event)
+ return event unless event.is_a? Hash
+
+ out = {}
+
+ event.each do |key, value|
+ out[key.to_s] = keys_to_strs value
+ end
+
+ out
+ end
+
+ RSpec::Matchers.define :starts_with do |x|
+ match { |actual| actual.start_with? x}
+ end
+
+ end
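The single #get_table_name expectation implies hourly, underscore-separated table names. A hedged reconstruction of the pattern follows; the 'logstash' prefix and the zeroed minutes are read off that one assertion, not off the plugin source in data/lib/logstash/outputs/google_bigquery.rb.

# Hypothetical reconstruction of the table-name format the spec asserts.
prefix = 'logstash' # assumed default table prefix
t = Time.new(2012, 9, 8, 7, 6)
t.strftime("#{prefix}_%Y_%m_%dT%H_00")
# => "logstash_2012_09_08T07_00"  (minutes fixed at 00: one table per hour)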