rspec-hive 0.4.2 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +39 -5
  4. data/.rubocop_todo.yml +40 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -3
  7. data/Gemfile +18 -0
  8. data/Guardfile +10 -6
  9. data/README.md +15 -0
  10. data/Rakefile +4 -2
  11. data/docker/Dockerfile +38 -47
  12. data/docker/hive-site.xml +28 -0
  13. data/examples/lib/query.rb +2 -0
  14. data/examples/rspec-hive.yml.example +7 -1
  15. data/examples/spec/query_spec.rb +2 -0
  16. data/examples/spec/spec_helper.rb +2 -0
  17. data/lib/rspec/hive.rb +2 -0
  18. data/lib/rspec/hive/configuration.rb +14 -33
  19. data/lib/rspec/hive/connection_delegator.rb +19 -13
  20. data/lib/rspec/hive/connector.rb +3 -2
  21. data/lib/rspec/hive/db_name.rb +2 -0
  22. data/lib/rspec/hive/exponential_backoff.rb +2 -0
  23. data/lib/rspec/hive/matchers.rb +2 -0
  24. data/lib/rspec/hive/query_builder.rb +2 -0
  25. data/lib/rspec/hive/query_builder/null_strategy.rb +2 -0
  26. data/lib/rspec/hive/query_builder/row_transformer.rb +4 -2
  27. data/lib/rspec/hive/query_builder/type_faker.rb +2 -0
  28. data/lib/rspec/hive/query_builder/value_by_type_strategy.rb +2 -0
  29. data/lib/rspec/hive/query_builder_helper.rb +2 -0
  30. data/lib/rspec/hive/railtie.rb +2 -0
  31. data/lib/rspec/hive/rake_tasks/docker.rake +27 -14
  32. data/lib/rspec/hive/version.rb +3 -1
  33. data/lib/rspec/hive/with_hive_connection.rb +2 -0
  34. data/rspec-hive.gemspec +7 -16
  35. data/spec/lib/rspec/hive/configuration_spec.rb +29 -33
  36. data/spec/lib/rspec/hive/connection_delegator_spec.rb +58 -138
  37. data/spec/lib/rspec/hive/connector_spec.rb +33 -38
  38. data/spec/lib/rspec/hive/db_name_spec.rb +4 -2
  39. data/spec/lib/rspec/hive/matchers_spec.rb +2 -0
  40. data/spec/lib/rspec/hive/query_builder/row_transformer_spec.rb +6 -4
  41. data/spec/lib/rspec/hive/query_builder_helper_spec.rb +8 -6
  42. data/spec/lib/rspec/hive/query_builder_spec.rb +15 -17
  43. data/spec/lib/rspec/hive_spec.rb +15 -22
  44. metadata +13 -154
  45. data/.rubocop_u2i.yml +0 -63
  46. data/spec/.rubocop.yml +0 -4
@@ -1,6 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  RSpec.describe RSpec::Hive::ConnectionDelegator do
6
+ let(:day_column) { instance_double(RBHive::TableSchema::Column, name: :day, type: :string) }
7
+ let(:dth_column) { instance_double(RBHive::TableSchema::Column, name: :dth, type: :int) }
8
+ let(:hm_column) { instance_double(RBHive::TableSchema::Column, name: :hm, type: :int) }
9
+ let(:country_column) { instance_double(RBHive::TableSchema::Column, name: :country, type: :string) }
10
+ let(:table_name) { 'test_table' }
11
+ let(:table_schema) { instance_double(RBHive::TableSchema, name: table_name) }
12
+ let(:connection_delegator) { described_class.new(connection, config) }
13
+ let(:connection) { double('Connection') }
14
+ let(:config) { double('Config') }
15
+
4
16
  describe '#load_into_table' do
5
17
  let(:host_shared_directory_path) { '/tmp/host' }
6
18
  let(:docker_file_path) { '/tmp/docked/test_file' }
@@ -11,9 +23,6 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
11
23
  )
12
24
  end
13
25
  let(:delimiter) { "\t" }
14
- let(:table_name) { 'test_table' }
15
- let(:table_schema) { instance_double(RBHive::TableSchema, name: table_name) }
16
- let(:connection) { double('Connection') }
17
26
  let(:file_mock) { double(Tempfile) }
18
27
 
19
28
  let(:values) { ['a', 'b', 1] }
@@ -24,54 +33,49 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
24
33
  expect(Tempfile).to receive(:open).
25
34
  with(table_name, host_shared_directory_path).and_yield(file_mock)
26
35
 
27
- expect(subject).to receive(:docker_path).
36
+ expect(connection_delegator).to receive(:docker_path).
28
37
  with(file_mock) { docker_file_path }
29
38
 
30
- expect(subject).to receive(:write_values_to_file).
39
+ expect(connection_delegator).to receive(:write_values_to_file).
31
40
  with(file_mock, values, "\t").once
32
41
  end
33
42
 
34
43
  context 'without partitions' do
35
44
  before do
36
- expect(subject).to receive(:load_file_to_hive_table).
45
+ expect(connection_delegator).to receive(:load_file_to_hive_table).
37
46
  with(table_name, docker_file_path, nil).once
38
47
 
39
- expect(subject).not_to receive(:partition_clause)
48
+ expect(connection_delegator).not_to receive(:partition_clause)
40
49
  end
41
50
 
42
- subject { described_class.new(connection, config) }
43
-
44
- it do
45
- subject.load_into_table(table_schema, values)
46
- end
51
+ it { connection_delegator.load_into_table(table_schema, values) }
47
52
  end
48
53
 
49
54
  context 'with partitions' do
50
55
  let(:partitions) { {day: '20160101', hm: '2020'} }
56
+ let(:table_schema) { instance_double(RBHive::TableSchema, name: table_name, partitions: [day_column, hm_column]) }
57
+
51
58
  let(:partition_query) { "PARTITION(day='20160101',hm='2020')" }
59
+
52
60
  before do
53
- expect(subject).to receive(:load_file_to_hive_table).
61
+ expect(connection_delegator).to receive(:load_file_to_hive_table).
54
62
  with(table_name, docker_file_path, partition_query).once
55
- expect(subject).to receive(:partition_clause).
56
- with(partitions) { partition_query }
63
+ expect(connection_delegator).to receive(:partition_clause).
64
+ with(table_schema, partitions) { partition_query }
57
65
  end
58
66
 
59
- subject { described_class.new(connection, config) }
60
-
61
- it do
62
- subject.load_into_table(table_schema, values, partitions)
63
- end
67
+ it { connection_delegator.load_into_table(table_schema, values, partitions) }
64
68
  end
65
69
  end
66
70
 
67
71
  describe '#load_partition' do
68
- let(:config) { double('Config') }
69
- let(:connection) { double('Connection') }
70
-
71
- let(:table_name) { 'test_table' }
72
72
  let(:partitions) do
73
73
  [{dth: 'mon', country: 'us'}, {dth: 'tue', country: 'us'}]
74
74
  end
75
+ let(:table_schema) do
76
+ instance_double(RBHive::TableSchema, name: table_name, partitions: [day_column, hm_column, country_column])
77
+ end
78
+
75
79
  let(:partition_query) do
76
80
  "PARTITION(dth='mon',country='us') PARTITION(dth='tue',country='us')"
77
81
  end
@@ -81,43 +85,34 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
81
85
  end
82
86
 
83
87
  before do
84
- expect(subject).to receive(:partition_clause).
85
- with(partitions) { partition_query }
88
+ expect(connection_delegator).to receive(:partition_clause).with(table_schema, partitions) { partition_query }
86
89
  expect(connection).to receive(:execute).with(executed_query)
87
90
  end
88
91
 
89
- subject { described_class.new(connection, config) }
90
-
91
- it do
92
- subject.load_partitions(table_name, partitions)
93
- end
92
+ it { connection_delegator.load_partitions(table_schema, partitions) }
94
93
  end
95
94
 
96
95
  describe '#partition_clause' do
97
- let(:config) { double('Config') }
98
- let(:connection) { double('Connection') }
99
-
100
96
  context 'with single partition' do
101
- let(:partitions) { {day: '20160101', hm: '2020'} }
102
- let(:partition_query) { "PARTITION(day='20160101',hm='2020')" }
103
-
104
- subject { described_class.new(connection, config) }
97
+ let(:partitions) { {day: 'tue', dth: '20160101'} }
98
+ let(:table_schema) { instance_double(RBHive::TableSchema, partitions: [day_column, dth_column]) }
99
+ let(:expected_partition_query) { "PARTITION(day='tue',dth=20160101)" }
105
100
 
106
101
  it 'translates partition hash to single query' do
107
- expect(subject.send(:partition_clause, partitions)).to eq(partition_query)
102
+ expect(connection_delegator.send(:partition_clause, table_schema, partitions)).to eq(expected_partition_query)
108
103
  end
109
104
  end
110
105
 
111
106
  context 'with multiple partitions' do
112
107
  let(:partitions) { [{day: 'mon', hm: '2020'}, {day: 'tue', hm: '2020'}, {day: 'mon', hm: '2030'}] }
108
+ let(:table_schema) { instance_double(RBHive::TableSchema, partitions: [day_column, hm_column]) }
109
+
113
110
  let(:partition_query) do
114
- "PARTITION(day='mon',hm='2020') PARTITION(day='tue',hm='2020') PARTITION(day='mon',hm='2030')"
111
+ "PARTITION(day='mon',hm=2020) PARTITION(day='tue',hm=2020) PARTITION(day='mon',hm=2030)"
115
112
  end
116
113
 
117
- subject { described_class.new(connection, config) }
118
-
119
114
  it 'translates partition hash to combined query' do
120
- expect(subject.send(:partition_clause, partitions)).to eq(partition_query)
115
+ expect(connection_delegator.send(:partition_clause, table_schema, partitions)).to eq(partition_query)
121
116
  end
122
117
  end
123
118
  end
@@ -128,37 +123,23 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
128
123
  [['a', 'b', 1],
129
124
  ['aa', 'bb', 22]]
130
125
  end
131
- let(:connection) { double('Connection') }
132
- let(:config) { double('Config') }
133
126
  let(:delimiter) { '|' }
134
127
  let(:expected_file_content) { "a|b|1\naa|bb|22\n" }
135
128
 
136
- subject { described_class.new(connection, config) }
137
129
  it 'writes values to file in correct format' do
138
- subject.send(:write_values_to_file, file, values, delimiter)
130
+ connection_delegator.send(:write_values_to_file, file, values, delimiter)
139
131
  file.rewind
140
132
  expect(file.read).to eq(expected_file_content)
141
133
  end
142
134
  end
143
135
 
144
136
  describe '#load_file_to_hive_table' do
145
- let(:connection) { double('Connection') }
146
- let(:config) { double('Config') }
147
- let(:table_name) { 'test_table' }
148
137
  let(:file_path) { '/tmp/test' }
149
- let(:execute_text) do
150
- "load data local inpath '/tmp/test' into table test_table"
151
- end
138
+ let(:execute_text) { "load data local inpath '/tmp/test' into table test_table" }
152
139
 
153
- before do
154
- expect(connection).to receive(:execute).with(execute_text)
155
- end
156
-
157
- subject { described_class.new(connection, config) }
140
+ before { expect(connection).to receive(:execute).with(execute_text) }
158
141
 
159
- it do
160
- subject.send(:load_file_to_hive_table, table_name, file_path)
161
- end
142
+ it { connection_delegator.send(:load_file_to_hive_table, table_name, file_path) }
162
143
  end
163
144
 
164
145
  describe '#translate_to_docker_path' do
@@ -167,7 +148,6 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
167
148
  let(:file_host_path) { '/tmp/host/testfile' }
168
149
  let(:expected_file_path) { '/tmp/docker/testfile' }
169
150
 
170
- let(:connection) { double('Connection') }
171
151
  let(:docker_shared_directory_path) { '/tmp/docker' }
172
152
  let(:config) do
173
153
  double(
@@ -176,54 +156,29 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
176
156
  )
177
157
  end
178
158
 
179
- before do
180
- expect(file_mock).to receive(:path) { file_host_path }
181
- end
182
-
183
- subject { described_class.new(connection, config) }
159
+ before { expect(file_mock).to receive(:path) { file_host_path } }
184
160
 
185
- it do
186
- expect(subject.send(:docker_path, file_mock)).
187
- to eq(expected_file_path)
188
- end
161
+ it { expect(connection_delegator.send(:docker_path, file_mock)).to eq(expected_file_path) }
189
162
  end
190
163
 
191
164
  describe '#show_tables' do
192
- let(:connection) { double('Connection') }
193
- let(:config) { double('Config') }
194
165
  let(:fetch_text) { 'SHOW TABLES' }
195
166
 
196
- before do
197
- expect(connection).to receive(:fetch).with(fetch_text)
198
- end
199
-
200
- subject { described_class.new(connection, config) }
167
+ before { expect(connection).to receive(:fetch).with(fetch_text) }
201
168
 
202
- it do
203
- subject.show_tables
204
- end
169
+ it { connection_delegator.show_tables }
205
170
  end
206
171
 
207
172
  describe '#create_database' do
208
- let(:connection) { double('Connection') }
209
- let(:config) { double('Config') }
210
173
  let(:db_name) { 'test' }
211
174
  let(:fetch_text) { 'CREATE DATABASE IF NOT EXISTS `test`' }
212
175
 
213
- before do
214
- expect(connection).to receive(:execute).with(fetch_text)
215
- end
176
+ before { expect(connection).to receive(:execute).with(fetch_text) }
216
177
 
217
- subject { described_class.new(connection, config) }
218
-
219
- it do
220
- subject.create_database(db_name)
221
- end
178
+ it { connection_delegator.create_database(db_name) }
222
179
  end
223
180
 
224
181
  describe '#create_table' do
225
- let(:connection) { double('Connection') }
226
- let(:config) { double('Config') }
227
182
  let(:table_schema) { double('Table_schema') }
228
183
  let(:table_statement) { 'I AM TABLE STATEMENT' }
229
184
 
@@ -234,78 +189,43 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
234
189
  expect(connection).to receive(:execute).with(table_statement)
235
190
  end
236
191
 
237
- subject { described_class.new(connection, config) }
238
-
239
- it do
240
- subject.create_table(table_schema)
241
- end
192
+ it { connection_delegator.create_table(table_schema) }
242
193
  end
243
194
 
244
195
  describe '#use databaes' do
245
- let(:connection) { double('Connection') }
246
- let(:config) { double('Config') }
247
196
  let(:db_name) { 'test' }
248
197
  let(:fetch_text) { 'USE `test`' }
249
198
 
250
- before do
251
- expect(connection).to receive(:execute).with(fetch_text)
252
- end
253
-
254
- subject { described_class.new(connection, config) }
199
+ before { expect(connection).to receive(:execute).with(fetch_text) }
255
200
 
256
- it do
257
- subject.use_database(db_name)
258
- end
201
+ it { connection_delegator.use_database(db_name) }
259
202
  end
260
203
 
261
204
  describe '#drop_databse' do
262
- let(:connection) { double('Connection') }
263
- let(:config) { double('Config') }
264
205
  let(:db_name) { 'test' }
265
206
  let(:fetch_text) { 'DROP DATABASE `test`' }
266
207
 
267
- before do
268
- expect(connection).to receive(:execute).with(fetch_text)
269
- end
208
+ before { expect(connection).to receive(:execute).with(fetch_text) }
270
209
 
271
- subject { described_class.new(connection, config) }
272
-
273
- it do
274
- subject.drop_database(db_name)
275
- end
210
+ it { connection_delegator.drop_database(db_name) }
276
211
  end
277
212
 
278
213
  describe '#show_databases' do
279
- let(:connection) { double('Connection') }
280
- let(:config) { double('Config') }
281
214
  let(:fetch_text) { 'SHOW DATABASES' }
282
215
 
283
- before do
284
- expect(connection).to receive(:fetch).with(fetch_text)
285
- end
286
-
287
- subject { described_class.new(connection, config) }
216
+ before { expect(connection).to receive(:fetch).with(fetch_text) }
288
217
 
289
- it do
290
- subject.show_databases
291
- end
218
+ it { connection_delegator.show_databases }
292
219
  end
293
220
 
294
221
  describe '#switch database' do
295
- let(:connection) { double('Connection') }
296
- let(:config) { double('Config') }
297
-
298
222
  let(:db_name) { 'test_db' }
299
223
 
300
224
  before do
301
- expect(subject).to receive(:create_database).once
302
- expect(subject).to receive(:use_database).once
225
+ expect(connection_delegator).to receive(:create_database).once
226
+ expect(connection_delegator).to receive(:use_database).once
303
227
  end
304
228
 
305
- subject { described_class.new(connection, config) }
306
-
307
- it do
308
- subject.switch_database(db_name)
309
- end
229
+ it { connection_delegator.switch_database(db_name) }
310
230
  end
311
231
  end
@@ -1,8 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  RSpec.describe RSpec::Hive::Connector do
4
6
  describe '#start_connection' do
5
- let(:tcli_connection) { double(RBHive::TCLIConnection) }
7
+ let(:connector) { described_class.new(configuration) }
8
+ let(:tcli_connection) { class_double(RBHive::TCLIConnection) }
6
9
  let(:connection_delegator) { double(RSpec::Hive::ConnectionDelegator) }
7
10
  let(:host) { '127.0.0.1' }
8
11
  let(:port) { '10000' }
@@ -10,76 +13,68 @@ RSpec.describe RSpec::Hive::Connector do
10
13
  let(:hive_options) do
11
14
  {'hive.exec.dynamic.partition' => 'true',
12
15
  'hive.exec.dynamic.partition.mode' => 'nonstrict',
13
- 'hive.exec.max.dynamic.partitions.pernodexi' => '100000',
16
+ 'hive.exec.max.dynamic.partitions.pernode' => '100000',
14
17
  'hive.exec.max.dynamic.partitions' => '100000',
15
18
  'mapred.child.java.opts' => '-Xmx2048m'}
16
19
  end
17
20
  let(:configuration) do
18
- double(
21
+ instance_double(
19
22
  RSpec::Hive::Configuration,
20
23
  host: host,
21
24
  port: port,
22
- hive_options: hive_options
25
+ hive_options: hive_options,
26
+ logger: instance_double(Logger)
23
27
  )
24
28
  end
25
29
 
30
+ before do
31
+ allow(connector).to receive(:connection_options) { options_mock }
32
+ allow(configuration.logger).to receive(:info)
33
+ end
34
+
26
35
  context 'when db_name is provided' do
36
+ subject { connector.start_connection(db_name) }
37
+
27
38
  let(:db_name) { 'test' }
28
39
 
29
40
  before do
30
- allow(subject).to receive(:connection_options) { options_mock }
31
- expect(RBHive::TCLIConnection).to receive(:new).
32
- with(host, port, options_mock) { tcli_connection }
41
+ expect(RBHive::TCLIConnection).to receive(:new).with(host, port, options_mock) { tcli_connection }
33
42
  expect(RSpec::Hive::ConnectionDelegator).to receive(:new).
34
43
  with(tcli_connection, configuration) { connection_delegator }
35
-
36
44
  expect(connection_delegator).to receive(:open).once
37
45
  expect(connection_delegator).to receive(:open_session).once
38
- expect(connection_delegator).to receive(:switch_database).
39
- with(db_name).once
40
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition=true;')
41
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition.mode=nonstrict;')
42
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions.pernodexi=100000;')
43
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions=100000;')
44
- expect(connection_delegator).to receive(:execute).with('SET mapred.child.java.opts=-Xmx2048m;')
45
- allow(configuration).to receive_message_chain(:logger, :info)
46
+ expect(connection_delegator).to receive(:switch_database).with(db_name).once
47
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition=true')
48
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition.mode=nonstrict')
49
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions.pernode=100000')
50
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions=100000')
51
+ expect(connection_delegator).to receive(:execute).with('SET mapred.child.java.opts=-Xmx2048m')
46
52
  end
47
53
 
48
- subject { described_class.new(configuration) }
49
-
50
- it do
51
- expect(subject.start_connection(db_name)).to equal(connection_delegator)
52
- end
54
+ it { is_expected.to equal(connection_delegator) }
53
55
  end
54
56
 
55
57
  context 'when db_name is not provided' do
58
+ subject { connector.start_connection }
59
+
56
60
  let(:db_random_name) { 'rand123' }
57
61
 
58
62
  before do
59
- allow(subject).to receive(:connection_options) { options_mock }
60
63
  expect(RSpec::Hive::DbName).to receive(:random_name) { db_random_name }
61
- expect(RBHive::TCLIConnection).to receive(:new).
62
- with(host, port, options_mock) { tcli_connection }
64
+ expect(RBHive::TCLIConnection).to receive(:new).with(host, port, options_mock) { tcli_connection }
63
65
  expect(RSpec::Hive::ConnectionDelegator).to receive(:new).
64
66
  with(tcli_connection, configuration) { connection_delegator }
65
-
66
67
  expect(connection_delegator).to receive(:open).once
67
68
  expect(connection_delegator).to receive(:open_session).once
68
- expect(connection_delegator).to receive(:switch_database).
69
- with(db_random_name).once
70
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition=true;')
71
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition.mode=nonstrict;')
72
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions.pernodexi=100000;')
73
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions=100000;')
74
- expect(connection_delegator).to receive(:execute).with('SET mapred.child.java.opts=-Xmx2048m;')
75
- allow(configuration).to receive_message_chain(:logger, :info)
69
+ expect(connection_delegator).to receive(:switch_database).with(db_random_name).once
70
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition=true')
71
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition.mode=nonstrict')
72
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions.pernode=100000')
73
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions=100000')
74
+ expect(connection_delegator).to receive(:execute).with('SET mapred.child.java.opts=-Xmx2048m')
76
75
  end
77
76
 
78
- subject { described_class.new(configuration) }
79
-
80
- it do
81
- expect(subject.start_connection).to equal(connection_delegator)
82
- end
77
+ it { is_expected.to equal(connection_delegator) }
83
78
  end
84
79
  end
85
80
  end