rspec-hive 0.4.2 → 0.6.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (46):
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +39 -5
  4. data/.rubocop_todo.yml +40 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -3
  7. data/Gemfile +18 -0
  8. data/Guardfile +10 -6
  9. data/README.md +15 -0
  10. data/Rakefile +4 -2
  11. data/docker/Dockerfile +38 -47
  12. data/docker/hive-site.xml +28 -0
  13. data/examples/lib/query.rb +2 -0
  14. data/examples/rspec-hive.yml.example +7 -1
  15. data/examples/spec/query_spec.rb +2 -0
  16. data/examples/spec/spec_helper.rb +2 -0
  17. data/lib/rspec/hive.rb +2 -0
  18. data/lib/rspec/hive/configuration.rb +14 -33
  19. data/lib/rspec/hive/connection_delegator.rb +19 -13
  20. data/lib/rspec/hive/connector.rb +3 -2
  21. data/lib/rspec/hive/db_name.rb +2 -0
  22. data/lib/rspec/hive/exponential_backoff.rb +2 -0
  23. data/lib/rspec/hive/matchers.rb +2 -0
  24. data/lib/rspec/hive/query_builder.rb +2 -0
  25. data/lib/rspec/hive/query_builder/null_strategy.rb +2 -0
  26. data/lib/rspec/hive/query_builder/row_transformer.rb +4 -2
  27. data/lib/rspec/hive/query_builder/type_faker.rb +2 -0
  28. data/lib/rspec/hive/query_builder/value_by_type_strategy.rb +2 -0
  29. data/lib/rspec/hive/query_builder_helper.rb +2 -0
  30. data/lib/rspec/hive/railtie.rb +2 -0
  31. data/lib/rspec/hive/rake_tasks/docker.rake +27 -14
  32. data/lib/rspec/hive/version.rb +3 -1
  33. data/lib/rspec/hive/with_hive_connection.rb +2 -0
  34. data/rspec-hive.gemspec +7 -16
  35. data/spec/lib/rspec/hive/configuration_spec.rb +29 -33
  36. data/spec/lib/rspec/hive/connection_delegator_spec.rb +58 -138
  37. data/spec/lib/rspec/hive/connector_spec.rb +33 -38
  38. data/spec/lib/rspec/hive/db_name_spec.rb +4 -2
  39. data/spec/lib/rspec/hive/matchers_spec.rb +2 -0
  40. data/spec/lib/rspec/hive/query_builder/row_transformer_spec.rb +6 -4
  41. data/spec/lib/rspec/hive/query_builder_helper_spec.rb +8 -6
  42. data/spec/lib/rspec/hive/query_builder_spec.rb +15 -17
  43. data/spec/lib/rspec/hive_spec.rb +15 -22
  44. metadata +13 -154
  45. data/.rubocop_u2i.yml +0 -63
  46. data/spec/.rubocop.yml +0 -4
@@ -1,6 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  RSpec.describe RSpec::Hive::ConnectionDelegator do
6
+ let(:day_column) { instance_double(RBHive::TableSchema::Column, name: :day, type: :string) }
7
+ let(:dth_column) { instance_double(RBHive::TableSchema::Column, name: :dth, type: :int) }
8
+ let(:hm_column) { instance_double(RBHive::TableSchema::Column, name: :hm, type: :int) }
9
+ let(:country_column) { instance_double(RBHive::TableSchema::Column, name: :country, type: :string) }
10
+ let(:table_name) { 'test_table' }
11
+ let(:table_schema) { instance_double(RBHive::TableSchema, name: table_name) }
12
+ let(:connection_delegator) { described_class.new(connection, config) }
13
+ let(:connection) { double('Connection') }
14
+ let(:config) { double('Config') }
15
+
4
16
  describe '#load_into_table' do
5
17
  let(:host_shared_directory_path) { '/tmp/host' }
6
18
  let(:docker_file_path) { '/tmp/docked/test_file' }
@@ -11,9 +23,6 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
11
23
  )
12
24
  end
13
25
  let(:delimiter) { "\t" }
14
- let(:table_name) { 'test_table' }
15
- let(:table_schema) { instance_double(RBHive::TableSchema, name: table_name) }
16
- let(:connection) { double('Connection') }
17
26
  let(:file_mock) { double(Tempfile) }
18
27
 
19
28
  let(:values) { ['a', 'b', 1] }
@@ -24,54 +33,49 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
24
33
  expect(Tempfile).to receive(:open).
25
34
  with(table_name, host_shared_directory_path).and_yield(file_mock)
26
35
 
27
- expect(subject).to receive(:docker_path).
36
+ expect(connection_delegator).to receive(:docker_path).
28
37
  with(file_mock) { docker_file_path }
29
38
 
30
- expect(subject).to receive(:write_values_to_file).
39
+ expect(connection_delegator).to receive(:write_values_to_file).
31
40
  with(file_mock, values, "\t").once
32
41
  end
33
42
 
34
43
  context 'without partitions' do
35
44
  before do
36
- expect(subject).to receive(:load_file_to_hive_table).
45
+ expect(connection_delegator).to receive(:load_file_to_hive_table).
37
46
  with(table_name, docker_file_path, nil).once
38
47
 
39
- expect(subject).not_to receive(:partition_clause)
48
+ expect(connection_delegator).not_to receive(:partition_clause)
40
49
  end
41
50
 
42
- subject { described_class.new(connection, config) }
43
-
44
- it do
45
- subject.load_into_table(table_schema, values)
46
- end
51
+ it { connection_delegator.load_into_table(table_schema, values) }
47
52
  end
48
53
 
49
54
  context 'with partitions' do
50
55
  let(:partitions) { {day: '20160101', hm: '2020'} }
56
+ let(:table_schema) { instance_double(RBHive::TableSchema, name: table_name, partitions: [day_column, hm_column]) }
57
+
51
58
  let(:partition_query) { "PARTITION(day='20160101',hm='2020')" }
59
+
52
60
  before do
53
- expect(subject).to receive(:load_file_to_hive_table).
61
+ expect(connection_delegator).to receive(:load_file_to_hive_table).
54
62
  with(table_name, docker_file_path, partition_query).once
55
- expect(subject).to receive(:partition_clause).
56
- with(partitions) { partition_query }
63
+ expect(connection_delegator).to receive(:partition_clause).
64
+ with(table_schema, partitions) { partition_query }
57
65
  end
58
66
 
59
- subject { described_class.new(connection, config) }
60
-
61
- it do
62
- subject.load_into_table(table_schema, values, partitions)
63
- end
67
+ it { connection_delegator.load_into_table(table_schema, values, partitions) }
64
68
  end
65
69
  end
66
70
 
67
71
  describe '#load_partition' do
68
- let(:config) { double('Config') }
69
- let(:connection) { double('Connection') }
70
-
71
- let(:table_name) { 'test_table' }
72
72
  let(:partitions) do
73
73
  [{dth: 'mon', country: 'us'}, {dth: 'tue', country: 'us'}]
74
74
  end
75
+ let(:table_schema) do
76
+ instance_double(RBHive::TableSchema, name: table_name, partitions: [day_column, hm_column, country_column])
77
+ end
78
+
75
79
  let(:partition_query) do
76
80
  "PARTITION(dth='mon',country='us') PARTITION(dth='tue',country='us')"
77
81
  end
@@ -81,43 +85,34 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
81
85
  end
82
86
 
83
87
  before do
84
- expect(subject).to receive(:partition_clause).
85
- with(partitions) { partition_query }
88
+ expect(connection_delegator).to receive(:partition_clause).with(table_schema, partitions) { partition_query }
86
89
  expect(connection).to receive(:execute).with(executed_query)
87
90
  end
88
91
 
89
- subject { described_class.new(connection, config) }
90
-
91
- it do
92
- subject.load_partitions(table_name, partitions)
93
- end
92
+ it { connection_delegator.load_partitions(table_schema, partitions) }
94
93
  end
95
94
 
96
95
  describe '#partition_clause' do
97
- let(:config) { double('Config') }
98
- let(:connection) { double('Connection') }
99
-
100
96
  context 'with single partition' do
101
- let(:partitions) { {day: '20160101', hm: '2020'} }
102
- let(:partition_query) { "PARTITION(day='20160101',hm='2020')" }
103
-
104
- subject { described_class.new(connection, config) }
97
+ let(:partitions) { {day: 'tue', dth: '20160101'} }
98
+ let(:table_schema) { instance_double(RBHive::TableSchema, partitions: [day_column, dth_column]) }
99
+ let(:expected_partition_query) { "PARTITION(day='tue',dth=20160101)" }
105
100
 
106
101
  it 'translates partition hash to single query' do
107
- expect(subject.send(:partition_clause, partitions)).to eq(partition_query)
102
+ expect(connection_delegator.send(:partition_clause, table_schema, partitions)).to eq(expected_partition_query)
108
103
  end
109
104
  end
110
105
 
111
106
  context 'with multiple partitions' do
112
107
  let(:partitions) { [{day: 'mon', hm: '2020'}, {day: 'tue', hm: '2020'}, {day: 'mon', hm: '2030'}] }
108
+ let(:table_schema) { instance_double(RBHive::TableSchema, partitions: [day_column, hm_column]) }
109
+
113
110
  let(:partition_query) do
114
- "PARTITION(day='mon',hm='2020') PARTITION(day='tue',hm='2020') PARTITION(day='mon',hm='2030')"
111
+ "PARTITION(day='mon',hm=2020) PARTITION(day='tue',hm=2020) PARTITION(day='mon',hm=2030)"
115
112
  end
116
113
 
117
- subject { described_class.new(connection, config) }
118
-
119
114
  it 'translates partition hash to combined query' do
120
- expect(subject.send(:partition_clause, partitions)).to eq(partition_query)
115
+ expect(connection_delegator.send(:partition_clause, table_schema, partitions)).to eq(partition_query)
121
116
  end
122
117
  end
123
118
  end
@@ -128,37 +123,23 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
128
123
  [['a', 'b', 1],
129
124
  ['aa', 'bb', 22]]
130
125
  end
131
- let(:connection) { double('Connection') }
132
- let(:config) { double('Config') }
133
126
  let(:delimiter) { '|' }
134
127
  let(:expected_file_content) { "a|b|1\naa|bb|22\n" }
135
128
 
136
- subject { described_class.new(connection, config) }
137
129
  it 'writes values to file in correct format' do
138
- subject.send(:write_values_to_file, file, values, delimiter)
130
+ connection_delegator.send(:write_values_to_file, file, values, delimiter)
139
131
  file.rewind
140
132
  expect(file.read).to eq(expected_file_content)
141
133
  end
142
134
  end
143
135
 
144
136
  describe '#load_file_to_hive_table' do
145
- let(:connection) { double('Connection') }
146
- let(:config) { double('Config') }
147
- let(:table_name) { 'test_table' }
148
137
  let(:file_path) { '/tmp/test' }
149
- let(:execute_text) do
150
- "load data local inpath '/tmp/test' into table test_table"
151
- end
138
+ let(:execute_text) { "load data local inpath '/tmp/test' into table test_table" }
152
139
 
153
- before do
154
- expect(connection).to receive(:execute).with(execute_text)
155
- end
156
-
157
- subject { described_class.new(connection, config) }
140
+ before { expect(connection).to receive(:execute).with(execute_text) }
158
141
 
159
- it do
160
- subject.send(:load_file_to_hive_table, table_name, file_path)
161
- end
142
+ it { connection_delegator.send(:load_file_to_hive_table, table_name, file_path) }
162
143
  end
163
144
 
164
145
  describe '#translate_to_docker_path' do
@@ -167,7 +148,6 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
167
148
  let(:file_host_path) { '/tmp/host/testfile' }
168
149
  let(:expected_file_path) { '/tmp/docker/testfile' }
169
150
 
170
- let(:connection) { double('Connection') }
171
151
  let(:docker_shared_directory_path) { '/tmp/docker' }
172
152
  let(:config) do
173
153
  double(
@@ -176,54 +156,29 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
176
156
  )
177
157
  end
178
158
 
179
- before do
180
- expect(file_mock).to receive(:path) { file_host_path }
181
- end
182
-
183
- subject { described_class.new(connection, config) }
159
+ before { expect(file_mock).to receive(:path) { file_host_path } }
184
160
 
185
- it do
186
- expect(subject.send(:docker_path, file_mock)).
187
- to eq(expected_file_path)
188
- end
161
+ it { expect(connection_delegator.send(:docker_path, file_mock)).to eq(expected_file_path) }
189
162
  end
190
163
 
191
164
  describe '#show_tables' do
192
- let(:connection) { double('Connection') }
193
- let(:config) { double('Config') }
194
165
  let(:fetch_text) { 'SHOW TABLES' }
195
166
 
196
- before do
197
- expect(connection).to receive(:fetch).with(fetch_text)
198
- end
199
-
200
- subject { described_class.new(connection, config) }
167
+ before { expect(connection).to receive(:fetch).with(fetch_text) }
201
168
 
202
- it do
203
- subject.show_tables
204
- end
169
+ it { connection_delegator.show_tables }
205
170
  end
206
171
 
207
172
  describe '#create_database' do
208
- let(:connection) { double('Connection') }
209
- let(:config) { double('Config') }
210
173
  let(:db_name) { 'test' }
211
174
  let(:fetch_text) { 'CREATE DATABASE IF NOT EXISTS `test`' }
212
175
 
213
- before do
214
- expect(connection).to receive(:execute).with(fetch_text)
215
- end
176
+ before { expect(connection).to receive(:execute).with(fetch_text) }
216
177
 
217
- subject { described_class.new(connection, config) }
218
-
219
- it do
220
- subject.create_database(db_name)
221
- end
178
+ it { connection_delegator.create_database(db_name) }
222
179
  end
223
180
 
224
181
  describe '#create_table' do
225
- let(:connection) { double('Connection') }
226
- let(:config) { double('Config') }
227
182
  let(:table_schema) { double('Table_schema') }
228
183
  let(:table_statement) { 'I AM TABLE STATEMENT' }
229
184
 
@@ -234,78 +189,43 @@ RSpec.describe RSpec::Hive::ConnectionDelegator do
234
189
  expect(connection).to receive(:execute).with(table_statement)
235
190
  end
236
191
 
237
- subject { described_class.new(connection, config) }
238
-
239
- it do
240
- subject.create_table(table_schema)
241
- end
192
+ it { connection_delegator.create_table(table_schema) }
242
193
  end
243
194
 
244
195
  describe '#use databaes' do
245
- let(:connection) { double('Connection') }
246
- let(:config) { double('Config') }
247
196
  let(:db_name) { 'test' }
248
197
  let(:fetch_text) { 'USE `test`' }
249
198
 
250
- before do
251
- expect(connection).to receive(:execute).with(fetch_text)
252
- end
253
-
254
- subject { described_class.new(connection, config) }
199
+ before { expect(connection).to receive(:execute).with(fetch_text) }
255
200
 
256
- it do
257
- subject.use_database(db_name)
258
- end
201
+ it { connection_delegator.use_database(db_name) }
259
202
  end
260
203
 
261
204
  describe '#drop_databse' do
262
- let(:connection) { double('Connection') }
263
- let(:config) { double('Config') }
264
205
  let(:db_name) { 'test' }
265
206
  let(:fetch_text) { 'DROP DATABASE `test`' }
266
207
 
267
- before do
268
- expect(connection).to receive(:execute).with(fetch_text)
269
- end
208
+ before { expect(connection).to receive(:execute).with(fetch_text) }
270
209
 
271
- subject { described_class.new(connection, config) }
272
-
273
- it do
274
- subject.drop_database(db_name)
275
- end
210
+ it { connection_delegator.drop_database(db_name) }
276
211
  end
277
212
 
278
213
  describe '#show_databases' do
279
- let(:connection) { double('Connection') }
280
- let(:config) { double('Config') }
281
214
  let(:fetch_text) { 'SHOW DATABASES' }
282
215
 
283
- before do
284
- expect(connection).to receive(:fetch).with(fetch_text)
285
- end
286
-
287
- subject { described_class.new(connection, config) }
216
+ before { expect(connection).to receive(:fetch).with(fetch_text) }
288
217
 
289
- it do
290
- subject.show_databases
291
- end
218
+ it { connection_delegator.show_databases }
292
219
  end
293
220
 
294
221
  describe '#switch database' do
295
- let(:connection) { double('Connection') }
296
- let(:config) { double('Config') }
297
-
298
222
  let(:db_name) { 'test_db' }
299
223
 
300
224
  before do
301
- expect(subject).to receive(:create_database).once
302
- expect(subject).to receive(:use_database).once
225
+ expect(connection_delegator).to receive(:create_database).once
226
+ expect(connection_delegator).to receive(:use_database).once
303
227
  end
304
228
 
305
- subject { described_class.new(connection, config) }
306
-
307
- it do
308
- subject.switch_database(db_name)
309
- end
229
+ it { connection_delegator.switch_database(db_name) }
310
230
  end
311
231
  end
@@ -1,8 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  RSpec.describe RSpec::Hive::Connector do
4
6
  describe '#start_connection' do
5
- let(:tcli_connection) { double(RBHive::TCLIConnection) }
7
+ let(:connector) { described_class.new(configuration) }
8
+ let(:tcli_connection) { class_double(RBHive::TCLIConnection) }
6
9
  let(:connection_delegator) { double(RSpec::Hive::ConnectionDelegator) }
7
10
  let(:host) { '127.0.0.1' }
8
11
  let(:port) { '10000' }
@@ -10,76 +13,68 @@ RSpec.describe RSpec::Hive::Connector do
10
13
  let(:hive_options) do
11
14
  {'hive.exec.dynamic.partition' => 'true',
12
15
  'hive.exec.dynamic.partition.mode' => 'nonstrict',
13
- 'hive.exec.max.dynamic.partitions.pernodexi' => '100000',
16
+ 'hive.exec.max.dynamic.partitions.pernode' => '100000',
14
17
  'hive.exec.max.dynamic.partitions' => '100000',
15
18
  'mapred.child.java.opts' => '-Xmx2048m'}
16
19
  end
17
20
  let(:configuration) do
18
- double(
21
+ instance_double(
19
22
  RSpec::Hive::Configuration,
20
23
  host: host,
21
24
  port: port,
22
- hive_options: hive_options
25
+ hive_options: hive_options,
26
+ logger: instance_double(Logger)
23
27
  )
24
28
  end
25
29
 
30
+ before do
31
+ allow(connector).to receive(:connection_options) { options_mock }
32
+ allow(configuration.logger).to receive(:info)
33
+ end
34
+
26
35
  context 'when db_name is provided' do
36
+ subject { connector.start_connection(db_name) }
37
+
27
38
  let(:db_name) { 'test' }
28
39
 
29
40
  before do
30
- allow(subject).to receive(:connection_options) { options_mock }
31
- expect(RBHive::TCLIConnection).to receive(:new).
32
- with(host, port, options_mock) { tcli_connection }
41
+ expect(RBHive::TCLIConnection).to receive(:new).with(host, port, options_mock) { tcli_connection }
33
42
  expect(RSpec::Hive::ConnectionDelegator).to receive(:new).
34
43
  with(tcli_connection, configuration) { connection_delegator }
35
-
36
44
  expect(connection_delegator).to receive(:open).once
37
45
  expect(connection_delegator).to receive(:open_session).once
38
- expect(connection_delegator).to receive(:switch_database).
39
- with(db_name).once
40
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition=true;')
41
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition.mode=nonstrict;')
42
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions.pernodexi=100000;')
43
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions=100000;')
44
- expect(connection_delegator).to receive(:execute).with('SET mapred.child.java.opts=-Xmx2048m;')
45
- allow(configuration).to receive_message_chain(:logger, :info)
46
+ expect(connection_delegator).to receive(:switch_database).with(db_name).once
47
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition=true')
48
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition.mode=nonstrict')
49
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions.pernode=100000')
50
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions=100000')
51
+ expect(connection_delegator).to receive(:execute).with('SET mapred.child.java.opts=-Xmx2048m')
46
52
  end
47
53
 
48
- subject { described_class.new(configuration) }
49
-
50
- it do
51
- expect(subject.start_connection(db_name)).to equal(connection_delegator)
52
- end
54
+ it { is_expected.to equal(connection_delegator) }
53
55
  end
54
56
 
55
57
  context 'when db_name is not provided' do
58
+ subject { connector.start_connection }
59
+
56
60
  let(:db_random_name) { 'rand123' }
57
61
 
58
62
  before do
59
- allow(subject).to receive(:connection_options) { options_mock }
60
63
  expect(RSpec::Hive::DbName).to receive(:random_name) { db_random_name }
61
- expect(RBHive::TCLIConnection).to receive(:new).
62
- with(host, port, options_mock) { tcli_connection }
64
+ expect(RBHive::TCLIConnection).to receive(:new).with(host, port, options_mock) { tcli_connection }
63
65
  expect(RSpec::Hive::ConnectionDelegator).to receive(:new).
64
66
  with(tcli_connection, configuration) { connection_delegator }
65
-
66
67
  expect(connection_delegator).to receive(:open).once
67
68
  expect(connection_delegator).to receive(:open_session).once
68
- expect(connection_delegator).to receive(:switch_database).
69
- with(db_random_name).once
70
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition=true;')
71
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition.mode=nonstrict;')
72
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions.pernodexi=100000;')
73
- expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions=100000;')
74
- expect(connection_delegator).to receive(:execute).with('SET mapred.child.java.opts=-Xmx2048m;')
75
- allow(configuration).to receive_message_chain(:logger, :info)
69
+ expect(connection_delegator).to receive(:switch_database).with(db_random_name).once
70
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition=true')
71
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.dynamic.partition.mode=nonstrict')
72
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions.pernode=100000')
73
+ expect(connection_delegator).to receive(:execute).with('SET hive.exec.max.dynamic.partitions=100000')
74
+ expect(connection_delegator).to receive(:execute).with('SET mapred.child.java.opts=-Xmx2048m')
76
75
  end
77
76
 
78
- subject { described_class.new(configuration) }
79
-
80
- it do
81
- expect(subject.start_connection).to equal(connection_delegator)
82
- end
77
+ it { is_expected.to equal(connection_delegator) }
83
78
  end
84
79
  end
85
80
  end