rspec-hive 0.4.2 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +39 -5
- data/.rubocop_todo.yml +40 -0
- data/.ruby-version +1 -1
- data/.travis.yml +4 -3
- data/Gemfile +18 -0
- data/Guardfile +10 -6
- data/README.md +15 -0
- data/Rakefile +4 -2
- data/docker/Dockerfile +38 -47
- data/docker/hive-site.xml +28 -0
- data/examples/lib/query.rb +2 -0
- data/examples/rspec-hive.yml.example +7 -1
- data/examples/spec/query_spec.rb +2 -0
- data/examples/spec/spec_helper.rb +2 -0
- data/lib/rspec/hive.rb +2 -0
- data/lib/rspec/hive/configuration.rb +14 -33
- data/lib/rspec/hive/connection_delegator.rb +19 -13
- data/lib/rspec/hive/connector.rb +3 -2
- data/lib/rspec/hive/db_name.rb +2 -0
- data/lib/rspec/hive/exponential_backoff.rb +2 -0
- data/lib/rspec/hive/matchers.rb +2 -0
- data/lib/rspec/hive/query_builder.rb +2 -0
- data/lib/rspec/hive/query_builder/null_strategy.rb +2 -0
- data/lib/rspec/hive/query_builder/row_transformer.rb +4 -2
- data/lib/rspec/hive/query_builder/type_faker.rb +2 -0
- data/lib/rspec/hive/query_builder/value_by_type_strategy.rb +2 -0
- data/lib/rspec/hive/query_builder_helper.rb +2 -0
- data/lib/rspec/hive/railtie.rb +2 -0
- data/lib/rspec/hive/rake_tasks/docker.rake +27 -14
- data/lib/rspec/hive/version.rb +3 -1
- data/lib/rspec/hive/with_hive_connection.rb +2 -0
- data/rspec-hive.gemspec +7 -16
- data/spec/lib/rspec/hive/configuration_spec.rb +29 -33
- data/spec/lib/rspec/hive/connection_delegator_spec.rb +58 -138
- data/spec/lib/rspec/hive/connector_spec.rb +33 -38
- data/spec/lib/rspec/hive/db_name_spec.rb +4 -2
- data/spec/lib/rspec/hive/matchers_spec.rb +2 -0
- data/spec/lib/rspec/hive/query_builder/row_transformer_spec.rb +6 -4
- data/spec/lib/rspec/hive/query_builder_helper_spec.rb +8 -6
- data/spec/lib/rspec/hive/query_builder_spec.rb +15 -17
- data/spec/lib/rspec/hive_spec.rb +15 -22
- metadata +13 -154
- data/.rubocop_u2i.yml +0 -63
- data/spec/.rubocop.yml +0 -4
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'delegate'
|
2
4
|
require 'tempfile'
|
3
5
|
|
@@ -15,9 +17,9 @@ module RSpec
|
|
15
17
|
execute(table_schema.create_table_statement)
|
16
18
|
end
|
17
19
|
|
18
|
-
def load_partitions(
|
19
|
-
partitions = partition_clause(partitions)
|
20
|
-
query = "ALTER TABLE #{
|
20
|
+
def load_partitions(table_schema, partitions)
|
21
|
+
partitions = partition_clause(table_schema, partitions)
|
22
|
+
query = "ALTER TABLE #{table_schema.name} ADD #{partitions}"
|
21
23
|
execute(query)
|
22
24
|
end
|
23
25
|
|
@@ -29,7 +31,7 @@ module RSpec
|
|
29
31
|
values,
|
30
32
|
table_schema.instance_variable_get(:@field_sep)
|
31
33
|
)
|
32
|
-
partition_query = partition_clause(partitions) if partitions
|
34
|
+
partition_query = partition_clause(table_schema, partitions) if partitions
|
33
35
|
load_file_to_hive_table(
|
34
36
|
table_name,
|
35
37
|
docker_path(file),
|
@@ -65,23 +67,27 @@ module RSpec
|
|
65
67
|
|
66
68
|
private
|
67
69
|
|
68
|
-
def partition_clause(partitions)
|
70
|
+
def partition_clause(table_schema, partitions)
|
69
71
|
if partitions.is_a?(Array)
|
70
|
-
partitions.collect { |x| to_partition_clause(x) }.join(' ')
|
72
|
+
partitions.collect { |x| to_partition_clause(table_schema, x) }.join(' ')
|
71
73
|
else
|
72
|
-
to_partition_clause(partitions)
|
74
|
+
to_partition_clause(table_schema, partitions)
|
73
75
|
end
|
74
76
|
end
|
75
77
|
|
76
|
-
def to_partition_clause(partition)
|
77
|
-
"PARTITION(#{partition.map { |k, v| "#{k}
|
78
|
+
def to_partition_clause(table_schema, partition)
|
79
|
+
"PARTITION(#{partition.map { |k, v| "#{k}=#{partition_value(table_schema, k, v)}" }.join(',')})"
|
80
|
+
end
|
81
|
+
|
82
|
+
def partition_value(table_schema, key, value)
|
83
|
+
return value if table_schema.partitions.detect { |x| x.name == key && x.type == :int }
|
84
|
+
"'#{value}'"
|
78
85
|
end
|
79
86
|
|
80
87
|
def load_file_to_hive_table(table_name, path, partition_clause = nil)
|
81
|
-
request_txt =
|
82
|
-
|
83
|
-
request_txt
|
84
|
-
execute(request_txt)
|
88
|
+
request_txt = "load data local inpath '#{path}' into table #{table_name}"
|
89
|
+
return execute(request_txt) if partition_clause.nil?
|
90
|
+
execute("#{request_txt} #{partition_clause}")
|
85
91
|
end
|
86
92
|
|
87
93
|
def docker_path(file)
|
data/lib/rspec/hive/connector.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rbhive'
|
2
4
|
require 'tempfile'
|
3
5
|
require 'yaml'
|
@@ -16,11 +18,10 @@ module RSpec
|
|
16
18
|
connection = open_connection
|
17
19
|
connection.switch_database(db_name)
|
18
20
|
@config.hive_options.each do |key, value|
|
19
|
-
connection.execute("SET #{key}=#{value}
|
21
|
+
connection.execute("SET #{key}=#{value}")
|
20
22
|
end
|
21
23
|
|
22
24
|
connection
|
23
|
-
|
24
25
|
rescue Thrift::ApplicationException => e
|
25
26
|
config.logger.fatal('An exception was thrown during start connection')
|
26
27
|
config.logger.fatal(e)
|
data/lib/rspec/hive/db_name.rb
CHANGED
data/lib/rspec/hive/matchers.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'type_faker'
|
2
4
|
|
3
5
|
module RSpec
|
@@ -23,12 +25,12 @@ module RSpec
|
|
23
25
|
|
24
26
|
attr_reader :schema, :strategy
|
25
27
|
|
26
|
-
HIVE_NIL = '\N'
|
28
|
+
HIVE_NIL = '\N'
|
27
29
|
|
28
30
|
def array_row(row)
|
29
31
|
size = schema.columns.size
|
30
32
|
missing = size - row.size
|
31
|
-
if missing
|
33
|
+
if missing.positive?
|
32
34
|
row_with_missing_columns(row)
|
33
35
|
else
|
34
36
|
row
|
data/lib/rspec/hive/railtie.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'yaml'
|
2
4
|
require 'colorize'
|
3
5
|
require 'tmpdir'
|
@@ -23,7 +25,14 @@ namespace :spec do
|
|
23
25
|
'docker_shared_directory_path' =>
|
24
26
|
ENV['DOCKER_SHARED_DIR'] || default_config.docker_shared_directory_path,
|
25
27
|
'hive_version' =>
|
26
|
-
ENV['HIVE_VERSION'] || default_config.hive_version
|
28
|
+
ENV['HIVE_VERSION'] || default_config.hive_version,
|
29
|
+
'hive_options' => {
|
30
|
+
'hive.exec.dynamic.partition' => 'true',
|
31
|
+
'hive.exec.dynamic.partition.mode' => 'nonstrict',
|
32
|
+
'hive.exec.max.dynamic.partitions.pernode' => '100000',
|
33
|
+
'hive.exec.max.dynamic.partitions' => '100000',
|
34
|
+
'mapred.child.java.opts' => '-Xmx2048m'
|
35
|
+
}
|
27
36
|
}
|
28
37
|
}
|
29
38
|
system 'mkdir', '-p', default_values['hive']['host_shared_directory_path']
|
@@ -42,14 +51,16 @@ namespace :spec do
|
|
42
51
|
desc 'Runs docker using hive config file.'\
|
43
52
|
' It assumes your docker-machine is running.'
|
44
53
|
task :run do
|
45
|
-
|
54
|
+
raise 'Command `docker` not found.'.red unless system('which docker')
|
46
55
|
|
47
56
|
config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
|
48
|
-
|
49
|
-
|
57
|
+
unless File.exist? config_filepath
|
58
|
+
raise "There's no config file #{config_filepath} please"\
|
59
|
+
'generate default or provide custom config.'.red
|
60
|
+
end
|
50
61
|
|
51
62
|
interpolated = ERB.new(File.read(config_filepath)).result
|
52
|
-
config = YAML.
|
63
|
+
config = YAML.safe_load(interpolated)['hive']
|
53
64
|
|
54
65
|
docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
|
55
66
|
cmd = "docker run -v #{config['host_shared_directory_path']}:"\
|
@@ -62,7 +73,7 @@ namespace :spec do
|
|
62
73
|
|
63
74
|
desc 'Downloads docker image from dockerhub.'
|
64
75
|
task :download_image do
|
65
|
-
|
76
|
+
raise 'Command `docker` not found.'.red unless system('which docker')
|
66
77
|
|
67
78
|
docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
|
68
79
|
|
@@ -76,36 +87,37 @@ namespace :spec do
|
|
76
87
|
docker_conatiners = `docker ps`.lines
|
77
88
|
if docker_conatiners.size != 2
|
78
89
|
raise 'There is more than 1 instance of docker container running (or no running docker containers). '\
|
79
|
-
'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID
|
90
|
+
'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID '\
|
91
|
+
'and run this command again.'.red
|
80
92
|
else
|
81
93
|
docker_conatiners[1].split[0]
|
82
94
|
end
|
83
95
|
end
|
84
96
|
|
85
97
|
desc 'Load Hive UDFS (user defined functions) onto docker.'
|
86
|
-
task :load_udfs, [:udfs_path] do |
|
98
|
+
task :load_udfs, [:udfs_path] do |_t, args|
|
87
99
|
udfs_path = args[:udfs_path]
|
88
100
|
config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
|
89
101
|
interpolated = ERB.new(File.read(config_filepath)).result
|
90
|
-
config = YAML.
|
102
|
+
config = YAML.safe_load(interpolated)['hive']
|
91
103
|
|
92
104
|
host_hive_udfs_path = File.join(config['host_shared_directory_path'], 'hive-udfs.jar')
|
93
|
-
|
105
|
+
raise 'Please provide UDFS_PATH'.red unless udfs_path
|
94
106
|
if udfs_path.start_with?('s3://')
|
95
107
|
puts 'Downloading from s3...'.yellow
|
96
108
|
cmd = "aws s3 ls #{udfs_path}"
|
97
109
|
|
98
|
-
|
110
|
+
raise 'awscli is not configured.'.red unless system(cmd)
|
99
111
|
cmd = "aws s3 cp #{udfs_path} #{host_hive_udfs_path}"
|
100
|
-
system(cmd)
|
101
112
|
else
|
102
113
|
puts 'Copying from local directory...'.yellow
|
103
114
|
cmd = "cp #{udfs_path} #{host_hive_udfs_path}"
|
104
115
|
end
|
116
|
+
system(cmd)
|
105
117
|
puts 'Done'.green
|
106
118
|
|
107
119
|
puts 'Copying to hadoop on docker...'.yellow
|
108
|
-
cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{
|
120
|
+
cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{host_hive_udfs_path} $HADOOP_HOME'"
|
109
121
|
system(cmd)
|
110
122
|
puts 'Done'.green
|
111
123
|
end
|
@@ -114,7 +126,8 @@ namespace :spec do
|
|
114
126
|
desc 'Runs beeline console on hive.'
|
115
127
|
task :beeline do
|
116
128
|
puts "Connecting to docker container: #{container_id} and running beeline. To exit: '!q'".green
|
117
|
-
|
129
|
+
bash_cmd = '$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -d org.apache.hive.jdbc.HiveDriver'
|
130
|
+
cmd = "docker exec -it #{container_id} /bin/bash -c #{bash_cmd}"
|
118
131
|
system(cmd)
|
119
132
|
end
|
120
133
|
end
|
data/lib/rspec/hive/version.rb
CHANGED
data/rspec-hive.gemspec
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
lib = File.expand_path('../lib', __FILE__)
|
3
5
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
6
|
require 'rspec/hive/version'
|
@@ -7,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
7
9
|
spec.name = 'rspec-hive'
|
8
10
|
spec.version = RSpec::Hive::VERSION
|
9
11
|
spec.authors = ['Wojtek Mielczarek', 'Mikołaj Nowak']
|
10
|
-
spec.email = %w
|
12
|
+
spec.email = %w[wojtek.mielczarek@u2i.com mikolaj.nowak@u2i.com]
|
11
13
|
spec.summary = 'RSpec addition to test hive queries'
|
12
14
|
spec.description = 'RSpecHive let you test your hive queries
|
13
15
|
connecting to hive instance installed on docker'
|
@@ -19,21 +21,10 @@ Gem::Specification.new do |spec|
|
|
19
21
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
22
|
spec.require_paths = ['lib']
|
21
23
|
|
22
|
-
spec.add_dependency 'rake', ['>= 10.0', '<
|
23
|
-
spec.add_dependency 'colorize', '~> 0.
|
24
|
+
spec.add_dependency 'rake', ['>= 10.0', '< 13.0']
|
25
|
+
spec.add_dependency 'colorize', '~> 0.8.0'
|
24
26
|
spec.add_dependency 'faker', '~> 1.6'
|
25
|
-
spec.add_dependency 'retryable', '~> 2.0
|
27
|
+
spec.add_dependency 'retryable', '~> 2.0'
|
26
28
|
spec.add_dependency 'rspec', '~> 3.4'
|
27
|
-
spec.add_dependency 'rbhive-u2i', '~> 1.0
|
28
|
-
|
29
|
-
spec.add_development_dependency 'bundler', '~> 1.7'
|
30
|
-
spec.add_development_dependency 'rspec-its', '~> 1.2'
|
31
|
-
spec.add_development_dependency 'rubocop', '~> 0.39'
|
32
|
-
spec.add_development_dependency 'rubocop-rspec', '~> 1.4'
|
33
|
-
spec.add_development_dependency 'guard', '~> 2.6'
|
34
|
-
spec.add_development_dependency 'guard-rspec', '~> 4.3'
|
35
|
-
spec.add_development_dependency 'guard-rubocop', '~> 1.2'
|
36
|
-
spec.add_development_dependency 'codeclimate-test-reporter', '~> 0.4'
|
37
|
-
spec.add_development_dependency 'pry'
|
38
|
-
spec.add_development_dependency 'pry-byebug'
|
29
|
+
spec.add_dependency 'rbhive-u2i', '~> 1.0'
|
39
30
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
require 'tempfile'
|
3
5
|
|
@@ -32,34 +34,21 @@ RSpec.describe RSpec::Hive::Configuration do
|
|
32
34
|
end
|
33
35
|
end
|
34
36
|
|
35
|
-
let(:expected_host_shared_directory_path)
|
36
|
-
'/Users/Shared/tmp/spec-tmp-files'
|
37
|
-
end
|
37
|
+
let(:expected_host_shared_directory_path) { '/Users/Shared/tmp/spec-tmp-files' }
|
38
38
|
let(:expected_docker_shared_directory_path) { '/tmp/spec-tmp-files' }
|
39
|
-
let(:
|
40
|
-
let(:
|
41
|
-
let(:expected_partition_flag) { 'true' }
|
42
|
-
let(:expected_partition_mode) { 'nonstrict' }
|
43
|
-
let(:expected_partiton_pernodexi) { '100000' }
|
44
|
-
let(:expected_partitions) { '100000' }
|
45
|
-
let(:expected_java_opts) { '-Xmx2048m' }
|
46
|
-
let(:expected_hive_options) do
|
47
|
-
{'hive.exec.dynamic.partition' => expected_partition_flag,
|
48
|
-
'hive.exec.dynamic.partition.mode' => expected_partition_mode,
|
49
|
-
'hive.exec.max.dynamic.partitions.pernodexi' => expected_partiton_pernodexi,
|
50
|
-
'hive.exec.max.dynamic.partitions' => expected_partitions,
|
51
|
-
'mapred.child.java.opts' => expected_java_opts}
|
52
|
-
end
|
39
|
+
let(:expected_timeout) { 120 }
|
40
|
+
let(:expected_hive_options) { {} }
|
53
41
|
|
54
42
|
context 'when no configuration file is provided' do
|
55
|
-
let(:expected_port) {
|
43
|
+
let(:expected_port) { 10_000 }
|
56
44
|
let!(:original_host_os) { RbConfig::CONFIG['host_os'] }
|
45
|
+
let(:expected_hive_version) { described_class::DEFAULT_VERSION }
|
57
46
|
|
58
47
|
before { allow(Dir).to receive(:mktmpdir) { mock_tmpdir } }
|
59
48
|
|
60
49
|
context 'when on Mac' do
|
61
50
|
let(:mock_tmpdir) { '/Users/Shared/test/' }
|
62
|
-
let(:expected_host) { '
|
51
|
+
let(:expected_host) { '127.0.0.1' }
|
63
52
|
let(:expected_host_shared_directory_path) { '/Users/Shared/test/spec-tmp-files' }
|
64
53
|
|
65
54
|
before do
|
@@ -88,64 +77,68 @@ RSpec.describe RSpec::Hive::Configuration do
|
|
88
77
|
|
89
78
|
context 'when there is a configuration file' do
|
90
79
|
let(:path_to_config_file) do
|
91
|
-
Tempfile.open(%w
|
80
|
+
Tempfile.open(%w[config .yml]) do |f|
|
92
81
|
f.write yaml_hash.to_yaml
|
93
82
|
f.path
|
94
83
|
end
|
95
84
|
end
|
96
85
|
let(:expected_host) { '127.0.0.2' }
|
97
|
-
let(:expected_port) {
|
86
|
+
let(:expected_port) { 10_001 }
|
98
87
|
|
99
88
|
context 'where all parameters are present' do
|
89
|
+
subject { described_class.new(path_to_config_file) }
|
90
|
+
|
91
|
+
let(:expected_hive_version) { 12 }
|
92
|
+
|
100
93
|
let(:yaml_hash) do
|
101
94
|
{
|
102
95
|
'hive' =>
|
103
96
|
{
|
104
97
|
'host' => '127.0.0.2',
|
105
|
-
'port' =>
|
98
|
+
'port' => 10_001,
|
106
99
|
'host_shared_directory_path' => expected_host_shared_directory_path,
|
107
100
|
'docker_shared_directory_path' => expected_docker_shared_directory_path,
|
108
|
-
'hive_version' => '
|
109
|
-
'timeout' =>
|
101
|
+
'hive_version' => '12',
|
102
|
+
'timeout' => 120
|
110
103
|
}
|
111
104
|
}
|
112
105
|
end
|
113
106
|
|
114
107
|
after { File.unlink(path_to_config_file) }
|
115
108
|
|
116
|
-
subject { described_class.new(path_to_config_file) }
|
117
|
-
|
118
109
|
include_examples('config')
|
119
110
|
end
|
120
111
|
|
121
112
|
context 'where there are only required parameters' do
|
113
|
+
subject { described_class.new(path_to_config_file) }
|
114
|
+
|
115
|
+
let(:expected_hive_version) { described_class::DEFAULT_VERSION }
|
122
116
|
let(:yaml_hash) do
|
123
117
|
{
|
124
118
|
'hive' =>
|
125
119
|
{
|
126
120
|
'host' => '127.0.0.2',
|
127
|
-
'port' =>
|
121
|
+
'port' => 10_001,
|
128
122
|
'host_shared_directory_path' => expected_host_shared_directory_path,
|
129
123
|
'docker_shared_directory_path' => expected_docker_shared_directory_path
|
130
124
|
}
|
131
125
|
}
|
132
126
|
end
|
133
|
-
let(:expected_hive_version) { 10 }
|
134
127
|
|
135
128
|
after { File.unlink(path_to_config_file) }
|
136
129
|
|
137
|
-
subject { described_class.new(path_to_config_file) }
|
138
|
-
|
139
130
|
include_examples('config')
|
140
131
|
end
|
141
132
|
|
142
133
|
context 'where there are some parameters required and optional' do
|
134
|
+
subject { described_class.new(path_to_config_file) }
|
135
|
+
|
143
136
|
let(:yaml_hash) do
|
144
137
|
{
|
145
138
|
'hive' =>
|
146
139
|
{
|
147
140
|
'host' => '127.0.0.2',
|
148
|
-
'port' =>
|
141
|
+
'port' => 10_001,
|
149
142
|
'host_shared_directory_path' => expected_host_shared_directory_path,
|
150
143
|
'docker_shared_directory_path' => expected_docker_shared_directory_path,
|
151
144
|
'hive_version' => 11,
|
@@ -159,11 +152,14 @@ RSpec.describe RSpec::Hive::Configuration do
|
|
159
152
|
let(:expected_timeout) { 60 }
|
160
153
|
let(:expected_hive_version) { 11 }
|
161
154
|
let(:expected_java_opts) { '-Xmx64m' }
|
155
|
+
let(:expected_hive_options) do
|
156
|
+
{
|
157
|
+
'mapred.child.java.opts' => expected_java_opts
|
158
|
+
}
|
159
|
+
end
|
162
160
|
|
163
161
|
after { File.unlink(path_to_config_file) }
|
164
162
|
|
165
|
-
subject { described_class.new(path_to_config_file) }
|
166
|
-
|
167
163
|
include_examples('config')
|
168
164
|
end
|
169
165
|
end
|