rspec-hive 0.4.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +39 -5
- data/.rubocop_todo.yml +40 -0
- data/.ruby-version +1 -1
- data/.travis.yml +4 -3
- data/Gemfile +18 -0
- data/Guardfile +10 -6
- data/README.md +15 -0
- data/Rakefile +4 -2
- data/docker/Dockerfile +38 -47
- data/docker/hive-site.xml +28 -0
- data/examples/lib/query.rb +2 -0
- data/examples/rspec-hive.yml.example +7 -1
- data/examples/spec/query_spec.rb +2 -0
- data/examples/spec/spec_helper.rb +2 -0
- data/lib/rspec/hive.rb +2 -0
- data/lib/rspec/hive/configuration.rb +14 -33
- data/lib/rspec/hive/connection_delegator.rb +19 -13
- data/lib/rspec/hive/connector.rb +3 -2
- data/lib/rspec/hive/db_name.rb +2 -0
- data/lib/rspec/hive/exponential_backoff.rb +2 -0
- data/lib/rspec/hive/matchers.rb +2 -0
- data/lib/rspec/hive/query_builder.rb +2 -0
- data/lib/rspec/hive/query_builder/null_strategy.rb +2 -0
- data/lib/rspec/hive/query_builder/row_transformer.rb +4 -2
- data/lib/rspec/hive/query_builder/type_faker.rb +2 -0
- data/lib/rspec/hive/query_builder/value_by_type_strategy.rb +2 -0
- data/lib/rspec/hive/query_builder_helper.rb +2 -0
- data/lib/rspec/hive/railtie.rb +2 -0
- data/lib/rspec/hive/rake_tasks/docker.rake +27 -14
- data/lib/rspec/hive/version.rb +3 -1
- data/lib/rspec/hive/with_hive_connection.rb +2 -0
- data/rspec-hive.gemspec +7 -16
- data/spec/lib/rspec/hive/configuration_spec.rb +29 -33
- data/spec/lib/rspec/hive/connection_delegator_spec.rb +58 -138
- data/spec/lib/rspec/hive/connector_spec.rb +33 -38
- data/spec/lib/rspec/hive/db_name_spec.rb +4 -2
- data/spec/lib/rspec/hive/matchers_spec.rb +2 -0
- data/spec/lib/rspec/hive/query_builder/row_transformer_spec.rb +6 -4
- data/spec/lib/rspec/hive/query_builder_helper_spec.rb +8 -6
- data/spec/lib/rspec/hive/query_builder_spec.rb +15 -17
- data/spec/lib/rspec/hive_spec.rb +15 -22
- metadata +13 -154
- data/.rubocop_u2i.yml +0 -63
- data/spec/.rubocop.yml +0 -4
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'delegate'
|
2
4
|
require 'tempfile'
|
3
5
|
|
@@ -15,9 +17,9 @@ module RSpec
|
|
15
17
|
execute(table_schema.create_table_statement)
|
16
18
|
end
|
17
19
|
|
18
|
-
def load_partitions(
|
19
|
-
partitions = partition_clause(partitions)
|
20
|
-
query = "ALTER TABLE #{
|
20
|
+
def load_partitions(table_schema, partitions)
|
21
|
+
partitions = partition_clause(table_schema, partitions)
|
22
|
+
query = "ALTER TABLE #{table_schema.name} ADD #{partitions}"
|
21
23
|
execute(query)
|
22
24
|
end
|
23
25
|
|
@@ -29,7 +31,7 @@ module RSpec
|
|
29
31
|
values,
|
30
32
|
table_schema.instance_variable_get(:@field_sep)
|
31
33
|
)
|
32
|
-
partition_query = partition_clause(partitions) if partitions
|
34
|
+
partition_query = partition_clause(table_schema, partitions) if partitions
|
33
35
|
load_file_to_hive_table(
|
34
36
|
table_name,
|
35
37
|
docker_path(file),
|
@@ -65,23 +67,27 @@ module RSpec
|
|
65
67
|
|
66
68
|
private
|
67
69
|
|
68
|
-
def partition_clause(partitions)
|
70
|
+
def partition_clause(table_schema, partitions)
|
69
71
|
if partitions.is_a?(Array)
|
70
|
-
partitions.collect { |x| to_partition_clause(x) }.join(' ')
|
72
|
+
partitions.collect { |x| to_partition_clause(table_schema, x) }.join(' ')
|
71
73
|
else
|
72
|
-
to_partition_clause(partitions)
|
74
|
+
to_partition_clause(table_schema, partitions)
|
73
75
|
end
|
74
76
|
end
|
75
77
|
|
76
|
-
def to_partition_clause(partition)
|
77
|
-
"PARTITION(#{partition.map { |k, v| "#{k}
|
78
|
+
def to_partition_clause(table_schema, partition)
|
79
|
+
"PARTITION(#{partition.map { |k, v| "#{k}=#{partition_value(table_schema, k, v)}" }.join(',')})"
|
80
|
+
end
|
81
|
+
|
82
|
+
def partition_value(table_schema, key, value)
|
83
|
+
return value if table_schema.partitions.detect { |x| x.name == key && x.type == :int }
|
84
|
+
"'#{value}'"
|
78
85
|
end
|
79
86
|
|
80
87
|
def load_file_to_hive_table(table_name, path, partition_clause = nil)
|
81
|
-
request_txt =
|
82
|
-
|
83
|
-
request_txt
|
84
|
-
execute(request_txt)
|
88
|
+
request_txt = "load data local inpath '#{path}' into table #{table_name}"
|
89
|
+
return execute(request_txt) if partition_clause.nil?
|
90
|
+
execute("#{request_txt} #{partition_clause}")
|
85
91
|
end
|
86
92
|
|
87
93
|
def docker_path(file)
|
data/lib/rspec/hive/connector.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rbhive'
|
2
4
|
require 'tempfile'
|
3
5
|
require 'yaml'
|
@@ -16,11 +18,10 @@ module RSpec
|
|
16
18
|
connection = open_connection
|
17
19
|
connection.switch_database(db_name)
|
18
20
|
@config.hive_options.each do |key, value|
|
19
|
-
connection.execute("SET #{key}=#{value}
|
21
|
+
connection.execute("SET #{key}=#{value}")
|
20
22
|
end
|
21
23
|
|
22
24
|
connection
|
23
|
-
|
24
25
|
rescue Thrift::ApplicationException => e
|
25
26
|
config.logger.fatal('An exception was thrown during start connection')
|
26
27
|
config.logger.fatal(e)
|
data/lib/rspec/hive/db_name.rb
CHANGED
data/lib/rspec/hive/matchers.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'type_faker'
|
2
4
|
|
3
5
|
module RSpec
|
@@ -23,12 +25,12 @@ module RSpec
|
|
23
25
|
|
24
26
|
attr_reader :schema, :strategy
|
25
27
|
|
26
|
-
HIVE_NIL = '\N'
|
28
|
+
HIVE_NIL = '\N'
|
27
29
|
|
28
30
|
def array_row(row)
|
29
31
|
size = schema.columns.size
|
30
32
|
missing = size - row.size
|
31
|
-
if missing
|
33
|
+
if missing.positive?
|
32
34
|
row_with_missing_columns(row)
|
33
35
|
else
|
34
36
|
row
|
data/lib/rspec/hive/railtie.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'yaml'
|
2
4
|
require 'colorize'
|
3
5
|
require 'tmpdir'
|
@@ -23,7 +25,14 @@ namespace :spec do
|
|
23
25
|
'docker_shared_directory_path' =>
|
24
26
|
ENV['DOCKER_SHARED_DIR'] || default_config.docker_shared_directory_path,
|
25
27
|
'hive_version' =>
|
26
|
-
ENV['HIVE_VERSION'] || default_config.hive_version
|
28
|
+
ENV['HIVE_VERSION'] || default_config.hive_version,
|
29
|
+
'hive_options' => {
|
30
|
+
'hive.exec.dynamic.partition' => 'true',
|
31
|
+
'hive.exec.dynamic.partition.mode' => 'nonstrict',
|
32
|
+
'hive.exec.max.dynamic.partitions.pernode' => '100000',
|
33
|
+
'hive.exec.max.dynamic.partitions' => '100000',
|
34
|
+
'mapred.child.java.opts' => '-Xmx2048m'
|
35
|
+
}
|
27
36
|
}
|
28
37
|
}
|
29
38
|
system 'mkdir', '-p', default_values['hive']['host_shared_directory_path']
|
@@ -42,14 +51,16 @@ namespace :spec do
|
|
42
51
|
desc 'Runs docker using hive config file.'\
|
43
52
|
' It assumes your docker-machine is running.'
|
44
53
|
task :run do
|
45
|
-
|
54
|
+
raise 'Command `docker` not found.'.red unless system('which docker')
|
46
55
|
|
47
56
|
config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
|
48
|
-
|
49
|
-
|
57
|
+
unless File.exist? config_filepath
|
58
|
+
raise "There's no config file #{config_filepath} please"\
|
59
|
+
'generate default or provide custom config.'.red
|
60
|
+
end
|
50
61
|
|
51
62
|
interpolated = ERB.new(File.read(config_filepath)).result
|
52
|
-
config = YAML.
|
63
|
+
config = YAML.safe_load(interpolated)['hive']
|
53
64
|
|
54
65
|
docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
|
55
66
|
cmd = "docker run -v #{config['host_shared_directory_path']}:"\
|
@@ -62,7 +73,7 @@ namespace :spec do
|
|
62
73
|
|
63
74
|
desc 'Downloads docker image from dockerhub.'
|
64
75
|
task :download_image do
|
65
|
-
|
76
|
+
raise 'Command `docker` not found.'.red unless system('which docker')
|
66
77
|
|
67
78
|
docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
|
68
79
|
|
@@ -76,36 +87,37 @@ namespace :spec do
|
|
76
87
|
docker_conatiners = `docker ps`.lines
|
77
88
|
if docker_conatiners.size != 2
|
78
89
|
raise 'There is more than 1 instance of docker container running (or no running docker containers). '\
|
79
|
-
'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID
|
90
|
+
'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID '\
|
91
|
+
'and run this command again.'.red
|
80
92
|
else
|
81
93
|
docker_conatiners[1].split[0]
|
82
94
|
end
|
83
95
|
end
|
84
96
|
|
85
97
|
desc 'Load Hive UDFS (user defined functions) onto docker.'
|
86
|
-
task :load_udfs, [:udfs_path] do |
|
98
|
+
task :load_udfs, [:udfs_path] do |_t, args|
|
87
99
|
udfs_path = args[:udfs_path]
|
88
100
|
config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
|
89
101
|
interpolated = ERB.new(File.read(config_filepath)).result
|
90
|
-
config = YAML.
|
102
|
+
config = YAML.safe_load(interpolated)['hive']
|
91
103
|
|
92
104
|
host_hive_udfs_path = File.join(config['host_shared_directory_path'], 'hive-udfs.jar')
|
93
|
-
|
105
|
+
raise 'Please provide UDFS_PATH'.red unless udfs_path
|
94
106
|
if udfs_path.start_with?('s3://')
|
95
107
|
puts 'Downloading from s3...'.yellow
|
96
108
|
cmd = "aws s3 ls #{udfs_path}"
|
97
109
|
|
98
|
-
|
110
|
+
raise 'awscli is not configured.'.red unless system(cmd)
|
99
111
|
cmd = "aws s3 cp #{udfs_path} #{host_hive_udfs_path}"
|
100
|
-
system(cmd)
|
101
112
|
else
|
102
113
|
puts 'Copying from local directory...'.yellow
|
103
114
|
cmd = "cp #{udfs_path} #{host_hive_udfs_path}"
|
104
115
|
end
|
116
|
+
system(cmd)
|
105
117
|
puts 'Done'.green
|
106
118
|
|
107
119
|
puts 'Copying to hadoop on docker...'.yellow
|
108
|
-
cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{
|
120
|
+
cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{host_hive_udfs_path} $HADOOP_HOME'"
|
109
121
|
system(cmd)
|
110
122
|
puts 'Done'.green
|
111
123
|
end
|
@@ -114,7 +126,8 @@ namespace :spec do
|
|
114
126
|
desc 'Runs beeline console on hive.'
|
115
127
|
task :beeline do
|
116
128
|
puts "Connecting to docker container: #{container_id} and running beeline. To exit: '!q'".green
|
117
|
-
|
129
|
+
bash_cmd = '$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -d org.apache.hive.jdbc.HiveDriver'
|
130
|
+
cmd = "docker exec -it #{container_id} /bin/bash -c #{bash_cmd}"
|
118
131
|
system(cmd)
|
119
132
|
end
|
120
133
|
end
|
data/lib/rspec/hive/version.rb
CHANGED
data/rspec-hive.gemspec
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
lib = File.expand_path('../lib', __FILE__)
|
3
5
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
6
|
require 'rspec/hive/version'
|
@@ -7,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
7
9
|
spec.name = 'rspec-hive'
|
8
10
|
spec.version = RSpec::Hive::VERSION
|
9
11
|
spec.authors = ['Wojtek Mielczarek', 'Mikołaj Nowak']
|
10
|
-
spec.email = %w
|
12
|
+
spec.email = %w[wojtek.mielczarek@u2i.com mikolaj.nowak@u2i.com]
|
11
13
|
spec.summary = 'RSpec addition to test hive queries'
|
12
14
|
spec.description = 'RSpecHive let you test your hive queries
|
13
15
|
connecting to hive instance installed on docker'
|
@@ -19,21 +21,10 @@ Gem::Specification.new do |spec|
|
|
19
21
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
22
|
spec.require_paths = ['lib']
|
21
23
|
|
22
|
-
spec.add_dependency 'rake', ['>= 10.0', '<
|
23
|
-
spec.add_dependency 'colorize', '~> 0.
|
24
|
+
spec.add_dependency 'rake', ['>= 10.0', '< 13.0']
|
25
|
+
spec.add_dependency 'colorize', '~> 0.8.0'
|
24
26
|
spec.add_dependency 'faker', '~> 1.6'
|
25
|
-
spec.add_dependency 'retryable', '~> 2.0
|
27
|
+
spec.add_dependency 'retryable', '~> 2.0'
|
26
28
|
spec.add_dependency 'rspec', '~> 3.4'
|
27
|
-
spec.add_dependency 'rbhive-u2i', '~> 1.0
|
28
|
-
|
29
|
-
spec.add_development_dependency 'bundler', '~> 1.7'
|
30
|
-
spec.add_development_dependency 'rspec-its', '~> 1.2'
|
31
|
-
spec.add_development_dependency 'rubocop', '~> 0.39'
|
32
|
-
spec.add_development_dependency 'rubocop-rspec', '~> 1.4'
|
33
|
-
spec.add_development_dependency 'guard', '~> 2.6'
|
34
|
-
spec.add_development_dependency 'guard-rspec', '~> 4.3'
|
35
|
-
spec.add_development_dependency 'guard-rubocop', '~> 1.2'
|
36
|
-
spec.add_development_dependency 'codeclimate-test-reporter', '~> 0.4'
|
37
|
-
spec.add_development_dependency 'pry'
|
38
|
-
spec.add_development_dependency 'pry-byebug'
|
29
|
+
spec.add_dependency 'rbhive-u2i', '~> 1.0'
|
39
30
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
require 'tempfile'
|
3
5
|
|
@@ -32,34 +34,21 @@ RSpec.describe RSpec::Hive::Configuration do
|
|
32
34
|
end
|
33
35
|
end
|
34
36
|
|
35
|
-
let(:expected_host_shared_directory_path)
|
36
|
-
'/Users/Shared/tmp/spec-tmp-files'
|
37
|
-
end
|
37
|
+
let(:expected_host_shared_directory_path) { '/Users/Shared/tmp/spec-tmp-files' }
|
38
38
|
let(:expected_docker_shared_directory_path) { '/tmp/spec-tmp-files' }
|
39
|
-
let(:
|
40
|
-
let(:
|
41
|
-
let(:expected_partition_flag) { 'true' }
|
42
|
-
let(:expected_partition_mode) { 'nonstrict' }
|
43
|
-
let(:expected_partiton_pernodexi) { '100000' }
|
44
|
-
let(:expected_partitions) { '100000' }
|
45
|
-
let(:expected_java_opts) { '-Xmx2048m' }
|
46
|
-
let(:expected_hive_options) do
|
47
|
-
{'hive.exec.dynamic.partition' => expected_partition_flag,
|
48
|
-
'hive.exec.dynamic.partition.mode' => expected_partition_mode,
|
49
|
-
'hive.exec.max.dynamic.partitions.pernodexi' => expected_partiton_pernodexi,
|
50
|
-
'hive.exec.max.dynamic.partitions' => expected_partitions,
|
51
|
-
'mapred.child.java.opts' => expected_java_opts}
|
52
|
-
end
|
39
|
+
let(:expected_timeout) { 120 }
|
40
|
+
let(:expected_hive_options) { {} }
|
53
41
|
|
54
42
|
context 'when no configuration file is provided' do
|
55
|
-
let(:expected_port) {
|
43
|
+
let(:expected_port) { 10_000 }
|
56
44
|
let!(:original_host_os) { RbConfig::CONFIG['host_os'] }
|
45
|
+
let(:expected_hive_version) { described_class::DEFAULT_VERSION }
|
57
46
|
|
58
47
|
before { allow(Dir).to receive(:mktmpdir) { mock_tmpdir } }
|
59
48
|
|
60
49
|
context 'when on Mac' do
|
61
50
|
let(:mock_tmpdir) { '/Users/Shared/test/' }
|
62
|
-
let(:expected_host) { '
|
51
|
+
let(:expected_host) { '127.0.0.1' }
|
63
52
|
let(:expected_host_shared_directory_path) { '/Users/Shared/test/spec-tmp-files' }
|
64
53
|
|
65
54
|
before do
|
@@ -88,64 +77,68 @@ RSpec.describe RSpec::Hive::Configuration do
|
|
88
77
|
|
89
78
|
context 'when there is a configuration file' do
|
90
79
|
let(:path_to_config_file) do
|
91
|
-
Tempfile.open(%w
|
80
|
+
Tempfile.open(%w[config .yml]) do |f|
|
92
81
|
f.write yaml_hash.to_yaml
|
93
82
|
f.path
|
94
83
|
end
|
95
84
|
end
|
96
85
|
let(:expected_host) { '127.0.0.2' }
|
97
|
-
let(:expected_port) {
|
86
|
+
let(:expected_port) { 10_001 }
|
98
87
|
|
99
88
|
context 'where all parameters are present' do
|
89
|
+
subject { described_class.new(path_to_config_file) }
|
90
|
+
|
91
|
+
let(:expected_hive_version) { 12 }
|
92
|
+
|
100
93
|
let(:yaml_hash) do
|
101
94
|
{
|
102
95
|
'hive' =>
|
103
96
|
{
|
104
97
|
'host' => '127.0.0.2',
|
105
|
-
'port' =>
|
98
|
+
'port' => 10_001,
|
106
99
|
'host_shared_directory_path' => expected_host_shared_directory_path,
|
107
100
|
'docker_shared_directory_path' => expected_docker_shared_directory_path,
|
108
|
-
'hive_version' => '
|
109
|
-
'timeout' =>
|
101
|
+
'hive_version' => '12',
|
102
|
+
'timeout' => 120
|
110
103
|
}
|
111
104
|
}
|
112
105
|
end
|
113
106
|
|
114
107
|
after { File.unlink(path_to_config_file) }
|
115
108
|
|
116
|
-
subject { described_class.new(path_to_config_file) }
|
117
|
-
|
118
109
|
include_examples('config')
|
119
110
|
end
|
120
111
|
|
121
112
|
context 'where there are only required parameters' do
|
113
|
+
subject { described_class.new(path_to_config_file) }
|
114
|
+
|
115
|
+
let(:expected_hive_version) { described_class::DEFAULT_VERSION }
|
122
116
|
let(:yaml_hash) do
|
123
117
|
{
|
124
118
|
'hive' =>
|
125
119
|
{
|
126
120
|
'host' => '127.0.0.2',
|
127
|
-
'port' =>
|
121
|
+
'port' => 10_001,
|
128
122
|
'host_shared_directory_path' => expected_host_shared_directory_path,
|
129
123
|
'docker_shared_directory_path' => expected_docker_shared_directory_path
|
130
124
|
}
|
131
125
|
}
|
132
126
|
end
|
133
|
-
let(:expected_hive_version) { 10 }
|
134
127
|
|
135
128
|
after { File.unlink(path_to_config_file) }
|
136
129
|
|
137
|
-
subject { described_class.new(path_to_config_file) }
|
138
|
-
|
139
130
|
include_examples('config')
|
140
131
|
end
|
141
132
|
|
142
133
|
context 'where there are some parameters required and optional' do
|
134
|
+
subject { described_class.new(path_to_config_file) }
|
135
|
+
|
143
136
|
let(:yaml_hash) do
|
144
137
|
{
|
145
138
|
'hive' =>
|
146
139
|
{
|
147
140
|
'host' => '127.0.0.2',
|
148
|
-
'port' =>
|
141
|
+
'port' => 10_001,
|
149
142
|
'host_shared_directory_path' => expected_host_shared_directory_path,
|
150
143
|
'docker_shared_directory_path' => expected_docker_shared_directory_path,
|
151
144
|
'hive_version' => 11,
|
@@ -159,11 +152,14 @@ RSpec.describe RSpec::Hive::Configuration do
|
|
159
152
|
let(:expected_timeout) { 60 }
|
160
153
|
let(:expected_hive_version) { 11 }
|
161
154
|
let(:expected_java_opts) { '-Xmx64m' }
|
155
|
+
let(:expected_hive_options) do
|
156
|
+
{
|
157
|
+
'mapred.child.java.opts' => expected_java_opts
|
158
|
+
}
|
159
|
+
end
|
162
160
|
|
163
161
|
after { File.unlink(path_to_config_file) }
|
164
162
|
|
165
|
-
subject { described_class.new(path_to_config_file) }
|
166
|
-
|
167
163
|
include_examples('config')
|
168
164
|
end
|
169
165
|
end
|