rspec-hive 0.4.2 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +39 -5
  4. data/.rubocop_todo.yml +40 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -3
  7. data/Gemfile +18 -0
  8. data/Guardfile +10 -6
  9. data/README.md +15 -0
  10. data/Rakefile +4 -2
  11. data/docker/Dockerfile +38 -47
  12. data/docker/hive-site.xml +28 -0
  13. data/examples/lib/query.rb +2 -0
  14. data/examples/rspec-hive.yml.example +7 -1
  15. data/examples/spec/query_spec.rb +2 -0
  16. data/examples/spec/spec_helper.rb +2 -0
  17. data/lib/rspec/hive.rb +2 -0
  18. data/lib/rspec/hive/configuration.rb +14 -33
  19. data/lib/rspec/hive/connection_delegator.rb +19 -13
  20. data/lib/rspec/hive/connector.rb +3 -2
  21. data/lib/rspec/hive/db_name.rb +2 -0
  22. data/lib/rspec/hive/exponential_backoff.rb +2 -0
  23. data/lib/rspec/hive/matchers.rb +2 -0
  24. data/lib/rspec/hive/query_builder.rb +2 -0
  25. data/lib/rspec/hive/query_builder/null_strategy.rb +2 -0
  26. data/lib/rspec/hive/query_builder/row_transformer.rb +4 -2
  27. data/lib/rspec/hive/query_builder/type_faker.rb +2 -0
  28. data/lib/rspec/hive/query_builder/value_by_type_strategy.rb +2 -0
  29. data/lib/rspec/hive/query_builder_helper.rb +2 -0
  30. data/lib/rspec/hive/railtie.rb +2 -0
  31. data/lib/rspec/hive/rake_tasks/docker.rake +27 -14
  32. data/lib/rspec/hive/version.rb +3 -1
  33. data/lib/rspec/hive/with_hive_connection.rb +2 -0
  34. data/rspec-hive.gemspec +7 -16
  35. data/spec/lib/rspec/hive/configuration_spec.rb +29 -33
  36. data/spec/lib/rspec/hive/connection_delegator_spec.rb +58 -138
  37. data/spec/lib/rspec/hive/connector_spec.rb +33 -38
  38. data/spec/lib/rspec/hive/db_name_spec.rb +4 -2
  39. data/spec/lib/rspec/hive/matchers_spec.rb +2 -0
  40. data/spec/lib/rspec/hive/query_builder/row_transformer_spec.rb +6 -4
  41. data/spec/lib/rspec/hive/query_builder_helper_spec.rb +8 -6
  42. data/spec/lib/rspec/hive/query_builder_spec.rb +15 -17
  43. data/spec/lib/rspec/hive_spec.rb +15 -22
  44. metadata +13 -154
  45. data/.rubocop_u2i.yml +0 -63
  46. data/spec/.rubocop.yml +0 -4
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'delegate'
2
4
  require 'tempfile'
3
5
 
@@ -15,9 +17,9 @@ module RSpec
15
17
  execute(table_schema.create_table_statement)
16
18
  end
17
19
 
18
- def load_partitions(table_name, partitions)
19
- partitions = partition_clause(partitions)
20
- query = "ALTER TABLE #{table_name} ADD #{partitions}"
20
+ def load_partitions(table_schema, partitions)
21
+ partitions = partition_clause(table_schema, partitions)
22
+ query = "ALTER TABLE #{table_schema.name} ADD #{partitions}"
21
23
  execute(query)
22
24
  end
23
25
 
@@ -29,7 +31,7 @@ module RSpec
29
31
  values,
30
32
  table_schema.instance_variable_get(:@field_sep)
31
33
  )
32
- partition_query = partition_clause(partitions) if partitions
34
+ partition_query = partition_clause(table_schema, partitions) if partitions
33
35
  load_file_to_hive_table(
34
36
  table_name,
35
37
  docker_path(file),
@@ -65,23 +67,27 @@ module RSpec
65
67
 
66
68
  private
67
69
 
68
- def partition_clause(partitions)
70
+ def partition_clause(table_schema, partitions)
69
71
  if partitions.is_a?(Array)
70
- partitions.collect { |x| to_partition_clause(x) }.join(' ')
72
+ partitions.collect { |x| to_partition_clause(table_schema, x) }.join(' ')
71
73
  else
72
- to_partition_clause(partitions)
74
+ to_partition_clause(table_schema, partitions)
73
75
  end
74
76
  end
75
77
 
76
- def to_partition_clause(partition)
77
- "PARTITION(#{partition.map { |k, v| "#{k}='#{v}'" }.join(',')})"
78
+ def to_partition_clause(table_schema, partition)
79
+ "PARTITION(#{partition.map { |k, v| "#{k}=#{partition_value(table_schema, k, v)}" }.join(',')})"
80
+ end
81
+
82
+ def partition_value(table_schema, key, value)
83
+ return value if table_schema.partitions.detect { |x| x.name == key && x.type == :int }
84
+ "'#{value}'"
78
85
  end
79
86
 
80
87
  def load_file_to_hive_table(table_name, path, partition_clause = nil)
81
- request_txt =
82
- "load data local inpath '#{path}' into table #{table_name}"
83
- request_txt << " #{partition_clause}" unless partition_clause.nil?
84
- execute(request_txt)
88
+ request_txt = "load data local inpath '#{path}' into table #{table_name}"
89
+ return execute(request_txt) if partition_clause.nil?
90
+ execute("#{request_txt} #{partition_clause}")
85
91
  end
86
92
 
87
93
  def docker_path(file)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rbhive'
2
4
  require 'tempfile'
3
5
  require 'yaml'
@@ -16,11 +18,10 @@ module RSpec
16
18
  connection = open_connection
17
19
  connection.switch_database(db_name)
18
20
  @config.hive_options.each do |key, value|
19
- connection.execute("SET #{key}=#{value};")
21
+ connection.execute("SET #{key}=#{value}")
20
22
  end
21
23
 
22
24
  connection
23
-
24
25
  rescue Thrift::ApplicationException => e
25
26
  config.logger.fatal('An exception was thrown during start connection')
26
27
  config.logger.fatal(e)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'securerandom'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'retryable'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rspec/matchers'
2
4
 
3
5
  RSpec::Matchers.define :match_result_set do |expected|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'query_builder/row_transformer'
2
4
  require_relative 'query_builder/null_strategy'
3
5
  require_relative 'query_builder/value_by_type_strategy'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module RSpec
2
4
  module Hive
3
5
  class QueryBuilder
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'type_faker'
2
4
 
3
5
  module RSpec
@@ -23,12 +25,12 @@ module RSpec
23
25
 
24
26
  attr_reader :schema, :strategy
25
27
 
26
- HIVE_NIL = '\N'.freeze
28
+ HIVE_NIL = '\N'
27
29
 
28
30
  def array_row(row)
29
31
  size = schema.columns.size
30
32
  missing = size - row.size
31
- if missing > 0
33
+ if missing.positive?
32
34
  row_with_missing_columns(row)
33
35
  else
34
36
  row
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'faker'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'type_faker'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'query_builder'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rails'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'yaml'
2
4
  require 'colorize'
3
5
  require 'tmpdir'
@@ -23,7 +25,14 @@ namespace :spec do
23
25
  'docker_shared_directory_path' =>
24
26
  ENV['DOCKER_SHARED_DIR'] || default_config.docker_shared_directory_path,
25
27
  'hive_version' =>
26
- ENV['HIVE_VERSION'] || default_config.hive_version
28
+ ENV['HIVE_VERSION'] || default_config.hive_version,
29
+ 'hive_options' => {
30
+ 'hive.exec.dynamic.partition' => 'true',
31
+ 'hive.exec.dynamic.partition.mode' => 'nonstrict',
32
+ 'hive.exec.max.dynamic.partitions.pernode' => '100000',
33
+ 'hive.exec.max.dynamic.partitions' => '100000',
34
+ 'mapred.child.java.opts' => '-Xmx2048m'
35
+ }
27
36
  }
28
37
  }
29
38
  system 'mkdir', '-p', default_values['hive']['host_shared_directory_path']
@@ -42,14 +51,16 @@ namespace :spec do
42
51
  desc 'Runs docker using hive config file.'\
43
52
  ' It assumes your docker-machine is running.'
44
53
  task :run do
45
- fail 'Command `docker` not found.'.red unless system('which docker')
54
+ raise 'Command `docker` not found.'.red unless system('which docker')
46
55
 
47
56
  config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
48
- fail "There's no config file #{config_filepath} please"\
49
- "generate default or provide custom config.".red unless File.exist? config_filepath
57
+ unless File.exist? config_filepath
58
+ raise "There's no config file #{config_filepath} please"\
59
+ 'generate default or provide custom config.'.red
60
+ end
50
61
 
51
62
  interpolated = ERB.new(File.read(config_filepath)).result
52
- config = YAML.load(interpolated)['hive']
63
+ config = YAML.safe_load(interpolated)['hive']
53
64
 
54
65
  docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
55
66
  cmd = "docker run -v #{config['host_shared_directory_path']}:"\
@@ -62,7 +73,7 @@ namespace :spec do
62
73
 
63
74
  desc 'Downloads docker image from dockerhub.'
64
75
  task :download_image do
65
- fail 'Command `docker` not found.'.red unless system('which docker')
76
+ raise 'Command `docker` not found.'.red unless system('which docker')
66
77
 
67
78
  docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
68
79
 
@@ -76,36 +87,37 @@ namespace :spec do
76
87
  docker_conatiners = `docker ps`.lines
77
88
  if docker_conatiners.size != 2
78
89
  raise 'There is more than 1 instance of docker container running (or no running docker containers). '\
79
- 'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID and run this command again.'.red
90
+ 'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID '\
91
+ 'and run this command again.'.red
80
92
  else
81
93
  docker_conatiners[1].split[0]
82
94
  end
83
95
  end
84
96
 
85
97
  desc 'Load Hive UDFS (user defined functions) onto docker.'
86
- task :load_udfs, [:udfs_path] do |t, args|
98
+ task :load_udfs, [:udfs_path] do |_t, args|
87
99
  udfs_path = args[:udfs_path]
88
100
  config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
89
101
  interpolated = ERB.new(File.read(config_filepath)).result
90
- config = YAML.load(interpolated)['hive']
102
+ config = YAML.safe_load(interpolated)['hive']
91
103
 
92
104
  host_hive_udfs_path = File.join(config['host_shared_directory_path'], 'hive-udfs.jar')
93
- fail 'Please provide UDFS_PATH'.red unless udfs_path
105
+ raise 'Please provide UDFS_PATH'.red unless udfs_path
94
106
  if udfs_path.start_with?('s3://')
95
107
  puts 'Downloading from s3...'.yellow
96
108
  cmd = "aws s3 ls #{udfs_path}"
97
109
 
98
- fail 'awscli is not configured.'.red unless system(cmd)
110
+ raise 'awscli is not configured.'.red unless system(cmd)
99
111
  cmd = "aws s3 cp #{udfs_path} #{host_hive_udfs_path}"
100
- system(cmd)
101
112
  else
102
113
  puts 'Copying from local directory...'.yellow
103
114
  cmd = "cp #{udfs_path} #{host_hive_udfs_path}"
104
115
  end
116
+ system(cmd)
105
117
  puts 'Done'.green
106
118
 
107
119
  puts 'Copying to hadoop on docker...'.yellow
108
- cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{config['docker_shared_directory_path']}/hive-udfs.jar $HADOOP_HOME'"
120
+ cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{host_hive_udfs_path} $HADOOP_HOME'"
109
121
  system(cmd)
110
122
  puts 'Done'.green
111
123
  end
@@ -114,7 +126,8 @@ namespace :spec do
114
126
  desc 'Runs beeline console on hive.'
115
127
  task :beeline do
116
128
  puts "Connecting to docker container: #{container_id} and running beeline. To exit: '!q'".green
117
- cmd = "docker exec -it #{container_id} /bin/bash -c '$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -d org.apache.hive.jdbc.HiveDriver'"
129
+ bash_cmd = '$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -d org.apache.hive.jdbc.HiveDriver'
130
+ cmd = "docker exec -it #{container_id} /bin/bash -c #{bash_cmd}"
118
131
  system(cmd)
119
132
  end
120
133
  end
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module RSpec
2
4
  module Hive
3
- VERSION = '0.4.2'.freeze
5
+ VERSION = '0.6.0'
4
6
  end
5
7
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'exponential_backoff'
2
4
 
3
5
  module RSpec
@@ -1,4 +1,6 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
2
4
  lib = File.expand_path('../lib', __FILE__)
3
5
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
6
  require 'rspec/hive/version'
@@ -7,7 +9,7 @@ Gem::Specification.new do |spec|
7
9
  spec.name = 'rspec-hive'
8
10
  spec.version = RSpec::Hive::VERSION
9
11
  spec.authors = ['Wojtek Mielczarek', 'Mikołaj Nowak']
10
- spec.email = %w(wojtek.mielczarek@u2i.com mikolaj.nowak@u2i.com)
12
+ spec.email = %w[wojtek.mielczarek@u2i.com mikolaj.nowak@u2i.com]
11
13
  spec.summary = 'RSpec addition to test hive queries'
12
14
  spec.description = 'RSpecHive let you test your hive queries
13
15
  connecting to hive instance installed on docker'
@@ -19,21 +21,10 @@ Gem::Specification.new do |spec|
19
21
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
22
  spec.require_paths = ['lib']
21
23
 
22
- spec.add_dependency 'rake', ['>= 10.0', '< 12.0']
23
- spec.add_dependency 'colorize', '~> 0.7'
24
+ spec.add_dependency 'rake', ['>= 10.0', '< 13.0']
25
+ spec.add_dependency 'colorize', '~> 0.8.0'
24
26
  spec.add_dependency 'faker', '~> 1.6'
25
- spec.add_dependency 'retryable', '~> 2.0.3'
27
+ spec.add_dependency 'retryable', '~> 2.0'
26
28
  spec.add_dependency 'rspec', '~> 3.4'
27
- spec.add_dependency 'rbhive-u2i', '~> 1.0.0'
28
-
29
- spec.add_development_dependency 'bundler', '~> 1.7'
30
- spec.add_development_dependency 'rspec-its', '~> 1.2'
31
- spec.add_development_dependency 'rubocop', '~> 0.39'
32
- spec.add_development_dependency 'rubocop-rspec', '~> 1.4'
33
- spec.add_development_dependency 'guard', '~> 2.6'
34
- spec.add_development_dependency 'guard-rspec', '~> 4.3'
35
- spec.add_development_dependency 'guard-rubocop', '~> 1.2'
36
- spec.add_development_dependency 'codeclimate-test-reporter', '~> 0.4'
37
- spec.add_development_dependency 'pry'
38
- spec.add_development_dependency 'pry-byebug'
29
+ spec.add_dependency 'rbhive-u2i', '~> 1.0'
39
30
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
  require 'tempfile'
3
5
 
@@ -32,34 +34,21 @@ RSpec.describe RSpec::Hive::Configuration do
32
34
  end
33
35
  end
34
36
 
35
- let(:expected_host_shared_directory_path) do
36
- '/Users/Shared/tmp/spec-tmp-files'
37
- end
37
+ let(:expected_host_shared_directory_path) { '/Users/Shared/tmp/spec-tmp-files' }
38
38
  let(:expected_docker_shared_directory_path) { '/tmp/spec-tmp-files' }
39
- let(:expected_hive_version) { 10 }
40
- let(:expected_timeout) { 1800 }
41
- let(:expected_partition_flag) { 'true' }
42
- let(:expected_partition_mode) { 'nonstrict' }
43
- let(:expected_partiton_pernodexi) { '100000' }
44
- let(:expected_partitions) { '100000' }
45
- let(:expected_java_opts) { '-Xmx2048m' }
46
- let(:expected_hive_options) do
47
- {'hive.exec.dynamic.partition' => expected_partition_flag,
48
- 'hive.exec.dynamic.partition.mode' => expected_partition_mode,
49
- 'hive.exec.max.dynamic.partitions.pernodexi' => expected_partiton_pernodexi,
50
- 'hive.exec.max.dynamic.partitions' => expected_partitions,
51
- 'mapred.child.java.opts' => expected_java_opts}
52
- end
39
+ let(:expected_timeout) { 120 }
40
+ let(:expected_hive_options) { {} }
53
41
 
54
42
  context 'when no configuration file is provided' do
55
- let(:expected_port) { 10000 }
43
+ let(:expected_port) { 10_000 }
56
44
  let!(:original_host_os) { RbConfig::CONFIG['host_os'] }
45
+ let(:expected_hive_version) { described_class::DEFAULT_VERSION }
57
46
 
58
47
  before { allow(Dir).to receive(:mktmpdir) { mock_tmpdir } }
59
48
 
60
49
  context 'when on Mac' do
61
50
  let(:mock_tmpdir) { '/Users/Shared/test/' }
62
- let(:expected_host) { '192.168.99.100' }
51
+ let(:expected_host) { '127.0.0.1' }
63
52
  let(:expected_host_shared_directory_path) { '/Users/Shared/test/spec-tmp-files' }
64
53
 
65
54
  before do
@@ -88,64 +77,68 @@ RSpec.describe RSpec::Hive::Configuration do
88
77
 
89
78
  context 'when there is a configuration file' do
90
79
  let(:path_to_config_file) do
91
- Tempfile.open(%w(config .yml)) do |f|
80
+ Tempfile.open(%w[config .yml]) do |f|
92
81
  f.write yaml_hash.to_yaml
93
82
  f.path
94
83
  end
95
84
  end
96
85
  let(:expected_host) { '127.0.0.2' }
97
- let(:expected_port) { 10001 }
86
+ let(:expected_port) { 10_001 }
98
87
 
99
88
  context 'where all parameters are present' do
89
+ subject { described_class.new(path_to_config_file) }
90
+
91
+ let(:expected_hive_version) { 12 }
92
+
100
93
  let(:yaml_hash) do
101
94
  {
102
95
  'hive' =>
103
96
  {
104
97
  'host' => '127.0.0.2',
105
- 'port' => 10001,
98
+ 'port' => 10_001,
106
99
  'host_shared_directory_path' => expected_host_shared_directory_path,
107
100
  'docker_shared_directory_path' => expected_docker_shared_directory_path,
108
- 'hive_version' => '10',
109
- 'timeout' => 1800
101
+ 'hive_version' => '12',
102
+ 'timeout' => 120
110
103
  }
111
104
  }
112
105
  end
113
106
 
114
107
  after { File.unlink(path_to_config_file) }
115
108
 
116
- subject { described_class.new(path_to_config_file) }
117
-
118
109
  include_examples('config')
119
110
  end
120
111
 
121
112
  context 'where there are only required parameters' do
113
+ subject { described_class.new(path_to_config_file) }
114
+
115
+ let(:expected_hive_version) { described_class::DEFAULT_VERSION }
122
116
  let(:yaml_hash) do
123
117
  {
124
118
  'hive' =>
125
119
  {
126
120
  'host' => '127.0.0.2',
127
- 'port' => 10001,
121
+ 'port' => 10_001,
128
122
  'host_shared_directory_path' => expected_host_shared_directory_path,
129
123
  'docker_shared_directory_path' => expected_docker_shared_directory_path
130
124
  }
131
125
  }
132
126
  end
133
- let(:expected_hive_version) { 10 }
134
127
 
135
128
  after { File.unlink(path_to_config_file) }
136
129
 
137
- subject { described_class.new(path_to_config_file) }
138
-
139
130
  include_examples('config')
140
131
  end
141
132
 
142
133
  context 'where there are some parameters required and optional' do
134
+ subject { described_class.new(path_to_config_file) }
135
+
143
136
  let(:yaml_hash) do
144
137
  {
145
138
  'hive' =>
146
139
  {
147
140
  'host' => '127.0.0.2',
148
- 'port' => 10001,
141
+ 'port' => 10_001,
149
142
  'host_shared_directory_path' => expected_host_shared_directory_path,
150
143
  'docker_shared_directory_path' => expected_docker_shared_directory_path,
151
144
  'hive_version' => 11,
@@ -159,11 +152,14 @@ RSpec.describe RSpec::Hive::Configuration do
159
152
  let(:expected_timeout) { 60 }
160
153
  let(:expected_hive_version) { 11 }
161
154
  let(:expected_java_opts) { '-Xmx64m' }
155
+ let(:expected_hive_options) do
156
+ {
157
+ 'mapred.child.java.opts' => expected_java_opts
158
+ }
159
+ end
162
160
 
163
161
  after { File.unlink(path_to_config_file) }
164
162
 
165
- subject { described_class.new(path_to_config_file) }
166
-
167
163
  include_examples('config')
168
164
  end
169
165
  end