rspec-hive 0.4.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +39 -5
  4. data/.rubocop_todo.yml +40 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -3
  7. data/Gemfile +18 -0
  8. data/Guardfile +10 -6
  9. data/README.md +15 -0
  10. data/Rakefile +4 -2
  11. data/docker/Dockerfile +38 -47
  12. data/docker/hive-site.xml +28 -0
  13. data/examples/lib/query.rb +2 -0
  14. data/examples/rspec-hive.yml.example +7 -1
  15. data/examples/spec/query_spec.rb +2 -0
  16. data/examples/spec/spec_helper.rb +2 -0
  17. data/lib/rspec/hive.rb +2 -0
  18. data/lib/rspec/hive/configuration.rb +14 -33
  19. data/lib/rspec/hive/connection_delegator.rb +19 -13
  20. data/lib/rspec/hive/connector.rb +3 -2
  21. data/lib/rspec/hive/db_name.rb +2 -0
  22. data/lib/rspec/hive/exponential_backoff.rb +2 -0
  23. data/lib/rspec/hive/matchers.rb +2 -0
  24. data/lib/rspec/hive/query_builder.rb +2 -0
  25. data/lib/rspec/hive/query_builder/null_strategy.rb +2 -0
  26. data/lib/rspec/hive/query_builder/row_transformer.rb +4 -2
  27. data/lib/rspec/hive/query_builder/type_faker.rb +2 -0
  28. data/lib/rspec/hive/query_builder/value_by_type_strategy.rb +2 -0
  29. data/lib/rspec/hive/query_builder_helper.rb +2 -0
  30. data/lib/rspec/hive/railtie.rb +2 -0
  31. data/lib/rspec/hive/rake_tasks/docker.rake +27 -14
  32. data/lib/rspec/hive/version.rb +3 -1
  33. data/lib/rspec/hive/with_hive_connection.rb +2 -0
  34. data/rspec-hive.gemspec +7 -16
  35. data/spec/lib/rspec/hive/configuration_spec.rb +29 -33
  36. data/spec/lib/rspec/hive/connection_delegator_spec.rb +58 -138
  37. data/spec/lib/rspec/hive/connector_spec.rb +33 -38
  38. data/spec/lib/rspec/hive/db_name_spec.rb +4 -2
  39. data/spec/lib/rspec/hive/matchers_spec.rb +2 -0
  40. data/spec/lib/rspec/hive/query_builder/row_transformer_spec.rb +6 -4
  41. data/spec/lib/rspec/hive/query_builder_helper_spec.rb +8 -6
  42. data/spec/lib/rspec/hive/query_builder_spec.rb +15 -17
  43. data/spec/lib/rspec/hive_spec.rb +15 -22
  44. metadata +13 -154
  45. data/.rubocop_u2i.yml +0 -63
  46. data/spec/.rubocop.yml +0 -4
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'delegate'
2
4
  require 'tempfile'
3
5
 
@@ -15,9 +17,9 @@ module RSpec
15
17
  execute(table_schema.create_table_statement)
16
18
  end
17
19
 
18
- def load_partitions(table_name, partitions)
19
- partitions = partition_clause(partitions)
20
- query = "ALTER TABLE #{table_name} ADD #{partitions}"
20
+ def load_partitions(table_schema, partitions)
21
+ partitions = partition_clause(table_schema, partitions)
22
+ query = "ALTER TABLE #{table_schema.name} ADD #{partitions}"
21
23
  execute(query)
22
24
  end
23
25
 
@@ -29,7 +31,7 @@ module RSpec
29
31
  values,
30
32
  table_schema.instance_variable_get(:@field_sep)
31
33
  )
32
- partition_query = partition_clause(partitions) if partitions
34
+ partition_query = partition_clause(table_schema, partitions) if partitions
33
35
  load_file_to_hive_table(
34
36
  table_name,
35
37
  docker_path(file),
@@ -65,23 +67,27 @@ module RSpec
65
67
 
66
68
  private
67
69
 
68
- def partition_clause(partitions)
70
+ def partition_clause(table_schema, partitions)
69
71
  if partitions.is_a?(Array)
70
- partitions.collect { |x| to_partition_clause(x) }.join(' ')
72
+ partitions.collect { |x| to_partition_clause(table_schema, x) }.join(' ')
71
73
  else
72
- to_partition_clause(partitions)
74
+ to_partition_clause(table_schema, partitions)
73
75
  end
74
76
  end
75
77
 
76
- def to_partition_clause(partition)
77
- "PARTITION(#{partition.map { |k, v| "#{k}='#{v}'" }.join(',')})"
78
+ def to_partition_clause(table_schema, partition)
79
+ "PARTITION(#{partition.map { |k, v| "#{k}=#{partition_value(table_schema, k, v)}" }.join(',')})"
80
+ end
81
+
82
+ def partition_value(table_schema, key, value)
83
+ return value if table_schema.partitions.detect { |x| x.name == key && x.type == :int }
84
+ "'#{value}'"
78
85
  end
79
86
 
80
87
  def load_file_to_hive_table(table_name, path, partition_clause = nil)
81
- request_txt =
82
- "load data local inpath '#{path}' into table #{table_name}"
83
- request_txt << " #{partition_clause}" unless partition_clause.nil?
84
- execute(request_txt)
88
+ request_txt = "load data local inpath '#{path}' into table #{table_name}"
89
+ return execute(request_txt) if partition_clause.nil?
90
+ execute("#{request_txt} #{partition_clause}")
85
91
  end
86
92
 
87
93
  def docker_path(file)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rbhive'
2
4
  require 'tempfile'
3
5
  require 'yaml'
@@ -16,11 +18,10 @@ module RSpec
16
18
  connection = open_connection
17
19
  connection.switch_database(db_name)
18
20
  @config.hive_options.each do |key, value|
19
- connection.execute("SET #{key}=#{value};")
21
+ connection.execute("SET #{key}=#{value}")
20
22
  end
21
23
 
22
24
  connection
23
-
24
25
  rescue Thrift::ApplicationException => e
25
26
  config.logger.fatal('An exception was thrown during start connection')
26
27
  config.logger.fatal(e)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'securerandom'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'retryable'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rspec/matchers'
2
4
 
3
5
  RSpec::Matchers.define :match_result_set do |expected|
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'query_builder/row_transformer'
2
4
  require_relative 'query_builder/null_strategy'
3
5
  require_relative 'query_builder/value_by_type_strategy'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module RSpec
2
4
  module Hive
3
5
  class QueryBuilder
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'type_faker'
2
4
 
3
5
  module RSpec
@@ -23,12 +25,12 @@ module RSpec
23
25
 
24
26
  attr_reader :schema, :strategy
25
27
 
26
- HIVE_NIL = '\N'.freeze
28
+ HIVE_NIL = '\N'
27
29
 
28
30
  def array_row(row)
29
31
  size = schema.columns.size
30
32
  missing = size - row.size
31
- if missing > 0
33
+ if missing.positive?
32
34
  row_with_missing_columns(row)
33
35
  else
34
36
  row
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'faker'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'type_faker'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'query_builder'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rails'
2
4
 
3
5
  module RSpec
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'yaml'
2
4
  require 'colorize'
3
5
  require 'tmpdir'
@@ -23,7 +25,14 @@ namespace :spec do
23
25
  'docker_shared_directory_path' =>
24
26
  ENV['DOCKER_SHARED_DIR'] || default_config.docker_shared_directory_path,
25
27
  'hive_version' =>
26
- ENV['HIVE_VERSION'] || default_config.hive_version
28
+ ENV['HIVE_VERSION'] || default_config.hive_version,
29
+ 'hive_options' => {
30
+ 'hive.exec.dynamic.partition' => 'true',
31
+ 'hive.exec.dynamic.partition.mode' => 'nonstrict',
32
+ 'hive.exec.max.dynamic.partitions.pernode' => '100000',
33
+ 'hive.exec.max.dynamic.partitions' => '100000',
34
+ 'mapred.child.java.opts' => '-Xmx2048m'
35
+ }
27
36
  }
28
37
  }
29
38
  system 'mkdir', '-p', default_values['hive']['host_shared_directory_path']
@@ -42,14 +51,16 @@ namespace :spec do
42
51
  desc 'Runs docker using hive config file.'\
43
52
  ' It assumes your docker-machine is running.'
44
53
  task :run do
45
- fail 'Command `docker` not found.'.red unless system('which docker')
54
+ raise 'Command `docker` not found.'.red unless system('which docker')
46
55
 
47
56
  config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
48
- fail "There's no config file #{config_filepath} please"\
49
- "generate default or provide custom config.".red unless File.exist? config_filepath
57
+ unless File.exist? config_filepath
58
+ raise "There's no config file #{config_filepath} please"\
59
+ 'generate default or provide custom config.'.red
60
+ end
50
61
 
51
62
  interpolated = ERB.new(File.read(config_filepath)).result
52
- config = YAML.load(interpolated)['hive']
63
+ config = YAML.safe_load(interpolated)['hive']
53
64
 
54
65
  docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
55
66
  cmd = "docker run -v #{config['host_shared_directory_path']}:"\
@@ -62,7 +73,7 @@ namespace :spec do
62
73
 
63
74
  desc 'Downloads docker image from dockerhub.'
64
75
  task :download_image do
65
- fail 'Command `docker` not found.'.red unless system('which docker')
76
+ raise 'Command `docker` not found.'.red unless system('which docker')
66
77
 
67
78
  docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
68
79
 
@@ -76,36 +87,37 @@ namespace :spec do
76
87
  docker_conatiners = `docker ps`.lines
77
88
  if docker_conatiners.size != 2
78
89
  raise 'There is more than 1 instance of docker container running (or no running docker containers). '\
79
- 'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID and run this command again.'.red
90
+ 'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID '\
91
+ 'and run this command again.'.red
80
92
  else
81
93
  docker_conatiners[1].split[0]
82
94
  end
83
95
  end
84
96
 
85
97
  desc 'Load Hive UDFS (user defined functions) onto docker.'
86
- task :load_udfs, [:udfs_path] do |t, args|
98
+ task :load_udfs, [:udfs_path] do |_t, args|
87
99
  udfs_path = args[:udfs_path]
88
100
  config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
89
101
  interpolated = ERB.new(File.read(config_filepath)).result
90
- config = YAML.load(interpolated)['hive']
102
+ config = YAML.safe_load(interpolated)['hive']
91
103
 
92
104
  host_hive_udfs_path = File.join(config['host_shared_directory_path'], 'hive-udfs.jar')
93
- fail 'Please provide UDFS_PATH'.red unless udfs_path
105
+ raise 'Please provide UDFS_PATH'.red unless udfs_path
94
106
  if udfs_path.start_with?('s3://')
95
107
  puts 'Downloading from s3...'.yellow
96
108
  cmd = "aws s3 ls #{udfs_path}"
97
109
 
98
- fail 'awscli is not configured.'.red unless system(cmd)
110
+ raise 'awscli is not configured.'.red unless system(cmd)
99
111
  cmd = "aws s3 cp #{udfs_path} #{host_hive_udfs_path}"
100
- system(cmd)
101
112
  else
102
113
  puts 'Copying from local directory...'.yellow
103
114
  cmd = "cp #{udfs_path} #{host_hive_udfs_path}"
104
115
  end
116
+ system(cmd)
105
117
  puts 'Done'.green
106
118
 
107
119
  puts 'Copying to hadoop on docker...'.yellow
108
- cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{config['docker_shared_directory_path']}/hive-udfs.jar $HADOOP_HOME'"
120
+ cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{host_hive_udfs_path} $HADOOP_HOME'"
109
121
  system(cmd)
110
122
  puts 'Done'.green
111
123
  end
@@ -114,7 +126,8 @@ namespace :spec do
114
126
  desc 'Runs beeline console on hive.'
115
127
  task :beeline do
116
128
  puts "Connecting to docker container: #{container_id} and running beeline. To exit: '!q'".green
117
- cmd = "docker exec -it #{container_id} /bin/bash -c '$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -d org.apache.hive.jdbc.HiveDriver'"
129
+ bash_cmd = '$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -d org.apache.hive.jdbc.HiveDriver'
130
+ cmd = "docker exec -it #{container_id} /bin/bash -c #{bash_cmd}"
118
131
  system(cmd)
119
132
  end
120
133
  end
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module RSpec
2
4
  module Hive
3
- VERSION = '0.4.2'.freeze
5
+ VERSION = '0.6.0'
4
6
  end
5
7
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'exponential_backoff'
2
4
 
3
5
  module RSpec
@@ -1,4 +1,6 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
2
4
  lib = File.expand_path('../lib', __FILE__)
3
5
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
6
  require 'rspec/hive/version'
@@ -7,7 +9,7 @@ Gem::Specification.new do |spec|
7
9
  spec.name = 'rspec-hive'
8
10
  spec.version = RSpec::Hive::VERSION
9
11
  spec.authors = ['Wojtek Mielczarek', 'Mikołaj Nowak']
10
- spec.email = %w(wojtek.mielczarek@u2i.com mikolaj.nowak@u2i.com)
12
+ spec.email = %w[wojtek.mielczarek@u2i.com mikolaj.nowak@u2i.com]
11
13
  spec.summary = 'RSpec addition to test hive queries'
12
14
  spec.description = 'RSpecHive let you test your hive queries
13
15
  connecting to hive instance installed on docker'
@@ -19,21 +21,10 @@ Gem::Specification.new do |spec|
19
21
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
22
  spec.require_paths = ['lib']
21
23
 
22
- spec.add_dependency 'rake', ['>= 10.0', '< 12.0']
23
- spec.add_dependency 'colorize', '~> 0.7'
24
+ spec.add_dependency 'rake', ['>= 10.0', '< 13.0']
25
+ spec.add_dependency 'colorize', '~> 0.8.0'
24
26
  spec.add_dependency 'faker', '~> 1.6'
25
- spec.add_dependency 'retryable', '~> 2.0.3'
27
+ spec.add_dependency 'retryable', '~> 2.0'
26
28
  spec.add_dependency 'rspec', '~> 3.4'
27
- spec.add_dependency 'rbhive-u2i', '~> 1.0.0'
28
-
29
- spec.add_development_dependency 'bundler', '~> 1.7'
30
- spec.add_development_dependency 'rspec-its', '~> 1.2'
31
- spec.add_development_dependency 'rubocop', '~> 0.39'
32
- spec.add_development_dependency 'rubocop-rspec', '~> 1.4'
33
- spec.add_development_dependency 'guard', '~> 2.6'
34
- spec.add_development_dependency 'guard-rspec', '~> 4.3'
35
- spec.add_development_dependency 'guard-rubocop', '~> 1.2'
36
- spec.add_development_dependency 'codeclimate-test-reporter', '~> 0.4'
37
- spec.add_development_dependency 'pry'
38
- spec.add_development_dependency 'pry-byebug'
29
+ spec.add_dependency 'rbhive-u2i', '~> 1.0'
39
30
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
  require 'tempfile'
3
5
 
@@ -32,34 +34,21 @@ RSpec.describe RSpec::Hive::Configuration do
32
34
  end
33
35
  end
34
36
 
35
- let(:expected_host_shared_directory_path) do
36
- '/Users/Shared/tmp/spec-tmp-files'
37
- end
37
+ let(:expected_host_shared_directory_path) { '/Users/Shared/tmp/spec-tmp-files' }
38
38
  let(:expected_docker_shared_directory_path) { '/tmp/spec-tmp-files' }
39
- let(:expected_hive_version) { 10 }
40
- let(:expected_timeout) { 1800 }
41
- let(:expected_partition_flag) { 'true' }
42
- let(:expected_partition_mode) { 'nonstrict' }
43
- let(:expected_partiton_pernodexi) { '100000' }
44
- let(:expected_partitions) { '100000' }
45
- let(:expected_java_opts) { '-Xmx2048m' }
46
- let(:expected_hive_options) do
47
- {'hive.exec.dynamic.partition' => expected_partition_flag,
48
- 'hive.exec.dynamic.partition.mode' => expected_partition_mode,
49
- 'hive.exec.max.dynamic.partitions.pernodexi' => expected_partiton_pernodexi,
50
- 'hive.exec.max.dynamic.partitions' => expected_partitions,
51
- 'mapred.child.java.opts' => expected_java_opts}
52
- end
39
+ let(:expected_timeout) { 120 }
40
+ let(:expected_hive_options) { {} }
53
41
 
54
42
  context 'when no configuration file is provided' do
55
- let(:expected_port) { 10000 }
43
+ let(:expected_port) { 10_000 }
56
44
  let!(:original_host_os) { RbConfig::CONFIG['host_os'] }
45
+ let(:expected_hive_version) { described_class::DEFAULT_VERSION }
57
46
 
58
47
  before { allow(Dir).to receive(:mktmpdir) { mock_tmpdir } }
59
48
 
60
49
  context 'when on Mac' do
61
50
  let(:mock_tmpdir) { '/Users/Shared/test/' }
62
- let(:expected_host) { '192.168.99.100' }
51
+ let(:expected_host) { '127.0.0.1' }
63
52
  let(:expected_host_shared_directory_path) { '/Users/Shared/test/spec-tmp-files' }
64
53
 
65
54
  before do
@@ -88,64 +77,68 @@ RSpec.describe RSpec::Hive::Configuration do
88
77
 
89
78
  context 'when there is a configuration file' do
90
79
  let(:path_to_config_file) do
91
- Tempfile.open(%w(config .yml)) do |f|
80
+ Tempfile.open(%w[config .yml]) do |f|
92
81
  f.write yaml_hash.to_yaml
93
82
  f.path
94
83
  end
95
84
  end
96
85
  let(:expected_host) { '127.0.0.2' }
97
- let(:expected_port) { 10001 }
86
+ let(:expected_port) { 10_001 }
98
87
 
99
88
  context 'where all parameters are present' do
89
+ subject { described_class.new(path_to_config_file) }
90
+
91
+ let(:expected_hive_version) { 12 }
92
+
100
93
  let(:yaml_hash) do
101
94
  {
102
95
  'hive' =>
103
96
  {
104
97
  'host' => '127.0.0.2',
105
- 'port' => 10001,
98
+ 'port' => 10_001,
106
99
  'host_shared_directory_path' => expected_host_shared_directory_path,
107
100
  'docker_shared_directory_path' => expected_docker_shared_directory_path,
108
- 'hive_version' => '10',
109
- 'timeout' => 1800
101
+ 'hive_version' => '12',
102
+ 'timeout' => 120
110
103
  }
111
104
  }
112
105
  end
113
106
 
114
107
  after { File.unlink(path_to_config_file) }
115
108
 
116
- subject { described_class.new(path_to_config_file) }
117
-
118
109
  include_examples('config')
119
110
  end
120
111
 
121
112
  context 'where there are only required parameters' do
113
+ subject { described_class.new(path_to_config_file) }
114
+
115
+ let(:expected_hive_version) { described_class::DEFAULT_VERSION }
122
116
  let(:yaml_hash) do
123
117
  {
124
118
  'hive' =>
125
119
  {
126
120
  'host' => '127.0.0.2',
127
- 'port' => 10001,
121
+ 'port' => 10_001,
128
122
  'host_shared_directory_path' => expected_host_shared_directory_path,
129
123
  'docker_shared_directory_path' => expected_docker_shared_directory_path
130
124
  }
131
125
  }
132
126
  end
133
- let(:expected_hive_version) { 10 }
134
127
 
135
128
  after { File.unlink(path_to_config_file) }
136
129
 
137
- subject { described_class.new(path_to_config_file) }
138
-
139
130
  include_examples('config')
140
131
  end
141
132
 
142
133
  context 'where there are some parameters required and optional' do
134
+ subject { described_class.new(path_to_config_file) }
135
+
143
136
  let(:yaml_hash) do
144
137
  {
145
138
  'hive' =>
146
139
  {
147
140
  'host' => '127.0.0.2',
148
- 'port' => 10001,
141
+ 'port' => 10_001,
149
142
  'host_shared_directory_path' => expected_host_shared_directory_path,
150
143
  'docker_shared_directory_path' => expected_docker_shared_directory_path,
151
144
  'hive_version' => 11,
@@ -159,11 +152,14 @@ RSpec.describe RSpec::Hive::Configuration do
159
152
  let(:expected_timeout) { 60 }
160
153
  let(:expected_hive_version) { 11 }
161
154
  let(:expected_java_opts) { '-Xmx64m' }
155
+ let(:expected_hive_options) do
156
+ {
157
+ 'mapred.child.java.opts' => expected_java_opts
158
+ }
159
+ end
162
160
 
163
161
  after { File.unlink(path_to_config_file) }
164
162
 
165
- subject { described_class.new(path_to_config_file) }
166
-
167
163
  include_examples('config')
168
164
  end
169
165
  end