rspec-hive 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/.rubocop.yml +13 -1
  4. data/.rubocop_u2i.yml +9 -1
  5. data/.travis.yml +19 -0
  6. data/Guardfile +12 -1
  7. data/README.md +3 -1
  8. data/Rakefile +11 -2
  9. data/examples/{query.rb → lib/query.rb} +2 -5
  10. data/examples/{hive_tests_config.yml.example → rspec-hive.yml.example} +0 -0
  11. data/examples/spec/query_spec.rb +103 -0
  12. data/examples/spec/spec_helper.rb +22 -0
  13. data/lib/rspec/hive.rb +7 -2
  14. data/lib/rspec/hive/connection_delegator.rb +17 -5
  15. data/lib/rspec/hive/connector.rb +0 -1
  16. data/lib/rspec/hive/exponential_backoff.rb +15 -0
  17. data/lib/rspec/hive/matchers.rb +29 -0
  18. data/lib/rspec/hive/query_builder.rb +83 -0
  19. data/lib/rspec/hive/query_builder/null_strategy.rb +11 -0
  20. data/lib/rspec/hive/query_builder/row_transformer.rb +63 -0
  21. data/lib/rspec/hive/query_builder/type_faker.rb +36 -0
  22. data/lib/rspec/hive/query_builder/value_by_type_strategy.rb +13 -0
  23. data/lib/rspec/hive/query_builder_helper.rb +24 -0
  24. data/lib/rspec/{rake_tasks → hive}/railtie.rb +2 -2
  25. data/lib/rspec/{rake_tasks → hive/rake_tasks}/docker.rake +0 -0
  26. data/lib/rspec/hive/version.rb +1 -1
  27. data/lib/rspec/hive/with_hive_connection.rb +16 -10
  28. data/rspec-hive.gemspec +6 -4
  29. data/spec/.rubocop.yml +4 -0
  30. data/spec/lib/rspec/hive/configuration_spec.rb +9 -6
  31. data/spec/lib/rspec/hive/connection_delegator_spec.rb +1 -1
  32. data/spec/lib/rspec/hive/connector_spec.rb +1 -1
  33. data/spec/lib/rspec/hive/db_name_spec.rb +1 -1
  34. data/spec/lib/rspec/hive/matchers_spec.rb +94 -0
  35. data/spec/lib/rspec/hive/query_builder/row_transformer_spec.rb +37 -0
  36. data/spec/lib/rspec/hive/query_builder_spec.rb +143 -0
  37. data/spec/lib/rspec/hive_spec.rb +1 -1
  38. data/spec/spec_helper.rb +2 -62
  39. metadata +68 -24
  40. data/examples/config_helper.rb +0 -1
  41. data/examples/query_spec.rb +0 -41
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ca84700fd2dd11920a2cf224d634f444a9e325b9
4
- data.tar.gz: ac29aa0c169716541e7df32d8ddcfed8ed8a005e
3
+ metadata.gz: e0ecaa7596db283e59f0ea63fbb677942e714d09
4
+ data.tar.gz: ae43c8734de7c85b0551454534fc1e339a74d2f5
5
5
  SHA512:
6
- metadata.gz: 5ec841bc2886f78ff101a18481ff622abf64c4ae5abb387441fab204d77327c805c2181f94201e850911cdb248ca3a06c3f1f097074abfcf70438eb2730626ac
7
- data.tar.gz: 4cc12280d1c740e9c8b349995def32422796ac4c96bd7061d2d13ef936bc1fe3fbeea78eb138807e2c1c4751b1de26eb171927ad5c14c2ea23c63493acbdb618
6
+ metadata.gz: 906bd8f828ba76a4646c535cee8679d60f3acf23714e8686c62789ee770d754f95f216930758d504bf57964a40c127e189b58dc17e03c674f12a283f181e83cf
7
+ data.tar.gz: 6cf54197ee350b05235eb08869226dd2b26c3e63f83764ad17356c5ef842379f8d5b5b2d4a8aafe63d212d91b02c91b78d64bcd0bb729c5bb37daf8a8da05084
data/.rspec CHANGED
@@ -1,2 +1 @@
1
1
  --color
2
- --require spec_helper
data/.rubocop.yml CHANGED
@@ -1,11 +1,23 @@
1
1
  inherit_from: .rubocop_u2i.yml
2
2
 
3
+ Rails:
4
+ Enabled: false
5
+
3
6
  AllCops:
7
+ TargetRubyVersion: 2.1
8
+ Include:
9
+ - '**/Rakefile'
4
10
  Exclude:
5
11
  - spec/spec_helper.rb
12
+ - vendor/**/*
13
+ - examples/query_spec.rb
6
14
 
7
15
  Metrics/LineLength:
8
- Max: 80
16
+ Max: 120
9
17
  Enabled: true
10
18
  Exclude:
11
19
  - spec/**/*
20
+ - examples/spec/**/*
21
+
22
+ Style/SignalException:
23
+ EnforcedStyle: only_raise
data/.rubocop_u2i.yml CHANGED
@@ -1,7 +1,9 @@
1
1
  require: rubocop-rspec
2
2
 
3
+ Rails:
4
+ Enabled: true
5
+
3
6
  AllCops:
4
- RunRailsCops: true
5
7
  Include:
6
8
  - '**/Rakefile'
7
9
  - config.ru
@@ -11,6 +13,7 @@ AllCops:
11
13
  - config/**/*
12
14
  - script/**/*
13
15
  - db/schema.rb
16
+ - vendor/**/*
14
17
 
15
18
  Style/MultilineOperationIndentation:
16
19
  EnforcedStyle: indented
@@ -53,3 +56,8 @@ Metrics/MethodLength:
53
56
  RSpec/FilePath:
54
57
  Exclude:
55
58
  - spec/routing/**/*_routing_spec.rb
59
+
60
+ RSpec/DescribeClass:
61
+ Exclude:
62
+ - spec/lib/rspec/hive/matchers_spec.rb
63
+
data/.travis.yml ADDED
@@ -0,0 +1,19 @@
1
+ sudo: false
2
+
3
+ services:
4
+ - docker
5
+ language: ruby
6
+ rvm:
7
+ - 2.1
8
+ - 2.2
9
+ - 2.3.0
10
+ env: CONFIG_FILE_NAME=rspec-hive.yml PORT=10000 CONFIG_FILE=rspec-hive.yml
11
+
12
+ before_script:
13
+ - bundle exec rake spec:hive:config:generate_default
14
+ - bundle exec rake spec:hive:docker:run
15
+
16
+ script:
17
+ - bundle exec rake spec
18
+ - bundle exec rake rubocop
19
+ - bundle exec rake hive_spec
data/Guardfile CHANGED
@@ -1,6 +1,11 @@
1
1
  # A sample Guardfile
2
2
  # More info at https://github.com/guard/guard#readme
3
3
 
4
+ detect_docker = <<-BASH
5
+ CONTAINER_IDS=`docker ps -q --filter='ancestor=nielsensocial/hive' 2> /dev/null | xargs`
6
+ docker inspect --format='{{ .State.Running }}' $CONTAINER_IDS 2> /dev/null | uniq | grep true 2>&1 > /dev/null
7
+ BASH
8
+
4
9
  group :red_green_refactor, halt_on_fail: true do
5
10
  guard :rspec, cmd: 'bundle exec rspec' do
6
11
  watch(%r{^spec/.+_spec\.rb$})
@@ -8,7 +13,13 @@ group :red_green_refactor, halt_on_fail: true do
8
13
  watch('spec/spec_helper.rb') { 'spec' }
9
14
  end
10
15
 
11
- guard :rubocop do
16
+ guard :rspec, cmd: 'bundle exec rspec', spec_paths: ['examples/spec'] do
17
+ watch(%r{^examples/spec/.+_spec\.rb$})
18
+ watch(%r{^examples/lib/(.+)\.rb$}) { |m| "examples/spec/#{m[1]}_spec.rb" }
19
+ watch('examples/spec/spec_helper.rb') { 'examples/spec' }
20
+ end if system(detect_docker)
21
+
22
+ guard :rubocop, all_on_start: false do
12
23
  watch(%r{.+\.rb$})
13
24
  watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
14
25
  end
data/README.md CHANGED
@@ -1,8 +1,10 @@
1
+ [![Build Status](https://travis-ci.org/u2i/rspec-hive.svg?branch=master)](https://travis-ci.org/u2i/rspec-hive)
2
+ [![Dependency Status](https://gemnasium.com/u2i/rspec-hive.svg)](https://gemnasium.com/u2i/rspec-hive)
1
3
  [![Code Climate](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/gpa.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/feed)
2
4
  [![Test Coverage](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/coverage.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/coverage)
3
5
  [![Issue Count](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/issue_count.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/feed)
4
6
 
5
- # rpsec-hive
7
+ # rspec-hive
6
8
 
7
9
  rspec-hive is a utility gem to help you write beautiful rspec tests for hive queries. The idea is simple - you just launch a docker machine with hadoop and hive installed. To test a query you create a simple RSpec file and extend it with `RSpec::Hive::WithHiveConnection`.
8
10
 
data/Rakefile CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'bundler/gem_tasks'
2
- load 'lib/rspec/rake_tasks/docker.rake'
2
+ load 'lib/rspec/hive/rake_tasks/docker.rake'
3
3
 
4
4
  begin
5
5
  require 'rspec/core/rake_task'
@@ -20,4 +20,13 @@ rescue LoadError
20
20
  puts 'Cannot load RuboCop Rake tasks'
21
21
  end
22
22
 
23
- task default: [:spec, :rubocop]
23
+ detect_docker = <<-BASH
24
+ CONTAINER_IDS=`docker ps -q --filter='ancestor=nielsensocial/hive' 2> /dev/null | xargs`
25
+ docker inspect --format='{{ .State.Running }}' $CONTAINER_IDS 2> /dev/null | uniq | grep true 2>&1 > /dev/null
26
+ BASH
27
+
28
+ if system(detect_docker)
29
+ task default: [:spec, :hive_spec, :rubocop]
30
+ else
31
+ task default: [:spec, :rubocop]
32
+ end
@@ -1,3 +1,5 @@
1
+ require 'rbhive'
2
+
1
3
  class Query
2
4
  def table_schema
3
5
  RBHive::TableSchema.new('people', nil, line_sep: '\n', field_sep: ';') do
@@ -10,9 +12,4 @@ class Query
10
12
  def table_name
11
13
  table_schema.name
12
14
  end
13
-
14
- def run_hive_query(connection)
15
- query = "SELECT * FROM `#{table_schema.name}` WHERE `amount` > 2.5"
16
- connection.execute(query)
17
- end
18
15
  end
@@ -0,0 +1,103 @@
1
+ require_relative 'spec_helper'
2
+ require_relative '../lib/query'
3
+
# Example spec for Query: creates the table described by `schema`, loads rows
# through the query builder helper and checks fetched results with the
# match_result_set matcher.
RSpec.describe Query do
  include RSpec::Hive::WithHiveConnection
  include RSpec::Hive::QueryBuilderHelper

  subject { described_class.new }

  let(:schema) { subject.table_schema }

  # Every context below fetches its own `query` in the same way.
  let(:query_result) { connection.fetch(query) }

  # Rows used by the contexts that insert positional (array) data; the
  # stubbing context overrides this with hashes.
  let(:input_data) do
    [
      ['Mikolaj', 'Cos', 1.23, 1],
      ['Wojtek', 'Cos', 3.76, 2]
    ]
  end

  before do
    connection.execute(schema.create_table_statement)
  end

  context 'if we have a partition' do
    let(:schema) do
      RBHive::TableSchema.new('partitioned_people', nil, line_sep: '\n', field_sep: ';') do
        column :name, :string
        column :address, :string
        column :amount, :float
        partition :dth, :int
      end
    end
    let(:dth) { '2016042210' }
    let(:query) { "SELECT * FROM `#{schema.name}` WHERE amount > 3.2" }
    let(:expected_result_array) do
      [[a_string_matching('Wojtek'), 'Cos', be_within(0.01).of(3.76), dth]]
    end

    before do
      # The partition is added explicitly before the rows are loaded into it.
      connection.execute("ALTER TABLE #{schema.name} ADD PARTITION (dth='#{dth}')")
      into_hive(schema).insert(*input_data).partition(dth: dth).execute
    end

    it 'returns Wojtek' do
      expect(query_result).to match_result_set(expected_result_array)
    end
  end

  context 'without stubbing strategy' do
    before do
      into_hive(schema).insert(*input_data).execute
    end

    context 'when querying for amount > 3.2' do
      let(:query) { "SELECT * FROM `#{subject.table_name}` WHERE amount > 3.2" }
      let(:expected_result_array) do
        [[a_string_matching('Wojtek'), 'Cos', be_within(0.01).of(3.76)]]
      end

      it 'returns Wojtek' do
        expect(query_result).to match_result_set(expected_result_array)
      end
    end
  end

  context 'with stubbing strategy' do
    # Only `name` is given; with_stubbing is asked to handle the other columns.
    let(:input_data) { [{name: 'Michal'}, {name: 'Wojtek'}] }

    before do
      into_hive(schema).insert(*input_data).with_stubbing.execute
    end

    context "when querying for name = 'Wojtek'" do
      let(:query) { "SELECT * FROM `#{subject.table_name}` WHERE name='Wojtek'" }
      let(:expected_result_array) do
        [['Wojtek', a_kind_of(String), a_kind_of(Float)]]
      end

      it 'returns Wojtek' do
        expect(query_result).to match_result_set(expected_result_array)
      end
    end

    context 'when querying for name = Michal' do
      let(:query) { "SELECT * FROM `#{subject.table_name}` WHERE name='Michal'" }
      let(:expected_result_hash) { [{name: 'Michal'}] }

      it 'returns Michal' do
        expect(query_result).to match_result_set(expected_result_hash).partially
      end
    end
  end
end
@@ -0,0 +1,22 @@
# Spec helper for the example specs: optionally starts coverage tracking,
# loads rspec-hive with the project's configuration file and sets RSpec
# defaults.

# Coverage is opt-in through the COVERAGE environment variable. SimpleCov is
# required and started exactly once, before 'rspec/hive' is loaded below.
if ENV['COVERAGE']
  require 'simplecov'
  SimpleCov.start
end

RSpec.configure do |config|
  require 'rspec/hive'

  # The configuration file path is resolved relative to this file.
  RSpec::Hive.configure(File.join(__dir__, '../../rspec-hive.yml'))

  config.mock_with :rspec do |mocks|
    mocks.verify_partial_doubles = true
  end

  config.pattern = '*_spec.rb'

  config.disable_monkey_patching!
  config.order = :random
  # Seed Ruby's global RNG with the seed RSpec reports for this run.
  Kernel.srand config.seed
end
data/lib/rspec/hive.rb CHANGED
@@ -4,10 +4,15 @@ require 'rspec/hive/configuration'
4
4
  require 'rspec/hive/connection_delegator'
5
5
  require 'rspec/hive/connector'
6
6
  require 'rspec/hive/with_hive_connection'
7
+ require 'rspec/hive/query_builder'
8
+ require 'rspec/hive/query_builder_helper'
9
+ require 'rspec/hive/matchers'
7
10
 
8
11
  module RSpec
9
12
  module Hive
10
- attr_reader :configuration
13
+ def self.configuration
14
+ @configuration
15
+ end
11
16
 
12
17
  def self.configure(file_name = nil)
13
18
  @configuration = new_configuration(file_name)
@@ -28,4 +33,4 @@ module RSpec
28
33
  end
29
34
  end
30
35
 
31
- require 'rspec/rake_tasks/railtie' if defined?(Rails)
36
+ require 'rspec/hive/railtie' if defined?(::Rails)
@@ -24,9 +24,17 @@ module RSpec
24
24
  def load_into_table(table_schema, values, partitions = nil)
25
25
  table_name = table_schema.name
26
26
  Tempfile.open(table_name, @config.host_shared_directory_path) do |file|
27
- write_values_to_file(file, values, table_schema.instance_variable_get(:@field_sep))
27
+ write_values_to_file(
28
+ file,
29
+ values,
30
+ table_schema.instance_variable_get(:@field_sep)
31
+ )
28
32
  partition_query = partition_clause(partitions) if partitions
29
- load_file_to_hive_table(table_name, docker_path(file), partition_query)
33
+ load_file_to_hive_table(
34
+ table_name,
35
+ docker_path(file),
36
+ partition_query
37
+ )
30
38
  end
31
39
  end
32
40
 
@@ -70,16 +78,20 @@ module RSpec
70
78
  end
71
79
 
72
80
  def load_file_to_hive_table(table_name, path, partition_clause = nil)
73
- request_txt = "load data local inpath '#{path}' into table #{table_name}"
81
+ request_txt =
82
+ "load data local inpath '#{path}' into table #{table_name}"
74
83
  request_txt << " #{partition_clause}" unless partition_clause.nil?
75
84
  execute(request_txt)
76
85
  end
77
86
 
78
87
  def docker_path(file)
79
- File.join(@config.docker_shared_directory_path, File.basename(file.path))
88
+ File.join(
89
+ @config.docker_shared_directory_path,
90
+ File.basename(file.path)
91
+ )
80
92
  end
81
93
 
82
- def write_values_to_file(file, values, delimiter=';')
94
+ def write_values_to_file(file, values, delimiter = ';')
83
95
  values.each { |value| file.puts(value.join(delimiter)) }
84
96
  file.flush
85
97
  end
@@ -1,7 +1,6 @@
1
1
  require 'rbhive'
2
2
  require 'tempfile'
3
3
  require 'yaml'
4
- require 'pry'
5
4
 
6
5
  module RSpec
7
6
  module Hive
@@ -0,0 +1,15 @@
1
+ require 'retryable'
2
+
module RSpec
  module Hive
    # Thin wrapper around Retryable.retryable that supplies a power-of-two
    # :sleep callback.
    class ExponentialBackoff
      # Runs the given block through Retryable.retryable.
      #
      # tries - forwarded as Retryable's :tries option (default: 5).
      # on    - forwarded as Retryable's :on option (required keyword).
      #
      # The :sleep option is a lambda returning 2**n for the argument n that
      # Retryable hands to it (presumably the retry count - confirm against
      # the retryable gem). Returns whatever Retryable.retryable returns.
      def self.retryable(tries: 5, on:)
        doubling_wait = ->(attempt) { 2**attempt }
        Retryable.retryable(tries: tries, sleep: doubling_wait, on: on) { yield }
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'rspec/matchers'
2
+
# Matcher comparing an actual result set with an expected collection of rows,
# position by position.
#
# * Sizes must be equal, otherwise the match fails immediately.
# * An expected row responding to #each_pair is compared with the actual row
#   itself; after `.partially` only the expected keys are read from the
#   actual row (via #values_at) and compared with the expected values.
# * Any other expected row responding to #each is compared with the actual
#   row's #values; combining it with `.partially` raises ArgumentError.
# * An expected row responding to neither raises ArgumentError.
RSpec::Matchers.define :match_result_set do |expected|
  match do |actual|
    return false unless expected.size == actual.size

    expected.each.with_index.all? do |expected_row, index|
      if expected_row.respond_to?(:each_pair)
        next values_match?(expected_row, actual[index]) unless @partial_match

        picked_values = actual[index].values_at(*expected_row.keys)
        next values_match?(expected_row.values, picked_values)
      end

      raise ArgumentError, 'Unknown type' unless expected_row.respond_to?(:each)
      raise ArgumentError, "Can't use partially matcher with Arrays" if @partial_match

      values_match?(expected_row, actual[index].values)
    end
  end

  # Switches hash-like expected rows to key-subset comparison.
  chain :partially do
    @partial_match = true
  end

  diffable
end
@@ -0,0 +1,83 @@
1
+ require_relative 'query_builder/row_transformer'
2
+ require_relative 'query_builder/null_strategy'
3
+ require_relative 'query_builder/value_by_type_strategy'
4
+
module RSpec
  module Hive
    # Collects rows and partition values for a table schema and hands them to
    # the connection's #load_into_table.
    #
    # The non-bang methods (#insert, #partition, #with_stubbing) work on a
    # copy and return it; the bang variants change the receiver and return it.
    class QueryBuilder
      # schema     - table schema passed on to RowTransformer and the connection.
      # connection - object responding to #load_into_table.
      def initialize(schema, connection)
        @schema = schema
        @connection = connection
        @partition_hash = {}
        @rows = []
        @stubbing = false
      end

      # Returns a copy with +hash+ merged into its partition values.
      def partition(hash)
        spawn.partition!(hash)
      end

      # Merges +partition+ into this builder's partition values. Returns self.
      def partition!(partition)
        partition_hash.merge!(partition)
        self
      end

      # Returns a copy with the given rows appended.
      def insert(*new_rows)
        spawn.insert!(new_rows)
      end

      # Appends the rows in the +new_rows+ array to this builder. Returns self.
      def insert!(new_rows)
        rows.concat(new_rows)
        self
      end

      # Loads the transformed rows through the connection. The partition
      # argument is only passed when partition values were set.
      def execute
        if partition_hash.empty?
          connection.load_into_table(schema, transformed_rows)
        else
          connection.load_into_table(schema, transformed_rows, partition_hash)
        end
      end

      # Returns a copy that uses ValueByTypeStrategy for missing columns.
      def with_stubbing
        spawn.with_stubbing!
      end

      # Makes this builder use ValueByTypeStrategy for missing columns.
      # Returns self.
      def with_stubbing!
        self.stubbing = true
        self
      end

      protected

      attr_accessor :partition_hash, :connection, :rows, :stubbing

      private

      attr_reader :schema

      # Object#clone is shallow: without this hook a copy would share the rows
      # array and the partition hash with its source, so #insert and
      # #partition would silently change the builder they were called on.
      def initialize_copy(source)
        super
        @partition_hash = @partition_hash.dup
        @rows = @rows.dup
      end

      def spawn
        clone
      end

      def stubbing?
        stubbing
      end

      # Runs every collected row through a RowTransformer built with the
      # current missing-column strategy.
      def transformed_rows
        transformer = RowTransformer.new(schema, missing_column_strategy)
        rows.map do |row|
          transformer.transform(row)
        end
      end

      def missing_column_strategy
        if stubbing?
          ValueByTypeStrategy.new
        else
          NullStrategy.new
        end
      end
    end
  end
end