rspec-hive 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/.rubocop.yml +13 -1
  4. data/.rubocop_u2i.yml +9 -1
  5. data/.travis.yml +19 -0
  6. data/Guardfile +12 -1
  7. data/README.md +3 -1
  8. data/Rakefile +11 -2
  9. data/examples/{query.rb → lib/query.rb} +2 -5
  10. data/examples/{hive_tests_config.yml.example → rspec-hive.yml.example} +0 -0
  11. data/examples/spec/query_spec.rb +103 -0
  12. data/examples/spec/spec_helper.rb +22 -0
  13. data/lib/rspec/hive.rb +7 -2
  14. data/lib/rspec/hive/connection_delegator.rb +17 -5
  15. data/lib/rspec/hive/connector.rb +0 -1
  16. data/lib/rspec/hive/exponential_backoff.rb +15 -0
  17. data/lib/rspec/hive/matchers.rb +29 -0
  18. data/lib/rspec/hive/query_builder.rb +83 -0
  19. data/lib/rspec/hive/query_builder/null_strategy.rb +11 -0
  20. data/lib/rspec/hive/query_builder/row_transformer.rb +63 -0
  21. data/lib/rspec/hive/query_builder/type_faker.rb +36 -0
  22. data/lib/rspec/hive/query_builder/value_by_type_strategy.rb +13 -0
  23. data/lib/rspec/hive/query_builder_helper.rb +24 -0
  24. data/lib/rspec/{rake_tasks → hive}/railtie.rb +2 -2
  25. data/lib/rspec/{rake_tasks → hive/rake_tasks}/docker.rake +0 -0
  26. data/lib/rspec/hive/version.rb +1 -1
  27. data/lib/rspec/hive/with_hive_connection.rb +16 -10
  28. data/rspec-hive.gemspec +6 -4
  29. data/spec/.rubocop.yml +4 -0
  30. data/spec/lib/rspec/hive/configuration_spec.rb +9 -6
  31. data/spec/lib/rspec/hive/connection_delegator_spec.rb +1 -1
  32. data/spec/lib/rspec/hive/connector_spec.rb +1 -1
  33. data/spec/lib/rspec/hive/db_name_spec.rb +1 -1
  34. data/spec/lib/rspec/hive/matchers_spec.rb +94 -0
  35. data/spec/lib/rspec/hive/query_builder/row_transformer_spec.rb +37 -0
  36. data/spec/lib/rspec/hive/query_builder_spec.rb +143 -0
  37. data/spec/lib/rspec/hive_spec.rb +1 -1
  38. data/spec/spec_helper.rb +2 -62
  39. metadata +68 -24
  40. data/examples/config_helper.rb +0 -1
  41. data/examples/query_spec.rb +0 -41
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ca84700fd2dd11920a2cf224d634f444a9e325b9
4
- data.tar.gz: ac29aa0c169716541e7df32d8ddcfed8ed8a005e
3
+ metadata.gz: e0ecaa7596db283e59f0ea63fbb677942e714d09
4
+ data.tar.gz: ae43c8734de7c85b0551454534fc1e339a74d2f5
5
5
  SHA512:
6
- metadata.gz: 5ec841bc2886f78ff101a18481ff622abf64c4ae5abb387441fab204d77327c805c2181f94201e850911cdb248ca3a06c3f1f097074abfcf70438eb2730626ac
7
- data.tar.gz: 4cc12280d1c740e9c8b349995def32422796ac4c96bd7061d2d13ef936bc1fe3fbeea78eb138807e2c1c4751b1de26eb171927ad5c14c2ea23c63493acbdb618
6
+ metadata.gz: 906bd8f828ba76a4646c535cee8679d60f3acf23714e8686c62789ee770d754f95f216930758d504bf57964a40c127e189b58dc17e03c674f12a283f181e83cf
7
+ data.tar.gz: 6cf54197ee350b05235eb08869226dd2b26c3e63f83764ad17356c5ef842379f8d5b5b2d4a8aafe63d212d91b02c91b78d64bcd0bb729c5bb37daf8a8da05084
data/.rspec CHANGED
@@ -1,2 +1 @@
1
1
  --color
2
- --require spec_helper
data/.rubocop.yml CHANGED
@@ -1,11 +1,23 @@
1
1
  inherit_from: .rubocop_u2i.yml
2
2
 
3
+ Rails:
4
+ Enabled: false
5
+
3
6
  AllCops:
7
+ TargetRubyVersion: 2.1
8
+ Include:
9
+ - '**/Rakefile'
4
10
  Exclude:
5
11
  - spec/spec_helper.rb
12
+ - vendor/**/*
13
+ - examples/query_spec.rb
6
14
 
7
15
  Metrics/LineLength:
8
- Max: 80
16
+ Max: 120
9
17
  Enabled: true
10
18
  Exclude:
11
19
  - spec/**/*
20
+ - examples/spec/**/*
21
+
22
+ Style/SignalException:
23
+ EnforcedStyle: only_raise
data/.rubocop_u2i.yml CHANGED
@@ -1,7 +1,9 @@
1
1
  require: rubocop-rspec
2
2
 
3
+ Rails:
4
+ Enabled: true
5
+
3
6
  AllCops:
4
- RunRailsCops: true
5
7
  Include:
6
8
  - '**/Rakefile'
7
9
  - config.ru
@@ -11,6 +13,7 @@ AllCops:
11
13
  - config/**/*
12
14
  - script/**/*
13
15
  - db/schema.rb
16
+ - vendor/**/*
14
17
 
15
18
  Style/MultilineOperationIndentation:
16
19
  EnforcedStyle: indented
@@ -53,3 +56,8 @@ Metrics/MethodLength:
53
56
  RSpec/FilePath:
54
57
  Exclude:
55
58
  - spec/routing/**/*_routing_spec.rb
59
+
60
+ RSpec/DescribeClass:
61
+ Exclude:
62
+ - spec/lib/rspec/hive/matchers_spec.rb
63
+
data/.travis.yml ADDED
@@ -0,0 +1,19 @@
1
+ sudo: false
2
+
3
+ services:
4
+ - docker
5
+ language: ruby
6
+ rvm:
7
+ - 2.1
8
+ - 2.2
9
+ - 2.3.0
10
+ env: CONFIG_FILE_NAME=rspec-hive.yml PORT=10000 CONFIG_FILE=rspec-hive.yml
11
+
12
+ before_script:
13
+ - bundle exec rake spec:hive:config:generate_default
14
+ - bundle exec rake spec:hive:docker:run
15
+
16
+ script:
17
+ - bundle exec rake spec
18
+ - bundle exec rake rubocop
19
+ - bundle exec rake hive_spec
data/Guardfile CHANGED
@@ -1,6 +1,11 @@
1
1
  # A sample Guardfile
2
2
  # More info at https://github.com/guard/guard#readme
3
3
 
4
+ detect_docker = <<-BASH
5
+ CONTAINER_IDS=`docker ps -q --filter='ancestor=nielsensocial/hive' 2> /dev/null | xargs`
6
+ docker inspect --format='{{ .State.Running }}' $CONTAINER_IDS 2> /dev/null | uniq | grep true 2>&1 > /dev/null
7
+ BASH
8
+
4
9
  group :red_green_refactor, halt_on_fail: true do
5
10
  guard :rspec, cmd: 'bundle exec rspec' do
6
11
  watch(%r{^spec/.+_spec\.rb$})
@@ -8,7 +13,13 @@ group :red_green_refactor, halt_on_fail: true do
8
13
  watch('spec/spec_helper.rb') { 'spec' }
9
14
  end
10
15
 
11
- guard :rubocop do
16
+ guard :rspec, cmd: 'bundle exec rspec', spec_paths: ['examples/spec'] do
17
+ watch(%r{^examples/spec/.+_spec\.rb$})
18
+ watch(%r{^examples/lib/(.+)\.rb$}) { |m| "examples/spec/#{m[1]}_spec.rb" }
19
+ watch('examples/spec/spec_helper.rb') { 'examples/spec' }
20
+ end if system(detect_docker)
21
+
22
+ guard :rubocop, all_on_start: false do
12
23
  watch(%r{.+\.rb$})
13
24
  watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
14
25
  end
data/README.md CHANGED
@@ -1,8 +1,10 @@
1
+ [![Build Status](https://travis-ci.org/u2i/rspec-hive.svg?branch=master)](https://travis-ci.org/u2i/rspec-hive)
2
+ [![Dependency Status](https://gemnasium.com/u2i/rspec-hive.svg)](https://gemnasium.com/u2i/rspec-hive)
1
3
  [![Code Climate](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/gpa.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/feed)
2
4
  [![Test Coverage](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/coverage.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/coverage)
3
5
  [![Issue Count](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/issue_count.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/feed)
4
6
 
5
- # rpsec-hive
7
+ # rspec-hive
6
8
 
7
9
  rspec-hive is a utility gem to help you write beautiful rspec tests for hive queries. The idea is simple - you just launch a docker machine with hadoop and hive installed. To test a query you create a simple RSpec file and extend it with `RSpec::Hive::WithHiveConnection`.
8
10
 
data/Rakefile CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'bundler/gem_tasks'
2
- load 'lib/rspec/rake_tasks/docker.rake'
2
+ load 'lib/rspec/hive/rake_tasks/docker.rake'
3
3
 
4
4
  begin
5
5
  require 'rspec/core/rake_task'
@@ -20,4 +20,13 @@ rescue LoadError
20
20
  puts 'Cannot load RuboCop Rake tasks'
21
21
  end
22
22
 
23
- task default: [:spec, :rubocop]
23
+ detect_docker = <<-BASH
24
+ CONTAINER_IDS=`docker ps -q --filter='ancestor=nielsensocial/hive' 2> /dev/null | xargs`
25
+ docker inspect --format='{{ .State.Running }}' $CONTAINER_IDS 2> /dev/null | uniq | grep true 2>&1 > /dev/null
26
+ BASH
27
+
28
+ if system(detect_docker)
29
+ task default: [:spec, :hive_spec, :rubocop]
30
+ else
31
+ task default: [:spec, :rubocop]
32
+ end
data/examples/{query.rb → lib/query.rb} CHANGED
@@ -1,3 +1,5 @@
1
+ require 'rbhive'
2
+
1
3
  class Query
2
4
  def table_schema
3
5
  RBHive::TableSchema.new('people', nil, line_sep: '\n', field_sep: ';') do
@@ -10,9 +12,4 @@ class Query
10
12
  def table_name
11
13
  table_schema.name
12
14
  end
13
-
14
- def run_hive_query(connection)
15
- query = "SELECT * FROM `#{table_schema.name}` WHERE `amount` > 2.5"
16
- connection.execute(query)
17
- end
18
15
  end
data/examples/spec/query_spec.rb ADDED
@@ -0,0 +1,103 @@
1
+ require_relative 'spec_helper'
2
+ require_relative '../lib/query'
3
+
4
+ RSpec.describe Query do
5
+ include RSpec::Hive::WithHiveConnection
6
+ include RSpec::Hive::QueryBuilderHelper
7
+
8
+ subject { described_class.new }
9
+
10
+ let(:schema) { subject.table_schema }
11
+
12
+ before { connection.execute(schema.create_table_statement) }
13
+
14
+ context 'if we have a partition' do
15
+ let(:schema) do
16
+ RBHive::TableSchema.new('partitioned_people', nil, line_sep: '\n', field_sep: ';') do
17
+ column :name, :string
18
+ column :address, :string
19
+ column :amount, :float
20
+ partition :dth, :int
21
+ end
22
+ end
23
+ let(:input_data) do
24
+ [
25
+ ['Mikolaj', 'Cos', 1.23, 1],
26
+ ['Wojtek', 'Cos', 3.76, 2]
27
+ ]
28
+ end
29
+ let(:dth) { '2016042210' }
30
+ let(:query) { "SELECT * FROM `#{schema.name}` WHERE amount > 3.2" }
31
+ let(:query_result) { connection.fetch(query) }
32
+ let(:expected_result_array) do
33
+ [[a_string_matching('Wojtek'), 'Cos', be_within(0.01).of(3.76), dth]]
34
+ end
35
+
36
+ before do
37
+ connection.execute("ALTER TABLE #{schema.name} ADD PARTITION (dth='#{dth}')")
38
+ into_hive(schema).insert(*input_data).partition(dth: dth).execute
39
+ end
40
+
41
+ it 'returns Wojtek' do
42
+ expect(query_result).to match_result_set(expected_result_array)
43
+ end
44
+ end
45
+
46
+ context 'without stubbing strategy' do
47
+ let(:input_data) do
48
+ [
49
+ ['Mikolaj', 'Cos', 1.23, 1],
50
+ ['Wojtek', 'Cos', 3.76, 2]
51
+ ]
52
+ end
53
+ let(:query_result) { connection.fetch(query) }
54
+
55
+ before { into_hive(schema).insert(*input_data).execute }
56
+
57
+ context 'when querying for amount > 3.2' do
58
+ let(:query) { "SELECT * FROM `#{subject.table_name}` WHERE amount > 3.2" }
59
+ let(:expected_result_array) do
60
+ [
61
+ [a_string_matching('Wojtek'), 'Cos', be_within(0.01).of(3.76)]
62
+ ]
63
+ end
64
+
65
+ it 'returns Wojtek' do
66
+ expect(query_result).to match_result_set(expected_result_array)
67
+ end
68
+ end
69
+ end
70
+
71
+ context 'with stubbing strategy' do
72
+ let(:input_data) { [{name: 'Michal'}, {name: 'Wojtek'}] }
73
+ let(:query_result) { connection.fetch(query) }
74
+
75
+ before { into_hive(schema).insert(*input_data).with_stubbing.execute }
76
+
77
+ context "when querying for name = 'Wojtek'" do
78
+ let(:query) do
79
+ "SELECT * FROM `#{subject.table_name}` WHERE name='Wojtek'"
80
+ end
81
+ let(:expected_result_array) do
82
+ [
83
+ ['Wojtek', a_kind_of(String), a_kind_of(Float)]
84
+ ]
85
+ end
86
+
87
+ it 'returns Wojtek' do
88
+ expect(query_result).to match_result_set(expected_result_array)
89
+ end
90
+ end
91
+
92
+ context 'when querying for name = Michal' do
93
+ let(:query) do
94
+ "SELECT * FROM `#{subject.table_name}` WHERE name='Michal'"
95
+ end
96
+ let(:expected_result_hash) { [{name: 'Michal'}] }
97
+
98
+ it 'returns Michal' do
99
+ expect(query_result).to match_result_set(expected_result_hash).partially
100
+ end
101
+ end
102
+ end
103
+ end
data/examples/spec/spec_helper.rb ADDED
@@ -0,0 +1,22 @@
1
+ if ENV['COVERAGE']
2
+ require 'simplecov'
3
+ SimpleCov.start
4
+ require 'simplecov'
5
+ SimpleCov.start
6
+ end
7
+
8
+ RSpec.configure do |config|
9
+ require 'rspec/hive'
10
+
11
+ RSpec::Hive.configure(File.join(__dir__, '../../rspec-hive.yml'))
12
+
13
+ config.mock_with :rspec do |mocks|
14
+ mocks.verify_partial_doubles = true
15
+ end
16
+
17
+ config.pattern = '*_spec.rb'
18
+
19
+ config.disable_monkey_patching!
20
+ config.order = :random
21
+ Kernel.srand config.seed
22
+ end
data/lib/rspec/hive.rb CHANGED
@@ -4,10 +4,15 @@ require 'rspec/hive/configuration'
4
4
  require 'rspec/hive/connection_delegator'
5
5
  require 'rspec/hive/connector'
6
6
  require 'rspec/hive/with_hive_connection'
7
+ require 'rspec/hive/query_builder'
8
+ require 'rspec/hive/query_builder_helper'
9
+ require 'rspec/hive/matchers'
7
10
 
8
11
  module RSpec
9
12
  module Hive
10
- attr_reader :configuration
13
+ def self.configuration
14
+ @configuration
15
+ end
11
16
 
12
17
  def self.configure(file_name = nil)
13
18
  @configuration = new_configuration(file_name)
@@ -28,4 +33,4 @@ module RSpec
28
33
  end
29
34
  end
30
35
 
31
- require 'rspec/rake_tasks/railtie' if defined?(Rails)
36
+ require 'rspec/hive/railtie' if defined?(::Rails)
data/lib/rspec/hive/connection_delegator.rb CHANGED
@@ -24,9 +24,17 @@ module RSpec
24
24
  def load_into_table(table_schema, values, partitions = nil)
25
25
  table_name = table_schema.name
26
26
  Tempfile.open(table_name, @config.host_shared_directory_path) do |file|
27
- write_values_to_file(file, values, table_schema.instance_variable_get(:@field_sep))
27
+ write_values_to_file(
28
+ file,
29
+ values,
30
+ table_schema.instance_variable_get(:@field_sep)
31
+ )
28
32
  partition_query = partition_clause(partitions) if partitions
29
- load_file_to_hive_table(table_name, docker_path(file), partition_query)
33
+ load_file_to_hive_table(
34
+ table_name,
35
+ docker_path(file),
36
+ partition_query
37
+ )
30
38
  end
31
39
  end
32
40
 
@@ -70,16 +78,20 @@ module RSpec
70
78
  end
71
79
 
72
80
  def load_file_to_hive_table(table_name, path, partition_clause = nil)
73
- request_txt = "load data local inpath '#{path}' into table #{table_name}"
81
+ request_txt =
82
+ "load data local inpath '#{path}' into table #{table_name}"
74
83
  request_txt << " #{partition_clause}" unless partition_clause.nil?
75
84
  execute(request_txt)
76
85
  end
77
86
 
78
87
  def docker_path(file)
79
- File.join(@config.docker_shared_directory_path, File.basename(file.path))
88
+ File.join(
89
+ @config.docker_shared_directory_path,
90
+ File.basename(file.path)
91
+ )
80
92
  end
81
93
 
82
- def write_values_to_file(file, values, delimiter=';')
94
+ def write_values_to_file(file, values, delimiter = ';')
83
95
  values.each { |value| file.puts(value.join(delimiter)) }
84
96
  file.flush
85
97
  end
data/lib/rspec/hive/connector.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'rbhive'
2
2
  require 'tempfile'
3
3
  require 'yaml'
4
- require 'pry'
5
4
 
6
5
  module RSpec
7
6
  module Hive
data/lib/rspec/hive/exponential_backoff.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'retryable'
2
+
3
+ module RSpec
4
+ module Hive
5
+ class ExponentialBackoff
6
+ class << self
7
+ def retryable(tries: 5, on:)
8
+ Retryable.retryable(tries: tries, sleep: ->(r) { 2**r }, on: on) do
9
+ yield
10
+ end
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
data/lib/rspec/hive/matchers.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'rspec/matchers'
2
+
3
+ RSpec::Matchers.define :match_result_set do |expected|
4
+ match do |actual|
5
+ return false if expected.size != actual.size
6
+
7
+ expected.each.with_index.all? do |expected_row, i|
8
+ if expected_row.respond_to?(:each_pair)
9
+ if @partial_match
10
+ selected_actual_values = actual[i].values_at(*expected_row.keys)
11
+ values_match?(expected_row.values, selected_actual_values)
12
+ else
13
+ values_match?(expected_row, actual[i])
14
+ end
15
+ elsif expected_row.respond_to?(:each)
16
+ raise ArgumentError, "Can't use partially matcher with Arrays" if @partial_match
17
+ values_match?(expected_row, actual[i].values)
18
+ else
19
+ raise ArgumentError, 'Unknown type'
20
+ end
21
+ end
22
+ end
23
+
24
+ chain :partially do
25
+ @partial_match = true
26
+ end
27
+
28
+ diffable
29
+ end
data/lib/rspec/hive/query_builder.rb ADDED
@@ -0,0 +1,83 @@
1
+ require_relative 'query_builder/row_transformer'
2
+ require_relative 'query_builder/null_strategy'
3
+ require_relative 'query_builder/value_by_type_strategy'
4
+
5
+ module RSpec
6
+ module Hive
7
+ class QueryBuilder
8
+ def initialize(schema, connection)
9
+ @schema = schema
10
+ @connection = connection
11
+ @partition_hash = {}
12
+ @rows = []
13
+ @stubbing = false
14
+ end
15
+
16
+ def partition(hash)
17
+ spawn.partition!(hash)
18
+ end
19
+
20
+ def partition!(partition)
21
+ partition_hash.merge!(partition)
22
+ self
23
+ end
24
+
25
+ def insert(*new_rows)
26
+ spawn.insert!(new_rows)
27
+ end
28
+
29
+ def insert!(new_rows)
30
+ rows.concat(new_rows)
31
+ self
32
+ end
33
+
34
+ def execute
35
+ if partition_hash.empty?
36
+ connection.load_into_table(schema, transformed_rows)
37
+ else
38
+ connection.load_into_table(schema, transformed_rows, partition_hash)
39
+ end
40
+ end
41
+
42
+ def with_stubbing
43
+ spawn.with_stubbing!
44
+ end
45
+
46
+ def with_stubbing!
47
+ self.stubbing = true
48
+ self
49
+ end
50
+
51
+ protected
52
+
53
+ attr_accessor :partition_hash, :connection, :rows, :stubbing
54
+
55
+ private
56
+
57
+ attr_reader :schema
58
+
59
+ def spawn
60
+ clone
61
+ end
62
+
63
+ def stubbing?
64
+ stubbing
65
+ end
66
+
67
+ def transformed_rows
68
+ transformer = RowTransformer.new(schema, missing_column_strategy)
69
+ rows.map do |row|
70
+ transformer.transform(row)
71
+ end
72
+ end
73
+
74
+ def missing_column_strategy
75
+ if stubbing?
76
+ ValueByTypeStrategy.new
77
+ else
78
+ NullStrategy.new
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end