rspec-hive 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0b75898c0b7364fe12b80f91a36da04a54c5bab6
4
- data.tar.gz: 86e7fcbbb5a50fc536a0954ddc27fd4cb90ef915
3
+ metadata.gz: ff9d8db0b9d16154a53d7da9cee367f6e0b7eb92
4
+ data.tar.gz: 52e4eb836d42abaa5d7493f2b77f5a851ace0593
5
5
  SHA512:
6
- metadata.gz: da919d5129641e77e15c5bfeed2bef2396e3d02dd3eaa93ef61dae7af099323d6a9106a8b9b8ac02644035ab39588a88c6cbabbe40f7f53832e961d499a7244f
7
- data.tar.gz: 2ea4738a1f9bb1d84426ed4873a33bfb2b8f2480a1b557fbb73e2b5fe8821fbb266defffe8fab57c3114558d6c10c3e0a8f9ee3d11fc0fd0c5138de884c4748c
6
+ metadata.gz: 7374dda80d27b31634bf4651369c7eb5fece08407aa824cbd915b45e7469e22570c6370efc55659a4d42b77e761f438505b7165c56929900354eafa03daf0a4e
7
+ data.tar.gz: 2505ba599502e1ad140472d2898d48e77bed2cfcc0e43f8b2b134882afdfbe1827bb8c09a6405213236bd9e0263ddcf1a0d46d4f0d72fc081a741c495d728397
@@ -3,6 +3,8 @@ require_relative 'query_builder'
3
3
  module RSpec
4
4
  module Hive
5
5
  module QueryBuilderHelper
6
+ HiveConnectionNotFound = Class.new(StandardError)
7
+
6
8
  def into_hive(schema)
7
9
  hive_connection_guard!
8
10
  ::RSpec::Hive::QueryBuilder.new(schema, connection)
@@ -17,7 +19,7 @@ module RSpec
17
19
  end
18
20
 
19
21
  def hive_connection_guard!
20
- raise 'Include WithHiveConnection' unless hive_connection_present?
22
+ raise HiveConnectionNotFound, 'Include WithHiveConnection' unless hive_connection_present?
21
23
  end
22
24
  end
23
25
  end
@@ -26,10 +26,10 @@ namespace :spec do
26
26
  }
27
27
  }
28
28
  system 'mkdir', '-p', default_values['hive']['host_shared_directory_path']
29
- file_path = File.join(
30
- ENV['CONFIG_FILE_DIR'] || '.',
31
- ENV['CONFIG_FILE_NAME'] || 'rspec-hive.yml'
32
- )
29
+ system 'mkdir', '-p', 'config'
30
+
31
+ file_path = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
32
+
33
33
  File.open(file_path, 'w+') do |f|
34
34
  f.write default_values.to_yaml
35
35
  puts "Default config written to #{f.path}".green
@@ -41,18 +41,16 @@ namespace :spec do
41
41
  desc 'Runs docker using hive config file.'\
42
42
  ' It assumes your docker-machine is running.'
43
43
  task :run do
44
- puts 'Command `docker` not found.'.red unless system('which docker')
44
+ fail 'Command `docker` not found.'.red unless system('which docker')
45
45
 
46
- config_filepath = ENV['CONFIG_FILE'] || 'rspec-hive.yml'
47
- docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
48
- unless File.exist? config_filepath
49
- puts "There's no config file #{config_filepath} please generate default or provide custom config.".red
50
- raise Errno::ENOENT.new config_filepath unless File.exist? config_filepath
51
- end
46
+ config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
47
+ fail "There's no config file #{config_filepath} please"\
48
+ "generate default or provide custom config.".red unless File.exist? config_filepath
52
49
 
53
50
  interpolated = ERB.new(File.read(config_filepath)).result
54
51
  config = YAML.load(interpolated)['hive']
55
52
 
53
+ docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
56
54
  cmd = "docker run -v #{config['host_shared_directory_path']}:"\
57
55
  "#{config['docker_shared_directory_path']}"\
58
56
  " -d -p #{config['port']}:10000 #{docker_image_name}"
@@ -63,7 +61,7 @@ namespace :spec do
63
61
 
64
62
  desc 'Downloads docker image from dockerhub.'
65
63
  task :download_image do
66
- puts 'Command `docker` not found.'.red unless system('which docker')
64
+ fail 'Command `docker` not found.'.red unless system('which docker')
67
65
 
68
66
  docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
69
67
 
@@ -71,6 +69,52 @@ namespace :spec do
71
69
  puts "Running `#{cmd}`...".green
72
70
  system(cmd)
73
71
  end
72
+
73
+ def container_id
74
+ return ENV['CONTAINER_ID'] if ENV['CONTAINER_ID']
75
+ docker_conatiners = `docker ps`.lines
76
+ if docker_conatiners.size != 2
77
+ raise 'There is more than 1 instance of docker container running (or no running docker containers). '\
78
+ 'Check `docker ps` and stop containers that are not in use right now or specify CONTAINER_ID and run this command again.'.red
79
+ else
80
+ docker_conatiners[1].split[0]
81
+ end
82
+ end
83
+
84
+ desc 'Load Hive UDFS (user defined functions) onto docker.'
85
+ task :load_udfs, [:udfs_path] do |t, args|
86
+ udfs_path = args[:udfs_path]
87
+ config_filepath = ENV['CONFIG_FILE'] || File.join('config', 'rspec-hive.yml')
88
+ interpolated = ERB.new(File.read(config_filepath)).result
89
+ config = YAML.load(interpolated)['hive']
90
+
91
+ host_hive_udfs_path = File.join(config['host_shared_directory_path'], 'hive-udfs.jar')
92
+ fail 'Please provide UDFS_PATH'.red unless udfs_path
93
+ if udfs_path.start_with?('s3://')
94
+ puts 'Downloading from s3...'.yellow
95
+ cmd = "aws s3 ls #{udfs_path}"
96
+
97
+ fail 'awscli is not configured.'.red unless system(cmd)
98
+ cmd = "aws s3 cp #{udfs_path} #{host_hive_udfs_path}"
99
+ system(cmd)
100
+ else
101
+ puts 'Copying from local directory...'.yellow
102
+ cmd = "cp #{udfs_path} #{host_hive_udfs_path}"
103
+ end
104
+ puts 'Done'.green
105
+
106
+ puts 'Copying to hadoop on docker...'.yellow
107
+ cmd = "docker exec -it #{container_id} /bin/bash -c 'cp #{config['docker_shared_directory_path']}/hive-udfs.jar $HADOOP_HOME'"
108
+ system(cmd)
109
+ puts 'Done'.green
110
+ end
111
+ end
112
+
113
+ desc 'Runs beeline console on hive.'
114
+ task :beeline do
115
+ puts "Connecting to docker container: #{container_id} and running beeline. To exit: '!q'".green
116
+ cmd = "docker exec -it #{container_id} /bin/bash -c '$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000 -d org.apache.hive.jdbc.HiveDriver'"
117
+ system(cmd)
74
118
  end
75
119
  end
76
120
  end
@@ -1,5 +1,5 @@
1
1
  module RSpec
2
2
  module Hive
3
- VERSION = '0.3.0'.freeze
3
+ VERSION = '0.4.0'.freeze
4
4
  end
5
5
  end
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe RSpec::Hive::QueryBuilderHelper do
4
+ let(:connection) { double }
5
+ let(:schema) { double }
6
+ let(:dummy_class) { double }
7
+
8
+ before { dummy_class.extend(described_class) }
9
+
10
+ describe '#into_hive' do
11
+ context 'when no connection has been defined' do
12
+ it 'raises and error' do
13
+ expect { dummy_class.into_hive(schema) }.
14
+ to raise_error(RSpec::Hive::QueryBuilderHelper::HiveConnectionNotFound).
15
+ with_message('Include WithHiveConnection')
16
+ end
17
+ end
18
+
19
+ context 'when RBhive connection has been given' do
20
+ let(:dummy_class) { double(connection: connection) }
21
+
22
+ before do
23
+ allow(connection).to receive(:is_a?).with(RBHive::TCLIConnection).and_return(true)
24
+ end
25
+
26
+ subject { dummy_class.into_hive(schema) }
27
+
28
+ it 'returns a query_builder' do
29
+ expect(subject).to be_a_kind_of(RSpec::Hive::QueryBuilder)
30
+ end
31
+ end
32
+
33
+ context 'when ConnectionDelegator has been given' do
34
+ let(:dummy_class) { double(connection: connection) }
35
+
36
+ before do
37
+ allow(connection).to receive(:is_a?).with(RBHive::TCLIConnection).and_return(false)
38
+ allow(connection).to receive(:is_a?).with(RSpec::Hive::ConnectionDelegator).and_return(true)
39
+ end
40
+
41
+ subject { dummy_class.into_hive(schema) }
42
+
43
+ it 'returns a query_builder' do
44
+ expect(subject).to be_a_kind_of(RSpec::Hive::QueryBuilder)
45
+ end
46
+ end
47
+ end
48
+ end
@@ -4,6 +4,7 @@ RSpec.describe RSpec::Hive::QueryBuilder do
4
4
  let(:connection) { instance_double(RBHive::TCLIConnection) }
5
5
  let(:connection_delegator) { RSpec::Hive::ConnectionDelegator.new(connection, {}) }
6
6
  let(:query_builder) { described_class.new(schema, connection_delegator) }
7
+ let(:schema) { double }
7
8
 
8
9
  describe '#execute' do
9
10
  subject { builder.execute }
@@ -13,7 +14,6 @@ RSpec.describe RSpec::Hive::QueryBuilder do
13
14
  expect(connection_delegator).to receive(:load_into_table).with(schema, expected_rows)
14
15
  end
15
16
 
16
- let(:partition) { nil }
17
17
  let(:schema) do
18
18
  RBHive::TableSchema.new('table_name', nil) do
19
19
  column :col1, :string
@@ -130,7 +130,7 @@ RSpec.describe RSpec::Hive::QueryBuilder do
130
130
  end
131
131
  end
132
132
 
133
- context 'when has no partition' do
133
+ context 'when has a partition' do
134
134
  let(:schema) do
135
135
  RBHive::TableSchema.new('table_name', nil) do
136
136
  column :col1, :string
@@ -138,6 +138,24 @@ RSpec.describe RSpec::Hive::QueryBuilder do
138
138
  partition :dt, :int
139
139
  end
140
140
  end
141
+
142
+ before do
143
+ expect(connection_delegator).
144
+ to receive(:load_into_table).with(schema, expected_rows, partitions)
145
+ end
146
+
147
+ context 'when no data stubbing' do
148
+ context 'when single row is passed' do
149
+ let(:builder) { query_builder.insert(row1).partition(partitions) }
150
+ let(:row1) { ['col1', 343] }
151
+ let(:partitions) { {dt: :int} }
152
+ let(:expected_rows) { [row1] }
153
+
154
+ it 'loads single row' do
155
+ subject
156
+ end
157
+ end
158
+ end
141
159
  end
142
160
  end
143
161
  end
@@ -1,7 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe RSpec::Hive do
4
- describe 'configure' do
4
+ describe '.connector' do
5
+ subject { described_class.connector }
6
+
7
+ it { is_expected.to be_an_instance_of(RSpec::Hive::Connector) }
8
+ end
9
+
10
+ describe '.configure' do
5
11
  let(:expected_host) { '127.0.0.1' }
6
12
  let(:expected_port) { '10000' }
7
13
  let(:expected_host_shared_directory_path) do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rspec-hive
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wojtek Mielczarek
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-05-12 00:00:00.000000000 Z
12
+ date: 2016-06-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -288,6 +288,7 @@ files:
288
288
  - spec/lib/rspec/hive/db_name_spec.rb
289
289
  - spec/lib/rspec/hive/matchers_spec.rb
290
290
  - spec/lib/rspec/hive/query_builder/row_transformer_spec.rb
291
+ - spec/lib/rspec/hive/query_builder_helper_spec.rb
291
292
  - spec/lib/rspec/hive/query_builder_spec.rb
292
293
  - spec/lib/rspec/hive_spec.rb
293
294
  - spec/spec_helper.rb
@@ -323,6 +324,7 @@ test_files:
323
324
  - spec/lib/rspec/hive/db_name_spec.rb
324
325
  - spec/lib/rspec/hive/matchers_spec.rb
325
326
  - spec/lib/rspec/hive/query_builder/row_transformer_spec.rb
327
+ - spec/lib/rspec/hive/query_builder_helper_spec.rb
326
328
  - spec/lib/rspec/hive/query_builder_spec.rb
327
329
  - spec/lib/rspec/hive_spec.rb
328
330
  - spec/spec_helper.rb