rspec-hive 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,86 @@
1
require 'erb'
require 'logger'
require 'rbconfig'
require 'tmpdir'
require 'yaml'
2
+
3
module RSpec
  module Hive
    # Settings used to connect to a Hive server and to exchange data files
    # with it through a shared directory.
    #
    # Values come either from built-in defaults or from the `hive` section
    # of an ERB-interpolated YAML file.
    class Configuration
      attr_accessor :host,
                    :port,
                    :host_shared_directory_path,
                    :docker_shared_directory_path,
                    :logger,
                    :hive_version,
                    :connection_timeout,
                    :hive_options

      # @param path_to_config_file [String, nil] YAML file (ERB allowed)
      #   holding a top-level `hive` mapping; when nil the defaults are used
      # @raise [ArgumentError] if the file has no `hive` mapping
      def initialize(path_to_config_file = nil)
        if path_to_config_file.nil?
          load_default_variables
        else
          load_variables_from_config(read_config_file(path_to_config_file))
        end
        # The logger is always a fresh STDOUT logger, whatever the source.
        @logger = Logger.new(STDOUT)
      end

      private

      # Reads the file, runs it through ERB and returns its `hive` mapping.
      def read_config_file(path)
        interpolated = ERB.new(File.read(path)).result
        # NOTE(review): YAML.load on the config file; the file is assumed to
        # be a trusted, project-local resource.
        parsed = YAML.load(interpolated)
        config = parsed.is_a?(Hash) ? parsed['hive'] : nil
        return config if config.is_a?(Hash)

        raise ArgumentError, "No 'hive' section found in config file #{path}"
      end

      def load_default_variables
        @host = platform_specific_host
        @port = 10000
        @host_shared_directory_path = platform_specific_host_shared_dir_path
        @docker_shared_directory_path = '/tmp/spec-tmp-files'
        @hive_version = default_version
        @connection_timeout = default_timeout
        @hive_options = default_hive_options
      end

      # Copies values from the parsed `hive` mapping. Version and timeout
      # fall back to the defaults and are coerced with `to_i`; note that the
      # timeout is read from the `timeout` key.
      def load_variables_from_config(config)
        @host = config['host']
        @port = config['port']
        @host_shared_directory_path = config['host_shared_directory_path']
        @docker_shared_directory_path = config['docker_shared_directory_path']
        @hive_version = (config['hive_version'] || default_version).to_i
        @connection_timeout = (config['timeout'] || default_timeout).to_i
        @hive_options = merge_config_options(default_hive_options, config)
      end

      # User supplied `hive_options` win over the defaults; a missing section
      # (nil) is treated as an empty hash.
      def merge_config_options(hash, config)
        hash.merge(config['hive_options'].to_h)
      end

      def mac?
        host_os = RbConfig::CONFIG['host_os']
        host_os =~ /darwin|mac os/
      end

      # NOTE(review): 192.168.99.100 is presumably the docker-machine VM
      # address used on macOS - confirm.
      def platform_specific_host
        mac? ? '192.168.99.100' : '127.0.0.1'
      end

      # Creates a fresh temporary directory (under /Users/Shared on macOS)
      # and returns the path of a `spec-tmp-files` entry inside it. The
      # entry itself is not created here.
      def platform_specific_host_shared_dir_path
        if mac?
          File.join(Dir.mktmpdir(nil, '/Users/Shared'), 'spec-tmp-files')
        else
          File.join(Dir.mktmpdir, 'spec-tmp-files')
        end
      end

      def default_timeout
        1800
      end

      def default_version
        10
      end

      def default_hive_options
        {'hive.exec.dynamic.partition' => 'true',
         'hive.exec.dynamic.partition.mode' => 'nonstrict',
         # Fixed key: the Hive property is `...partitions.pernode`.
         'hive.exec.max.dynamic.partitions.pernode' => '100000',
         'hive.exec.max.dynamic.partitions' => '100000',
         'mapred.child.java.opts' => '-Xmx2048m'}
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
+ require 'delegate'
2
+ require 'tempfile'
3
+
4
module RSpec
  module Hive
    # Decorates a Hive connection with helpers for creating tables, loading
    # data and switching databases. Every call not defined here is delegated
    # to the wrapped connection (`execute`, `fetch`, ...).
    class ConnectionDelegator < SimpleDelegator
      # @param connection the object that receives delegated calls
      # @param config object exposing `host_shared_directory_path` and
      #   `docker_shared_directory_path`
      def initialize(connection, config)
        super(connection)
        @config = config
      end

      # Executes the schema's create statement on a copy whose `@location`
      # is cleared, leaving the caller's schema untouched.
      # NOTE(review): presumably so the table lands in Hive's default
      # location - confirm.
      def create_table(table_schema)
        table_schema = table_schema.dup
        table_schema.instance_variable_set(:@location, nil)
        execute(table_schema.create_table_statement)
      end

      # Adds partitions to a table.
      #
      # @param table_name [String]
      # @param partitions [Hash, Array<Hash>] column => value pairs, one
      #   hash per partition
      def load_partitions(table_name, partitions)
        partitions = partition_clause(partitions)
        query = "ALTER TABLE #{table_name} ADD #{partitions}"
        execute(query)
      end

      # Writes `values` to a temporary file in the host shared directory and
      # loads that file into the table, addressing it by the matching path
      # under the docker shared directory.
      #
      # @param table_schema schema responding to `name`; its `@field_sep`
      #   is used as the column delimiter (';' when it is not set)
      # @param values [Array<Array>] rows to load
      # @param partitions [Hash, Array<Hash>, nil] target partition(s)
      def load_into_table(table_schema, values, partitions = nil)
        table_name = table_schema.name
        Tempfile.open(table_name, @config.host_shared_directory_path) do |file|
          write_values_to_file(file, values, table_schema.instance_variable_get(:@field_sep))
          partition_query = partition_clause(partitions) if partitions
          load_file_to_hive_table(table_name, docker_path(file), partition_query)
        end
      end

      def show_tables
        fetch('SHOW TABLES')
      end

      def create_database(name)
        execute("CREATE DATABASE IF NOT EXISTS `#{name}`")
      end

      def use_database(name)
        execute("USE `#{name}`")
      end

      def drop_database(name)
        execute("DROP DATABASE `#{name}`")
      end

      def show_databases
        fetch('SHOW DATABASES')
      end

      # Creates the database when needed and makes it the current one.
      def switch_database(db_name)
        create_database(db_name)
        use_database(db_name)
      end

      private

      # Builds one `PARTITION(...)` clause per hash, space separated.
      def partition_clause(partitions)
        if partitions.is_a?(Array)
          partitions.map { |partition| to_partition_clause(partition) }.join(' ')
        else
          to_partition_clause(partitions)
        end
      end

      def to_partition_clause(partition)
        "PARTITION(#{partition.map { |k, v| "#{k}='#{v}'" }.join(',')})"
      end

      def load_file_to_hive_table(table_name, path, partition_query = nil)
        request_txt = "load data local inpath '#{path}' into table #{table_name}"
        request_txt << " #{partition_query}" unless partition_query.nil?
        execute(request_txt)
      end

      # Path of the temp file under the docker shared directory.
      def docker_path(file)
        File.join(@config.docker_shared_directory_path, File.basename(file.path))
      end

      # Writes one line per row and flushes so the data is on disk before
      # the load statement runs.
      def write_values_to_file(file, values, delimiter = ';')
        # An explicit nil (schema without @field_sep) would bypass the
        # default and make `join` glue the fields together.
        delimiter ||= ';'
        values.each { |value| file.puts(value.join(delimiter)) }
        file.flush
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require 'rbhive'
2
+ require 'tempfile'
3
+ require 'yaml'
4
+ require 'pry'
5
+
6
module RSpec
  module Hive
    # Opens and closes Hive connections described by a configuration object.
    class Connector
      attr_reader :result, :config

      # @param configuration object exposing host, port, hive_version,
      #   connection_timeout, hive_options and logger
      def initialize(configuration)
        @config = configuration
      end

      # Opens a connection, switches to `db_name` and applies every
      # configured Hive option with a `SET key=value;` statement.
      #
      # A Thrift::ApplicationException is logged and swallowed: the
      # connection is closed and whatever was obtained so far is returned,
      # which is nil when opening itself failed.
      #
      # @param db_name [String] database to create/use
      # @return the delegated connection, or nil
      def start_connection(db_name = DbName.random_name)
        connection = open_connection
        connection.switch_database(db_name)
        @config.hive_options.each do |key, value|
          connection.execute("SET #{key}=#{value};")
        end

        connection
      rescue Thrift::ApplicationException => e
        config.logger.fatal('An exception was thrown during start connection')
        config.logger.fatal(e)
        stop_connection(connection)
        connection
      end

      # Closes the session (when one exists) and then the connection.
      # IOError raised while closing is logged and swallowed.
      #
      # @param connection the connection to close; nil is ignored so that a
      #   failed open does not turn into a NoMethodError here
      def stop_connection(connection)
        return if connection.nil?

        connection.close_session if connection.session
        connection.close
      rescue IOError => e
        config.logger.fatal('An exception was thrown during close connection')
        config.logger.fatal(e)
      end

      # Yields a ConnectionDelegator around the connection that
      # RBHive.tcli_connect hands to its block.
      def tlcli_connect
        RBHive.tcli_connect(@config.host,
                            @config.port,
                            connection_options) do |connection|
          yield ConnectionDelegator.new(connection, @config)
        end
      end

      private

      def log_connection_params
        @config.logger.info('Opening connection.')
        @config.logger.info("Connection options: #{connection_options}")
        @config.logger.info("Config #{@config.inspect}")
      end

      # Builds a delegated TCLI connection, then opens it and its session.
      def open_connection
        log_connection_params

        connection = RBHive::TCLIConnection.new(
          @config.host,
          @config.port,
          connection_options
        )
        connection = ConnectionDelegator.new(connection, @config)

        connection.open
        connection.open_session
        connection
      end

      def connection_options
        {
          hive_version: @config.hive_version,
          transport: :sasl,
          sasl_params: {},
          logger: @config.logger,
          timeout: @config.connection_timeout
        }
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'securerandom'
2
+
3
module RSpec
  module Hive
    # Produces database names that are unique per call.
    class DbName
      # @return [String] "<unix timestamp>_<uuid without dashes>"
      def self.random_name
        [timestamp, random_key].join('_')
      end

      # Current UTC time as whole seconds since the epoch, as a string.
      def self.timestamp
        Time.now.utc.to_i.to_s
      end
      private_class_method :timestamp

      # A random UUID with its dashes stripped.
      def self.random_key
        SecureRandom.uuid.tr('-', '')
      end
      private_class_method :random_key
    end
  end
end
@@ -0,0 +1,5 @@
1
module RSpec
  module Hive
    # Gem version; the gemspec reads it as RSpec::Hive::VERSION.
    VERSION = '0.1.0'
  end
end
@@ -0,0 +1,27 @@
1
module RSpec
  module Hive
    # Mixin giving an example group a shared Hive connection.
    #
    # Including it registers hooks on the including module: the connection
    # is opened before all examples, every example runs in a freshly named
    # database, and the connection is closed after all examples.
    module WithHiveConnection
      # @return the connector exposed by `RSpec::Hive.connector`
      def hive
        Hive.connector
      end

      # @return the memoized connection, started on first use
      def connection
        @connection ||= hive.start_connection
      end

      def self.included(mod)
        mod.before(:all) { connection }

        mod.before(:each) do
          connection.switch_database(DbName.random_name)
        end

        mod.after(:all) do
          # Close only a connection that was actually opened. The previous
          # `unless hive && connection` guard was inverted and never closed
          # it. Reading @connection avoids opening one during teardown.
          hive.stop_connection(@connection) if @connection
        end
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ require 'yaml'
2
+ require 'colorize'
3
+ require 'tmpdir'
4
+ require 'rspec/hive'
5
+
6
namespace :spec do
  namespace :hive do
    namespace :config do
      # Writes a YAML config built from RSpec::Hive::Configuration defaults.
      # Each value can be overridden through HOST, PORT, HOST_SHARED_DIR,
      # DOCKER_SHARED_DIR and HIVE_VERSION; the target file is controlled by
      # CONFIG_FILE_DIR and CONFIG_FILE_NAME.
      desc 'Generates example config file. Accepts directory to file.'
      task :generate_default do
        require 'rbconfig'

        default_config = RSpec::Hive::Configuration.new

        default_values = {
          'hive' =>
            {
              'host' => ENV['HOST'] || default_config.host,
              'port' => ENV['PORT'] || default_config.port,
              'host_shared_directory_path' =>
                ENV['HOST_SHARED_DIR'] || default_config.host_shared_directory_path,
              'docker_shared_directory_path' =>
                ENV['DOCKER_SHARED_DIR'] || default_config.docker_shared_directory_path,
              'hive_version' =>
                ENV['HIVE_VERSION'] || default_config.hive_version
            }
        }
        # Make sure the shared directory exists on the host.
        system 'mkdir', '-p', default_values['hive']['host_shared_directory_path']
        file_path = File.join(
          ENV['CONFIG_FILE_DIR'] || '.',
          ENV['CONFIG_FILE_NAME'] || 'rspec-hive.yml'
        )
        File.open(file_path, 'w+') do |f|
          f.write default_values.to_yaml
          puts "Default config written to #{f.path}".green
        end
      end
    end

    namespace :docker do
      # Starts the Hive container detached, mounting the host shared
      # directory at the docker shared directory and publishing the
      # configured port onto the container's port 10000.
      desc 'Runs docker using hive config file.'\
           ' It assumes your docker-machine is running.'
      task :run do
        # ERB is used below; require it here so the task does not depend on
        # another library having loaded it.
        require 'erb'

        # Only a warning: the task still attempts to run docker afterwards.
        puts 'Command `docker` not found.'.red unless system('which docker')

        config_filepath = ENV['CONFIG_FILE'] || 'rspec-hive.yml'
        docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
        unless File.exist?(config_filepath)
          puts "There's no config file #{config_filepath} please generate default or provide custom config.".red
          raise Errno::ENOENT, config_filepath
        end

        interpolated = ERB.new(File.read(config_filepath)).result
        config = YAML.load(interpolated)['hive']

        cmd = "docker run -v #{config['host_shared_directory_path']}:"\
              "#{config['docker_shared_directory_path']}"\
              " -d -p #{config['port']}:10000 #{docker_image_name}"

        puts "Running `#{cmd}`...".green
        system(cmd)
      end

      # Pulls the image named by DOCKER_IMAGE_NAME (default
      # nielsensocial/hive).
      desc 'Downloads docker image from dockerhub.'
      task :download_image do
        puts 'Command `docker` not found.'.red unless system('which docker')

        docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'

        cmd = "docker pull #{docker_image_name}"
        puts "Running `#{cmd}`...".green
        system(cmd)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'rails'
2
+
3
module RSpec
  module Hive
    # Railtie that loads the gem's docker rake tasks file when the host
    # Rails application loads its rake tasks.
    class Railtie < Rails::Railtie
      rake_tasks { load 'rspec/rake_tasks/docker.rake' }
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'rspec/hive/version'

Gem::Specification.new do |spec|
  spec.name          = 'rspec-hive'
  spec.version       = RSpec::Hive::VERSION
  spec.authors       = ['Wojtek Mielczarek', 'Mikołaj Nowak']
  spec.email         = %w(wojtek.mielczarek@u2i.com mikolaj.nowak@u2i.com)
  spec.summary       = 'RSpec addition to test hive queries'
  spec.description   = 'RSpecHive let you test your hive queries
                        connecting to hive instance installed on docker'
  spec.homepage      = 'https://github.com/u2i/ns-rspec-hive'
  spec.license       = 'MIT'

  # Package every file tracked by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'rake', '~> 10.0'
  spec.add_dependency 'colorize', '~> 0.7'
  # The connector requires 'rbhive' at load time, so it has to be installed
  # together with the gem rather than only for development.
  spec.add_dependency 'rbhive', '~> 0.6.0'

  spec.add_development_dependency 'bundler', '~> 1.7'
  spec.add_development_dependency 'rspec', '~> 3.4'
  spec.add_development_dependency 'rspec-its', '~> 1.2'
  spec.add_development_dependency 'rubocop', '~> 0.34'
  spec.add_development_dependency 'rubocop-rspec', '~> 1.3'
  spec.add_development_dependency 'guard', '~> 2.6'
  spec.add_development_dependency 'guard-rspec', '~> 4.3'
  spec.add_development_dependency 'guard-rubocop', '~> 1.2'
  spec.add_development_dependency 'codeclimate-test-reporter', '~> 0.4'
  # NOTE(review): the connector also requires 'pry' at load time; either
  # drop that require or promote pry to a runtime dependency.
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'pry-byebug'
end