rspec-hive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
require 'erb'
require 'logger'
require 'rbconfig'
require 'tmpdir'
require 'yaml'
2
+
3
+ module RSpec
4
+ module Hive
5
# Holds the settings needed to reach a Hive instance and to exchange data
# files with it through a directory shared between host and container.
#
# Values come either from built-in, platform-dependent defaults (when no
# path is given) or from the `hive` section of a YAML file that is rendered
# through ERB before parsing.
class Configuration
  attr_accessor :host,
                :port,
                :host_shared_directory_path,
                :docker_shared_directory_path,
                :logger,
                :hive_version,
                :connection_timeout,
                :hive_options

  # @param path_to_config_file [String, nil] path to an ERB/YAML config
  #   file with a top-level `hive` key; nil selects the built-in defaults.
  def initialize(path_to_config_file = nil)
    if path_to_config_file.nil?
      load_default_variables
    else
      interpolated = ERB.new(File.read(path_to_config_file)).result
      # NOTE(review): on older Psych versions YAML.load may instantiate
      # arbitrary objects; only point this at trusted config files.
      config = YAML.load(interpolated)['hive']
      load_variables_from_config(config)
    end
    # The logger always writes to standard output, whichever source was used.
    @logger = Logger.new(STDOUT)
  end

  private

  # Populates every setting from the built-in defaults.
  def load_default_variables
    @host = platform_specific_host
    @port = 10_000
    @host_shared_directory_path = platform_specific_host_shared_dir_path
    @docker_shared_directory_path = '/tmp/spec-tmp-files'
    @hive_version = default_version
    @connection_timeout = default_timeout
    @hive_options = default_hive_options
  end

  # Populates every setting from the parsed `hive` section of a config file.
  # Missing `hive_version` / `timeout` entries fall back to the defaults and
  # both are coerced to integers; `hive_options` are merged over the defaults.
  def load_variables_from_config(config)
    @host = config['host']
    @port = config['port']
    @host_shared_directory_path = config['host_shared_directory_path']
    @docker_shared_directory_path = config['docker_shared_directory_path']
    @hive_version = (config['hive_version'] || default_version).to_i
    @connection_timeout = (config['timeout'] || default_timeout).to_i
    @hive_options = merge_config_options(default_hive_options, config)
  end

  # Returns `hash` overlaid with the config's `hive_options` (if any).
  def merge_config_options(hash, config)
    # nil.to_h is {}, so a config without `hive_options` keeps the defaults.
    hash.merge(config['hive_options'].to_h)
  end

  # Truthy when the Ruby host OS string looks like macOS.
  def mac?
    host_os = RbConfig::CONFIG['host_os']
    host_os =~ /darwin|mac os/
  end

  # NOTE(review): 192.168.99.100 is presumably the address of the docker VM
  # on macOS - confirm against the docker setup in use.
  def platform_specific_host
    mac? ? '192.168.99.100' : '127.0.0.1'
  end

  # Creates a fresh temporary directory and returns the path of a
  # `spec-tmp-files` entry inside it (the entry itself is not created here).
  # On macOS the temporary directory is created under /Users/Shared.
  def platform_specific_host_shared_dir_path
    if mac?
      File.join(Dir.mktmpdir(nil, '/Users/Shared'), 'spec-tmp-files')
    else
      File.join(Dir.mktmpdir, 'spec-tmp-files')
    end
  end

  def default_timeout
    1800
  end

  def default_version
    10
  end

  # Hive session options applied when nothing overrides them.
  def default_hive_options
    {
      'hive.exec.dynamic.partition' => 'true',
      'hive.exec.dynamic.partition.mode' => 'nonstrict',
      # Fixed: the key was misspelled as `...pernodexi`, which is not a
      # Hive property, so the per-node limit was never actually raised.
      'hive.exec.max.dynamic.partitions.pernode' => '100000',
      'hive.exec.max.dynamic.partitions' => '100000',
      'mapred.child.java.opts' => '-Xmx2048m'
    }
  end
end
85
+ end
86
+ end
@@ -0,0 +1,88 @@
1
+ require 'delegate'
2
+ require 'tempfile'
3
+
4
+ module RSpec
5
+ module Hive
6
# Wraps a Hive connection and adds test-oriented helpers (table creation,
# data loading through the shared directory, database switching). Every
# method not defined here is forwarded to the wrapped connection.
class ConnectionDelegator < SimpleDelegator
  # @param connection [Object] connection that receives `execute`/`fetch`
  # @param config [Object] must expose `host_shared_directory_path` and
  #   `docker_shared_directory_path`
  def initialize(connection, config)
    super(connection)
    @config = config
  end

  # Creates the table described by `table_schema`, ignoring any location
  # set on it. The schema is duplicated first so the caller's object is
  # left untouched.
  def create_table(table_schema)
    table_schema = table_schema.dup
    table_schema.instance_variable_set(:@location, nil)
    execute(table_schema.create_table_statement)
  end

  # Adds one partition (Hash) or several (Array of Hashes) to a table.
  def load_partitions(table_name, partitions)
    partitions = partition_clause(partitions)
    query = "ALTER TABLE #{table_name} ADD #{partitions}"
    execute(query)
  end

  # Writes `values` (an array of rows, each an array of columns) to a
  # temporary file in the host shared directory and loads it into the table
  # via the corresponding path inside the container.
  #
  # @param table_schema [Object] responds to `name`; its `@field_sep`
  #   instance variable, when set, is used as the column separator
  # @param values [Array<Array>] rows to load
  # @param partitions [Hash, Array<Hash>, nil] optional target partition(s)
  def load_into_table(table_schema, values, partitions = nil)
    table_name = table_schema.name
    Tempfile.open(table_name, @config.host_shared_directory_path) do |file|
      write_values_to_file(file, values, table_schema.instance_variable_get(:@field_sep))
      partition_query = partition_clause(partitions) if partitions
      load_file_to_hive_table(table_name, docker_path(file), partition_query)
    end
  end

  def show_tables
    fetch('SHOW TABLES')
  end

  def create_database(name)
    execute("CREATE DATABASE IF NOT EXISTS `#{name}`")
  end

  def use_database(name)
    execute("USE `#{name}`")
  end

  def drop_database(name)
    execute("DROP DATABASE `#{name}`")
  end

  def show_databases
    fetch('SHOW DATABASES')
  end

  # Creates the database when it does not exist yet, then makes it current.
  def switch_database(db_name)
    create_database(db_name)
    use_database(db_name)
  end

  private

  # Builds the PARTITION(...) clause(s) for a single Hash or an Array of them.
  def partition_clause(partitions)
    if partitions.is_a?(Array)
      partitions.map { |partition| to_partition_clause(partition) }.join(' ')
    else
      to_partition_clause(partitions)
    end
  end

  # NOTE(review): values are interpolated without escaping; a value that
  # contains a single quote will produce a broken statement.
  def to_partition_clause(partition)
    "PARTITION(#{partition.map { |k, v| "#{k}='#{v}'" }.join(',')})"
  end

  def load_file_to_hive_table(table_name, path, partition_clause = nil)
    request_txt = "load data local inpath '#{path}' into table #{table_name}"
    request_txt = "#{request_txt} #{partition_clause}" unless partition_clause.nil?
    execute(request_txt)
  end

  # Maps a file in the host shared directory to its path in the container.
  def docker_path(file)
    File.join(@config.docker_shared_directory_path, File.basename(file.path))
  end

  # Writes one line per row, columns joined by `delimiter`, then flushes so
  # the data is on disk before Hive is asked to read it.
  def write_values_to_file(file, values, delimiter = ';')
    # A schema without a field separator hands us an explicit nil, which
    # bypasses the parameter default; fall back to ';' so columns are not
    # silently run together.
    separator = delimiter || ';'
    values.each { |value| file.puts(value.join(separator)) }
    file.flush
  end
end
87
+ end
88
+ end
@@ -0,0 +1,82 @@
1
+ require 'rbhive'
2
+ require 'tempfile'
3
+ require 'yaml'
4
+ require 'pry'
5
+
6
+ module RSpec
7
+ module Hive
8
# Opens and closes Hive connections according to a configuration object.
class Connector
  attr_reader :result, :config

  # @param configuration [Object] exposes host, port, hive_version,
  #   connection_timeout, hive_options and logger
  def initialize(configuration)
    @config = configuration
    # transport: :sasl, sasl_params: {username: 'hive', password: ''},
  end

  # Opens a connection, switches to `db_name` and applies every configured
  # Hive option with a SET statement.
  #
  # A Thrift::ApplicationException is logged and the connection (if one was
  # obtained) is closed; the method then returns that connection, or nil
  # when opening itself failed.
  #
  # @param db_name [String] database to create/use; random by default
  def start_connection(db_name = DbName.random_name)
    connection = open_connection
    connection.switch_database(db_name)
    @config.hive_options.each do |key, value|
      connection.execute("SET #{key}=#{value};")
    end

    connection
  rescue Thrift::ApplicationException => e
    config.logger.fatal('An exception was thrown during start connection')
    config.logger.fatal(e)
    # `connection` is still nil when open_connection itself raised;
    # stop_connection tolerates that.
    stop_connection(connection)
    connection
  end

  # Closes the session (when one exists) and the connection. An IOError
  # raised while closing is logged rather than propagated.
  #
  # @param connection [Object, nil] nil is accepted and ignored, so that
  #   cleanup after a failed open does not raise NoMethodError and hide the
  #   original error.
  def stop_connection(connection)
    return if connection.nil?

    connection.close_session if connection.session
    connection.close
  rescue IOError => e
    config.logger.fatal('An exception was thrown during close connection')
    config.logger.fatal(e)
  end

  # Runs the given block with a wrapped connection obtained through
  # RBHive.tcli_connect.
  def tlcli_connect
    RBHive.tcli_connect(@config.host,
                        @config.port,
                        connection_options) do |connection|
      yield ConnectionDelegator.new(connection, @config)
    end
  end

  private

  def log_connection_params
    @config.logger.info('Opening connection.')
    @config.logger.info("Connection options: #{connection_options}")
    @config.logger.info("Config #{@config.inspect}")
  end

  # Builds a wrapped TCLI connection, then opens it and its session.
  def open_connection
    log_connection_params

    connection = RBHive::TCLIConnection.new(
      @config.host,
      @config.port,
      connection_options
    )
    connection = ConnectionDelegator.new(connection, @config)

    connection.open
    connection.open_session
    connection
  end

  # Options handed to RBHive when creating a connection.
  def connection_options
    {
      hive_version: @config.hive_version,
      transport: :sasl,
      sasl_params: {},
      logger: @config.logger,
      timeout: @config.connection_timeout
    }
  end
end
81
+ end
82
+ end
@@ -0,0 +1,23 @@
1
+ require 'securerandom'
2
+
3
+ module RSpec
4
+ module Hive
5
# Generates database names that are unlikely to collide between runs.
class DbName
  # @return [String] the current UTC epoch seconds, an underscore, and a
  #   UUID with its dashes removed
  def self.random_name
    [timestamp, random_key].join('_')
  end

  # Current time as whole seconds since the epoch, rendered as a string.
  def self.timestamp
    Time.now.utc.to_i.to_s
  end

  # A random UUID without the dash separators.
  def self.random_key
    SecureRandom.uuid.tr('-', '')
  end

  private_class_method :timestamp, :random_key
end
22
+ end
23
+ end
@@ -0,0 +1,5 @@
1
module RSpec
  module Hive
    # Version of the rspec-hive gem; frozen so it cannot be mutated in place.
    VERSION = '0.1.0'.freeze
  end
end
@@ -0,0 +1,27 @@
1
+ module RSpec
2
+ module Hive
3
# Mixin for example groups that need a Hive connection.
#
# Including it registers hooks on the including group: a connection is
# opened once before all examples, every example runs against a freshly
# named database, and the connection is closed after all examples.
module WithHiveConnection
  # The connector used to open and close connections.
  def hive
    Hive.connector
  end

  # The memoized connection, opened on first use.
  def connection
    @connection ||= hive.start_connection
  end

  def self.included(mod)
    mod.before(:all) do
      connection
    end

    mod.before(:each) do
      connection.switch_database(DbName.random_name)
    end

    mod.after(:all) do
      # Fixed: the guard used to be `unless hive && connection`, which
      # skipped the close exactly when there was a connection to close.
      hive.stop_connection(connection) if hive && connection
    end
  end
end
26
+ end
27
+ end
@@ -0,0 +1,76 @@
1
+ require 'yaml'
2
+ require 'colorize'
3
+ require 'tmpdir'
4
+ require 'rspec/hive'
5
+
6
# Rake tasks for preparing an rspec-hive environment:
#   spec:hive:config:generate_default - writes a YAML config with defaults
#   spec:hive:docker:run              - starts the Hive container
#   spec:hive:docker:download_image   - pulls the Hive image
namespace :spec do
  namespace :hive do
    namespace :config do
      desc 'Generates example config file. Accepts directory to file.'
      # Environment overrides: HOST, PORT, HOST_SHARED_DIR, DOCKER_SHARED_DIR,
      # HIVE_VERSION for the values; CONFIG_FILE_DIR and CONFIG_FILE_NAME for
      # where the file is written (default ./rspec-hive.yml).
      task :generate_default do
        require 'fileutils'
        require 'rbconfig'

        default_config = RSpec::Hive::Configuration.new

        default_values = {
          'hive' =>
            {
              'host' => ENV['HOST'] || default_config.host,
              'port' => ENV['PORT'] || default_config.port,
              'host_shared_directory_path' =>
                ENV['HOST_SHARED_DIR'] || default_config.host_shared_directory_path,
              'docker_shared_directory_path' =>
                ENV['DOCKER_SHARED_DIR'] || default_config.docker_shared_directory_path,
              'hive_version' =>
                ENV['HIVE_VERSION'] || default_config.hive_version
            }
        }
        # Create the host side of the shared directory; FileUtils avoids
        # depending on an external `mkdir` binary.
        FileUtils.mkdir_p(default_values['hive']['host_shared_directory_path'])
        file_path = File.join(
          ENV['CONFIG_FILE_DIR'] || '.',
          ENV['CONFIG_FILE_NAME'] || 'rspec-hive.yml'
        )
        File.open(file_path, 'w+') do |f|
          f.write default_values.to_yaml
          puts "Default config written to #{f.path}".green
        end
      end
    end

    namespace :docker do
      # Stops the current task when the docker CLI cannot be found, instead
      # of printing a warning and then failing later on the docker command.
      ensure_docker_available = lambda do
        abort 'Command `docker` not found.'.red unless system('which docker')
      end

      desc 'Runs docker using hive config file.'\
           ' It assumes your docker-machine is running.'
      # Environment overrides: CONFIG_FILE (default rspec-hive.yml) and
      # DOCKER_IMAGE_NAME (default nielsensocial/hive).
      task :run do
        require 'erb'

        ensure_docker_available.call

        config_filepath = ENV['CONFIG_FILE'] || 'rspec-hive.yml'
        docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'
        unless File.exist?(config_filepath)
          puts "There's no config file #{config_filepath} please generate default or provide custom config.".red
          raise Errno::ENOENT, config_filepath
        end

        interpolated = ERB.new(File.read(config_filepath)).result
        config = YAML.load(interpolated)['hive']

        # Mounts the host shared directory into the container and publishes
        # the configured port onto the container's port 10000.
        cmd = "docker run -v #{config['host_shared_directory_path']}:"\
              "#{config['docker_shared_directory_path']}"\
              " -d -p #{config['port']}:10000 #{docker_image_name}"

        puts "Running `#{cmd}`...".green
        system(cmd)
      end

      desc 'Downloads docker image from dockerhub.'
      # Environment override: DOCKER_IMAGE_NAME (default nielsensocial/hive).
      task :download_image do
        ensure_docker_available.call

        docker_image_name = ENV['DOCKER_IMAGE_NAME'] || 'nielsensocial/hive'

        cmd = "docker pull #{docker_image_name}"
        puts "Running `#{cmd}`...".green
        system(cmd)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'rails'
2
+
3
+ module RSpec
4
+ module Hive
5
# Rails hook that loads the gem's docker/config rake tasks file whenever
# the host application loads its rake tasks.
class Railtie < Rails::Railtie
  rake_tasks { load 'rspec/rake_tasks/docker.rake' }
end
10
+ end
11
+ end
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'rspec/hive/version'
5
+
6
# Gem metadata, packaged files and dependencies for rspec-hive.
Gem::Specification.new do |spec|
  spec.name          = 'rspec-hive'
  spec.version       = RSpec::Hive::VERSION
  spec.authors       = ['Wojtek Mielczarek', 'Mikołaj Nowak']
  spec.email         = %w(wojtek.mielczarek@u2i.com mikolaj.nowak@u2i.com)
  spec.summary       = 'RSpec addition to test hive queries'
  spec.description   = 'RSpecHive let you test your hive queries
                        connecting to hive instance installed on docker'
  spec.homepage      = 'https://github.com/u2i/ns-rspec-hive'
  spec.license       = 'MIT'

  # Package everything tracked by git; executables and test files are
  # picked out of that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'rake', '~> 10.0'
  spec.add_dependency 'colorize', '~> 0.7'
  # The connector requires 'rbhive' when it is loaded, so it has to be
  # installed with the gem rather than only in development.
  spec.add_dependency 'rbhive', '~> 0.6.0'

  spec.add_development_dependency 'bundler', '~> 1.7'
  spec.add_development_dependency 'rspec', '~> 3.4'
  spec.add_development_dependency 'rspec-its', '~> 1.2'
  spec.add_development_dependency 'rubocop', '~> 0.34'
  spec.add_development_dependency 'rubocop-rspec', '~> 1.3'
  spec.add_development_dependency 'guard', '~> 2.6'
  spec.add_development_dependency 'guard-rspec', '~> 4.3'
  spec.add_development_dependency 'guard-rubocop', '~> 1.2'
  spec.add_development_dependency 'codeclimate-test-reporter', '~> 0.4'
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'pry-byebug'
end