rspec-hive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ca84700fd2dd11920a2cf224d634f444a9e325b9
4
+ data.tar.gz: ac29aa0c169716541e7df32d8ddcfed8ed8a005e
5
+ SHA512:
6
+ metadata.gz: 5ec841bc2886f78ff101a18481ff622abf64c4ae5abb387441fab204d77327c805c2181f94201e850911cdb248ca3a06c3f1f097074abfcf70438eb2730626ac
7
+ data.tar.gz: 4cc12280d1c740e9c8b349995def32422796ac4c96bd7061d2d13ef936bc1fe3fbeea78eb138807e2c1c4751b1de26eb171927ad5c14c2ea23c63493acbdb618
data/.codeclimate.yml ADDED
@@ -0,0 +1,14 @@
1
+ ---
2
+ engines:
3
+ bundler-audit:
4
+ enabled: true
5
+ fixme:
6
+ enabled: true
7
+ rubocop:
8
+ enabled: true
9
+ ratings:
10
+ paths:
11
+ - Gemfile.lock
12
+ - "**.rb"
13
+ exclude_paths:
14
+ - spec/**/*
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ .idea
2
+ *.iml
3
+
4
+ /.bundle/
5
+ /.yardoc
6
+ /Gemfile.lock
7
+ /_yardoc/
8
+ /coverage/
9
+ /doc/
10
+ /pkg/
11
+ /spec/reports/
12
+ /tmp/
13
+ *.bundle
14
+ *.so
15
+ *.o
16
+ *.a
17
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,11 @@
1
+ inherit_from: .rubocop_u2i.yml
2
+
3
+ AllCops:
4
+ Exclude:
5
+ - spec/spec_helper.rb
6
+
7
+ Metrics/LineLength:
8
+ Max: 80
9
+ Enabled: true
10
+ Exclude:
11
+ - spec/**/*
data/.rubocop_u2i.yml ADDED
@@ -0,0 +1,55 @@
1
+ require: rubocop-rspec
2
+
3
+ AllCops:
4
+ RunRailsCops: true
5
+ Include:
6
+ - '**/Rakefile'
7
+ - config.ru
8
+ Exclude:
9
+ - bin/**/*
10
+ - db/**/*
11
+ - config/**/*
12
+ - script/**/*
13
+ - db/schema.rb
14
+
15
+ Style/MultilineOperationIndentation:
16
+ EnforcedStyle: indented
17
+
18
+ Style/Documentation:
19
+ Enabled: false
20
+
21
+ Style/WhileUntilModifier:
22
+ MaxLineLength: 120
23
+ Exclude:
24
+ - spec/**/*
25
+
26
+ Style/SpaceInsideHashLiteralBraces:
27
+ EnforcedStyle: no_space
28
+ EnforcedStyleForEmptyBraces: no_space
29
+
30
+ Style/MultilineBlockChain:
31
+ Enabled: true
32
+
33
+ Style/DotPosition:
34
+ EnforcedStyle: trailing
35
+ Enabled: true
36
+
37
+ Style/SingleLineMethods:
38
+ AllowIfMethodIsEmpty: false
39
+
40
+ Style/NumericLiterals:
41
+ Enabled: false
42
+
43
+ Metrics/LineLength:
44
+ Max: 120
45
+ Enabled: true
46
+ Exclude:
47
+ - spec/**/*
48
+
49
+ Metrics/MethodLength:
50
+ CountComments: false # count full line comments?
51
+ Max: 30
52
+
53
+ RSpec/FilePath:
54
+ Exclude:
55
+ - spec/routing/**/*_routing_spec.rb
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.1.5
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,15 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+ # Both guards share one group; halt_on_fail: true is set so a failing guard halts the rest of the group.
4
+ group :red_green_refactor, halt_on_fail: true do
5
+ guard :rspec, cmd: 'bundle exec rspec' do
6
+ watch(%r{^spec/.+_spec\.rb$})
7
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" } # lib/foo.rb -> spec/lib/foo_spec.rb
8
+ watch('spec/spec_helper.rb') { 'spec' } # a helper change maps to the whole spec directory
9
+ end
10
+ # RuboCop watches every Ruby file and any .rubocop.yml.
11
+ guard :rubocop do
12
+ watch(%r{.+\.rb$})
13
+ watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) } # maps to the directory holding the changed config
14
+ end
15
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Wojtek Mielczarek
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,123 @@
1
+ [![Code Climate](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/gpa.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/feed)
2
+ [![Test Coverage](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/coverage.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/coverage)
3
+ [![Issue Count](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/badges/85abbc07acb75f664185/issue_count.svg)](https://codeclimate.com/repos/567b03d7bd3f3b2512002248/feed)
4
+
5
+ # rspec-hive
6
+
7
+ rspec-hive is a utility gem to help you write beautiful rspec tests for hive queries. The idea is simple - you just launch a docker machine with hadoop and hive installed. To test a query you create a simple RSpec file and extend it with `RSpec::Hive::WithHiveConnection`.
8
+
9
+ We have prepared a few simple rake tasks that will let you create sample config file, download correct docker image and run docker container with proper parameters.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'rspec-hive'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install rspec-hive
26
+
27
+ ### Configuring tests
28
+
29
+ #### Config file
30
+
31
+ To run tests on docker you will need a configuration file that will let you put up a docker container and maintain your connection to this container from tests. You can do this manually and provide just a path to the file, but we have also prepared special rake tasks to help you out. Try running:
32
+
33
+ $ rake spec:hive:config:generate_default
34
+
35
+ It will create `rspec-hive.yml` in your current directory. You can of course pass some parameters to this rake task doing something like:
36
+
37
+ $ rake spec:hive:config:generate_default HOST=127.0.0.1 PORT=5032
38
+
39
+ You can specify the following arguments:
40
+ * HOST - ip of docker container
41
+ * PORT - port to connect to docker
42
+ * HOST_SHARED_DIR - directory on your local machine that docker will share
43
+ * DOCKER_SHARED_DIR - directory on your docker container that will be shared with your local machine
44
+ * HIVE_VERSION - version of hive
45
+ * CONFIG_FILE_DIR - directory where to put generated config file
46
+ * CONFIG_FILE_NAME - name of the config file that will be generated
47
+
48
+ #### Installing Docker
49
+ Detailed instructions can be found at https://docs.docker.com/engine/installation.
50
+
51
+ Once docker is successfully installed on your machine you can verify that it works by using the `docker` command.
52
+ In case of error such as `Cannot connect to the Docker daemon. Is the docker daemon running on this host?` make sure you added your user to the docker group, you can do this using `sudo usermod -aG docker username` on Linux or `eval "$(docker-machine env default)"` on OSX.
53
+
54
+ On Linux you can run the docker daemon by using:
55
+ `sudo docker daemon -D -g /mnt`
56
+
57
+ #### Docker image
58
+ Once you have generated a config file you should download the proper docker image to your local machine. You can create your own docker image. However, if you would like to use ours just run:
59
+
60
+ $ rake spec:hive:docker:download_image
61
+
62
+ It will download `nielsensocial/hive` from [dockerhub](https://hub.docker.com/r/nielsensocial/hive/).
63
+ You can change Docker's storage base directory (where containers and images go) using the `-g` option when starting the Docker daemon.
64
+ If you have another image you can also use this rake task and provide special argument:
65
+ * DOCKER_IMAGE_NAME - image name that should be pulled
66
+
67
+
68
+ #### Running docker container
69
+ You should now be ready to run your docker container. To do this run:
70
+
71
+ $ rake spec:hive:docker:run
72
+
73
+ This command will run docker container using default config `rspec-hive.yml` and default docker image `nielsensocial/hive`. You can pass arguments like:
74
+ * CONFIG_FILE - name of config file to use
75
+ * DOCKER_IMAGE_NAME - docker image to use
76
+
77
+ You are ready now to run your tests.
78
+
79
+ #### Docker utils
80
+
81
+ To check container id
82
+
83
+ `$ docker ps`
84
+
85
+ To attach to output of hive
86
+
87
+ `$ docker attach <docker-container-id>`
88
+
89
+ To run bash terminal on docker
90
+
91
+ `$ docker exec -it <docker-container-id> bash`
92
+
93
+ #### Hive utils
94
+
95
+ When you are on hive and you have set up `JAVA_HOME` and `HADOOP_HOME` directories you might find the tool named beeline useful. It should be present in your hive directory in the `bin` folder (if you are using our `nielsensocial/hive` image, when you run a bash terminal on the docker container this directory can be entered by calling `cd $HIVE_HOME/bin`). There you can run:
96
+
97
+ $ ./beeline
98
+
99
+ And in the presented console connect by jdbc to hive:
100
+
101
+ beeline> !connect jdbc:hive2://localhost:10000 org.apache.hive.jdbc.HiveDriver
102
+
103
+ ## Usage
104
+
105
+ In `examples/` directory we have prepared a simple query. It is available in `query_spec.rb` file. Notice how we configure `rspec-hive` by using:
106
+
107
+ require_relative 'config_helper'
108
+
109
+ Where we invoke:
110
+
111
+ RSpec::Hive.configure(File.join(__dir__, 'config.yml'))
112
+
113
+ ## Note
114
+
115
+ Please remember docker does not remove containers automatically, use `docker ps -a` to list all containers, including stopped ones.
116
+
117
+ ## Contributing
118
+
119
+ 1. Fork it ( https://github.com/[my-github-username]/rspec-hive/fork )
120
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
121
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
122
+ 4. Push to the branch (`git push origin my-new-feature`)
123
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
+ require 'bundler/gem_tasks'
2
+ load 'lib/rspec/rake_tasks/docker.rake' # relative path — NOTE(review): presumably requires running rake from the project root; confirm
3
+ # RSpec tasks are optional: when rspec cannot be required a notice is printed instead.
4
+ begin
5
+ require 'rspec/core/rake_task'
6
+
7
+ RSpec::Core::RakeTask.new(:spec)
8
+ # :hive_spec is limited to spec files matching examples/**/*_spec.rb.
9
+ RSpec::Core::RakeTask.new(:hive_spec) do |t|
10
+ t.pattern = 'examples/**/*_spec.rb'
11
+ end
12
+ rescue LoadError
13
+ puts 'Cannot load rspec Rake tasks'
14
+ end
15
+ # The RuboCop task is optional in the same way.
16
+ begin
17
+ require 'rubocop/rake_task'
18
+ RuboCop::RakeTask.new(:rubocop)
19
+ rescue LoadError
20
+ puts 'Cannot load RuboCop Rake tasks'
21
+ end
22
+ # The default task depends on both :spec and :rubocop.
23
+ task default: [:spec, :rubocop]
data/docker/Dockerfile ADDED
@@ -0,0 +1,52 @@
1
+ FROM ubuntu:latest
2
+ RUN apt-get update \
3
+   && apt-get install -y wget \
4
+   && rm -rf /var/lib/apt/lists/*
5
+
6
+ # Pre-seed the Oracle license answer so the Java installer does not prompt.
7
+ RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
8
+
9
+ # Refresh the package index in the same layer as each install; a separately
10
+ # cached `apt-get update` layer could otherwise serve a stale index.
11
+ RUN apt-get update \
12
+   && apt-get install -y software-properties-common
13
+ RUN add-apt-repository ppa:webupd8team/java
14
+
15
+ RUN apt-get update \
16
+   && apt-get install -y oracle-java8-installer \
17
+   && apt-get install -y oracle-java8-set-default
18
+
19
+ ENV SHARED_FOLDER /tmp/spec-tmp-files/
20
+ ENV TMP_ROOT /tmp/root/
21
+ ENV WAREHOUSE /tmp/warehouse/
22
+
23
+ ENV HADOOP_BIN $HOME/hadoop-bin/hadoop-1.2.1/
24
+ ENV HIVE_BIN $HOME/hive-bin/hive-0.11/
25
+
26
+ ENV HADOOP_HOME $HOME/hadoop/
27
+ ENV HIVE_HOME $HOME/hive/
28
+
29
+ ENV JAVA_HOME /usr/
30
+
31
+ RUN mkdir -p $HADOOP_BIN \
32
+   && mkdir -p $HIVE_BIN \
33
+   && mkdir -p $HADOOP_HOME \
34
+   && mkdir -p $HIVE_HOME \
35
+   && mkdir -p $SHARED_FOLDER \
36
+   && mkdir -p $TMP_ROOT \
37
+   && mkdir -p $WAREHOUSE
38
+
39
+ # Both tarballs are fetched from archive.apache.org, which keeps superseded releases.
40
+ RUN wget https://archive.apache.org/dist/hadoop/core/hadoop-1.2.1/hadoop-1.2.1-bin.tar.gz -O $HADOOP_BIN/hadoop.tar.gz
41
+ RUN wget https://archive.apache.org/dist/hive/hive-0.11.0/hive-0.11.0-bin.tar.gz -O $HIVE_BIN/hive.tar.gz
42
+
43
+ RUN tar -xzf $HADOOP_BIN/hadoop.tar.gz --strip-components=1 -C $HADOOP_HOME
44
+ RUN tar -xzf $HIVE_BIN/hive.tar.gz --strip-components=1 -C $HIVE_HOME
45
+
46
+ # Start HiveServer2 on thrift port 10000, logging to the console, with the warehouse under $WAREHOUSE.
47
+ CMD $HIVE_HOME/bin/hive \
48
+   --service hiveserver2 \
49
+   --hiveconf hive.server2.enable.doAs=false \
50
+   --hiveconf hive.server2.thrift.port=10000 \
51
+   --hiveconf hive.root.logger=INFO,console \
52
+   --hiveconf hive.metastore.warehouse.dir=$WAREHOUSE
@@ -0,0 +1 @@
1
+ RSpec::Hive.configure(File.join(__dir__, 'config.yml')) # points at the config.yml in this file's own directory
@@ -0,0 +1,6 @@
1
+ hive:
2
+ host: 192.168.99.100
3
+ port: 10000
4
+ host_shared_directory_path: /Users/Shared/tmp/spec-tmp-files
5
+ docker_shared_directory_path: /tmp/spec-tmp-files
6
+ hive_version: 10
data/examples/query.rb ADDED
@@ -0,0 +1,18 @@
1
+ class Query
2
+ def table_schema
3
+ RBHive::TableSchema.new('people', nil, line_sep: '\n', field_sep: ';') do
4
+ column :name, :string
5
+ column :address, :string
6
+ column :amount, :float
7
+ end
8
+ end
9
+
10
+ def table_name
11
+ table_schema.name
12
+ end
13
+
14
+ def run_hive_query(connection)
15
+ query = "SELECT * FROM `#{table_schema.name}` WHERE `amount` > 2.5"
16
+ connection.execute(query)
17
+ end
18
+ end
@@ -0,0 +1,41 @@
1
+ require 'spec_helper'
2
+ require_relative 'query'
3
+ require_relative 'config_helper'
4
+ # Example spec for Query; config_helper configures RSpec::Hive before the examples are defined.
5
+ describe Query do
6
+ include RSpec::Hive::WithHiveConnection
7
+ # NOTE(review): `connection` used below is presumably provided by WithHiveConnection — confirm.
8
+ subject { described_class.new }
9
+
10
+ describe 'hive query' do
11
+ let(:input_data) do
12
+ [
13
+ ['Mikolaj', 'Cos', 1.23],
14
+ ['Wojtek', 'Cos', 3.76]
15
+ ]
16
+ end
17
+ # Create the table from the schema and load the rows above before each example.
18
+ before do
19
+ connection.execute(subject.table_schema.create_table_statement)
20
+ connection.load_into_table(subject.table_schema, input_data)
21
+ end
22
+ # Of the two input rows only the 3.76 one is above the 3.2 threshold.
23
+ it 'query returns one row' do
24
+ query = "SELECT * FROM `#{subject.table_name}` WHERE amount > 3.2"
25
+ query_result = connection.fetch(query).first.values
26
+ expect(query_result).to contain_exactly(
27
+ a_string_matching('Wojtek'),
28
+ a_string_matching('Cos'),
29
+ a_string_matching(/3\.7.*/)) # amount is matched as text; only its leading digits are pinned
30
+ end
31
+ # Of the two input rows only the 1.23 one is below the 3.2 threshold.
32
+ it 'query returns one row 2' do
33
+ query = "SELECT * FROM `#{subject.table_name}` WHERE amount < 3.2"
34
+ query_result = connection.fetch(query).first.values
35
+ expect(query_result).to contain_exactly(
36
+ a_string_matching('Mikolaj'),
37
+ a_string_matching('Cos'),
38
+ a_string_matching(/1\.2.*/)) # amount is matched as text; only its leading digits are pinned
39
+ end
40
+ end
41
+ end
data/lib/rspec/hive.rb ADDED
@@ -0,0 +1,31 @@
1
+ require 'rspec/hive/version'
2
+ require 'rspec/hive/db_name'
3
+ require 'rspec/hive/configuration'
4
+ require 'rspec/hive/connection_delegator'
5
+ require 'rspec/hive/connector'
6
+ require 'rspec/hive/with_hive_connection'
7
+
8
+ module RSpec
9
+ module Hive
10
+ attr_reader :configuration
11
+
12
+ def self.configure(file_name = nil)
13
+ @configuration = new_configuration(file_name)
14
+ yield(@configuration) if block_given?
15
+ @configuration
16
+ end
17
+
18
+ def self.connector
19
+ @configuration ||= Configuration.new
20
+ Connector.new(@configuration)
21
+ end
22
+
23
+ def self.new_configuration(file_name)
24
+ Configuration.new(file_name)
25
+ end
26
+
27
+ private_class_method :new_configuration
28
+ end
29
+ end
30
+
31
+ require 'rspec/rake_tasks/railtie' if defined?(Rails)