hadupils 0.4.0 → 0.5.0

data/CHANGELOG.md CHANGED
@@ -44,3 +44,15 @@
 Helper method for assembling gzipped archives the contents of which
 are hive-ext compatible.
 
+### 0.5.0
+* Introduced a hadoop command that enforces user config and hadoop-ext
+  configuration files (hadoop.xml) and calls the hadoop runner
+* Introduced the following commands and runners that utilize the hadoop
+  runner (to work against the DFS): mktemp, withtmpdir and rm
+* Support for hadoop_confs in Hadupils::Extensions
+* Introduced Hadupils::Extensions::Dfs::TmpFile
+* Introduced the Hadupils::Hacks module: String refinements (randcase)
+  for Ruby 2+ and a String monkey patch for Ruby < 2.0
+* Some refactoring, plus a fix for a spec bug on Mac OS X
+* Tweaked old unit tests and added new ones for the new features
+* Updated the README with examples
data/Gemfile CHANGED
@@ -1,6 +1,13 @@
-source :rubygems
+source 'https://rubygems.org'
 
-gem 'mocha', :group => :test
-gem 'rake', '10.1.0', :group => :development
-gem 'shoulda-context', :group => :test
+gem 'uuid', '~> 2.3.0'
 
+group :development do
+  gem 'rake', '~> 10.1.0'
+  gem 'pry', '~> 0.9.0'
+end
+
+group :test do
+  gem 'mocha', :group => :test
+  gem 'shoulda-context', :group => :test
+end
data/Gemfile.lock CHANGED
@@ -1,16 +1,30 @@
1
1
  GEM
2
- remote: http://rubygems.org/
2
+ remote: https://rubygems.org/
3
3
  specs:
4
+ coderay (1.0.9)
5
+ macaddr (1.6.1)
6
+ systemu (~> 2.5.0)
4
7
  metaclass (0.0.1)
8
+ method_source (0.8.1)
5
9
  mocha (0.14.0)
6
10
  metaclass (~> 0.0.1)
11
+ pry (0.9.12)
12
+ coderay (~> 1.0.5)
13
+ method_source (~> 0.8)
14
+ slop (~> 3.4)
7
15
  rake (10.1.0)
8
16
  shoulda-context (1.1.5)
17
+ slop (3.4.4)
18
+ systemu (2.5.2)
19
+ uuid (2.3.7)
20
+ macaddr (~> 1.0)
9
21
 
10
22
  PLATFORMS
11
23
  ruby
12
24
 
13
25
  DEPENDENCIES
14
26
  mocha
15
- rake (= 10.1.0)
27
+ pry (~> 0.9.0)
28
+ rake (~> 10.1.0)
16
29
  shoulda-context
30
+ uuid (~> 2.3.0)
data/README.md CHANGED
@@ -2,3 +2,28 @@ hadupils
 ========
 
 Operating environment oriented utilities for hadoop (Hadoop + Utils => hadupils)
+
+## Shell Environment Variables
+- $HADUPILS_BASE_TMP_PATH
+  * The base path for DFS temporary file/directory creation
+  * Defaults to '/tmp' on the DFS (only set this if you need another base directory)
+- $HADUPILS_TMPDIR_PATH
+  * Set when the subcommand is executed in a subshell via the hadupils 'withtmpdir' command
+  * Its value is the tmp directory that hadupils created for the subcommand
+  * hadupils will clean up (remove) the directory if the subcommand returns an exit status of zero
+
+## Hadupils' Commands
+- hive __command__ _options_
+- hadoop __command__ _options_
+- mktemp [-d]
+- withtmpdir __subshell_command__
+- rm [-r] __full_path_to_file_or_directory__
+
+### Example Usages
+``` shell
+hadupils hive -e 'select a.col from tab1 a'
+hadupils hadoop fs -ls /tmp
+hadupils mktemp -d
+hadupils withtmpdir 'echo $HADUPILS_TMPDIR_PATH'
+hadupils rm -r /tmp/hadupils-tmp-e341afe01721013128c122000af92329
+```
data/bin/hadupils CHANGED
@@ -5,4 +5,3 @@
 require 'hadupils'
 
 exit Hadupils::Commands.run ARGV[0], ARGV[1..-1]
-
@@ -1,5 +1,5 @@
 module Hadupils::Commands
-  def self.run(command, params)
+  def self.run(command, params=[])
     handler = handler_for command
     handler.run params
   end
@@ -18,9 +18,13 @@ module Hadupils::Commands
   end
 
   class SimpleCommand
-    def self.run(params)
+    def self.run(params=[])
      self.new.run params
    end
+
+    def successful?(exitstatus)
+      exitstatus == 0
+    end
   end
 
   module HadoopExt
@@ -29,18 +33,42 @@ module Hadupils::Commands
     end
   end
 
+  module HiveExt
+    def hive_ext
+      @hive_ext ||= Hadupils::Extensions::HiveSet.new(Hadupils::Search.hive_extensions)
+    end
+  end
+
   module UserConf
     def user_config
       @user_config ||= Hadupils::Extensions::Static.new(Hadupils::Search.user_config)
     end
   end
 
-  module HiveExt
-    def hive_ext
-      @hive_ext ||= Hadupils::Extensions::HiveSet.new(Hadupils::Search.hive_extensions)
+  class Hadoop < SimpleCommand
+    include HadoopExt
+    include UserConf
+
+    def assemble_parameters(parameters)
+      @hadoop_ext = Hadupils::Extensions::Static.new(Hadupils::Search.hadoop_assets)
+      hadoop_cmd = parameters[0...1]
+      hadoop_cmd_opts = parameters[1..-1] || []
+
+      if %w(fs dfs).include? parameters[0]
+        hadoop_cmd + user_config.hadoop_confs + hadoop_ext.hadoop_confs + hadoop_cmd_opts
+      else
+        # TODO: Assemble command-line parameters to package assets/code for submitting jobs, e.g.
+        hadoop_cmd + user_config.hadoop_confs + hadoop_ext.hadoop_confs + hadoop_cmd_opts
+      end
+    end
+
+    def run(parameters)
+      Hadupils::Runners::Hadoop.run assemble_parameters(parameters)
     end
   end
 
+  register_handler :hadoop, Hadoop
+
   class Hive < SimpleCommand
     include HadoopExt
     include HiveExt
@@ -56,4 +84,88 @@ module Hadupils::Commands
   end
 
   register_handler :hive, Hive
+
+  class MkTmpFile < SimpleCommand
+    def run(parameters)
+      # Creates a new tmpdir and puts the full tmpdir_path to STDOUT
+      Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile!
+      tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
+
+      # Similar to shell mktemp, but for Hadoop DFS!
+      # Makes a tmp file by default; a tmp directory with '-d' flag
+      fs_cmd = parameters[0] == '-d' ? '-mkdir' : '-touchz'
+      exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmpdir_path]
+      if successful? exitstatus
+        exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-chmod', '700', tmpdir_path]
+        if successful? exitstatus
+          puts tmpdir_path
+        else
+          $stderr.puts "Failed to chmod 700 dfs tmpdir: #{tmpdir_path}"
+        end
+      else
+        $stderr.puts "Failed creating dfs tmpdir: #{tmpdir_path}"
+      end
+      exitstatus
+    end
+  end
+
+  register_handler :mktemp, MkTmpFile
+
+  class RmFile < SimpleCommand
+    def run(parameters)
+      # Similar to shell rm, but for Hadoop DFS!
+      # Removes files by default; removes directories recursively with '-r' flag
+      fs_cmd, tmp_dirs =
+        if parameters[0] == '-r'
+          ['-rmr', parameters[1..-1]]
+        else
+          ['-rm', parameters[0..-1]]
+        end
+
+      if tmp_dirs.empty?
+        $stderr.puts 'Failed to remove unspecified tmpdir(s), please specify tmpdir_path'
+        255
+      else
+        exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmp_dirs].flatten
+        if successful? exitstatus
+          Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile!
+        else
+          $stderr.puts "Failed to remove dfs tmpdir: #{tmp_dirs.join(' ')}"
+        end
+        exitstatus
+      end
+    end
+  end
+
+  register_handler :rm, RmFile
+
+  class WithTmpDir < SimpleCommand
+    def run(parameters)
+      # Runs provided subcommand with tmpdir and cleans up tmpdir on an exitstatus of zero
+      if parameters.empty?
+        $stderr.puts 'Yeeaaahhh... sooo... you failed to provide a subcommand...'
+        255
+      else
+        # Let's create the tmpdir
+        exitstatus = Hadupils::Commands::MkTmpFile.run ['-d']
+        if successful? exitstatus
+          tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
+          parameters.unshift({'HADUPILS_TMPDIR_PATH' => tmpdir_path})

+          # Let's run the shell subcommand!
+          exitstatus = Hadupils::Runners::Subcommand.run parameters
+
+          if successful? exitstatus
+            # Let's attempt to cleanup tmpdir_path
+            exitstatus = Hadupils::Commands::RmFile.run ['-r', tmpdir_path]
+          else
+            $stderr.puts "Failed to run shell subcommand: #{parameters}"
+          end
+        end
+        exitstatus
+      end
+    end
+  end
+
+  register_handler :withtmpdir, WithTmpDir
 end
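For orientation, here is a minimal sketch of how the new commands compose. The handler names and `run` signatures come from the hunks above; it assumes `$HADOOP_HOME` points at a working Hadoop install with a reachable DFS, and reuses the tmpdir path from the README example:

```ruby
require 'hadupils'

# Dispatch goes through the handlers registered above.
Hadupils::Commands.run 'mktemp', ['-d']   # prints a fresh DFS tmpdir path on success

# withtmpdir exposes the created path to the subshell via HADUPILS_TMPDIR_PATH
# and removes the directory when the subcommand exits zero.
Hadupils::Commands.run 'withtmpdir', ['echo $HADUPILS_TMPDIR_PATH']

# rm -r removes a DFS path by hand.
Hadupils::Commands.run 'rm', ['-r', '/tmp/hadupils-tmp-e341afe01721013128c122000af92329']
```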
@@ -1,3 +1,6 @@
+require 'tempfile'
+require 'fileutils'
+
 module Hadupils::Extensions
 
   # Hive-targeted extensions derived from filesystem layout
@@ -204,8 +207,6 @@ module Hadupils::Extensions
       end
     end
 
-    require 'tempfile'
-    require 'fileutils'
     ::Dir.mktmpdir do |workdir|
       basenames = dist.collect do |src|
         FileUtils.cp src, File.join(workdir, File.basename(src))
@@ -1,4 +1,55 @@
+require 'uuid'
+require 'tempfile'
+
 module Hadupils::Extensions
+  # Tools for managing tmp files in the hadoop dfs
+  module Dfs
+    module TmpFile
+      def self.uuid
+        @uuid ||= UUID.new
+      end
+
+      def self.tmp_path
+        @tmp_path ||= (ENV['HADUPILS_BASE_TMP_PATH'] || '/tmp')
+      end
+
+      def self.tmpfile_path
+        @tmpdir_path ||= ::File.join(tmp_path, "hadupils-tmp-#{uuid.generate(:compact)}")
+      end
+
+      def self.reset_tmpfile!
+        @tmpdir_path = nil
+      end
+    end
+  end
+
+  # Tools for managing hadoop configuration files ("hadoop.xml").
+  module HadoopConf
+    module HadoopOpt
+      def hadoop_opts
+        ['-conf', path]
+      end
+    end
+
+    # Wraps an extant hadoop configuration file and provides
+    # an interface compatible with the critical parts of the
+    # Static sibling class so they may be used interchangeably
+    # by runners when determining hadoop options.
+    class Static
+      attr_reader :path
+
+      include HadoopOpt
+
+      # Given a path, expands it to an absolute path
+      def initialize(path)
+        @path = ::File.expand_path(path)
+      end
+
+      def close
+      end
+    end
+  end
+
   # Tools for managing hive initialization files ("hiverc").
   module HiveRC
     module HiveOpt
@@ -32,7 +83,6 @@ module Hadupils::Extensions
     attr_reader :file
 
     include HiveOpt
-    require 'tempfile'
 
     # This will allow us to change what handles the dynamic files.
     def self.file_handler=(handler)
@@ -87,6 +137,12 @@ module Hadupils::Extensions
       end
     end
 
+    def hadoop_conf(&block)
+      @scope.instance_eval do
+        @hadoop_conf_block = block
+      end
+    end
+
     def hiverc(&block)
       @scope.instance_eval do
        @hiverc_block = block
@@ -110,12 +166,16 @@ module Hadupils::Extensions
       assets
     end
 
+    def hadoop_confs
+      []
+    end
+
     def hivercs
       []
     end
 
     def self.gather_assets(directory)
-      if not directory.nil?
+      if directory
         Hadupils::Assets.assets_in(directory)
       else
         []
@@ -179,12 +239,26 @@ module Hadupils::Extensions
       []
     end
 
+    def hadoop_conf_path
+      ::File.join(path, 'hadoop.xml') if path
+    end
+
     def hiverc_path
-      ::File.join(path, 'hiverc')
+      ::File.join(path, 'hiverc') if path
+    end
+
+    def hadoop_conf?
+      hadoop_conf_path ? ::File.file?(hadoop_conf_path) : false
     end
 
     def hiverc?
-      ::File.file? hiverc_path
+      hiverc_path ? ::File.file?(hiverc_path) : false
+    end
+
+    def hadoop_confs
+      r = []
+      r << Hadupils::Extensions::HadoopConf::Static.new(hadoop_conf_path) if hadoop_conf?
+      r
     end
 
     def hivercs
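A small usage sketch of the two pieces introduced above, `Dfs::TmpFile` and `HadoopConf::Static` (the `hadoop-ext/hadoop.xml` path is hypothetical):

```ruby
require 'hadupils'

# Memoized, uuid-suffixed path under $HADUPILS_BASE_TMP_PATH (default '/tmp'):
Hadupils::Extensions::Dfs::TmpFile.tmpfile_path   # => "/tmp/hadupils-tmp-<uuid>"
Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile! # forget the memoized path

# Wrap an existing hadoop.xml; hadoop_opts is what runners splice into the command line:
conf = Hadupils::Extensions::HadoopConf::Static.new('hadoop-ext/hadoop.xml') # hypothetical path
conf.hadoop_opts # => ['-conf', ::File.expand_path('hadoop-ext/hadoop.xml')]
```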
@@ -0,0 +1,29 @@
+if RUBY_VERSION < '2.0'
+  class String
+    def randcase
+      dup.split('').map do |char|
+        if rand(1..10) > 5
+          char.upcase
+        else
+          char.downcase
+        end
+      end.join
+    end
+  end
+else
+  module Hadupils
+    module Hacks
+      refine ::String do
+        def randcase
+          dup.split('').map do |char|
+            if rand(1..100) > 50
+              char.upcase
+            else
+              char.downcase
+            end
+          end.join
+        end
+      end
+    end
+  end
+end
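A usage sketch for `randcase`, mirroring the tests later in this diff (on Ruby 2+ the refinement must be activated with `using`; on older Rubies the monkey patch is global):

```ruby
require 'hadupils/hacks'

using Hadupils::Hacks unless RUBY_VERSION < '2.0'

# Each character's case is chosen at random, so output varies per call:
'mktemp'.randcase # => e.g. "mKTemP"
```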
@@ -6,6 +6,10 @@ module Hadupils::Runners
     @params = params
   end
 
+  def self.run(*params)
+    self.new(*params).wait!
+  end
+
   def command; end
 
   def execute!
@@ -35,19 +39,39 @@ module Hadupils::Runners
   def wait!
     @last_result = execute!
     @last_status = $?
+    last_exitstatus
+  end
+
+  def last_exitstatus
     if @last_result.nil?
       255
     else
       @last_status.exitstatus
     end
   end
+end
 
-  def self.run(*params)
-    self.new(*params).wait!
+class Hadoop < Base
+  class << self; attr_writer :base_runner; end
+
+  def self.base_runner
+    @base_runner || ::File.join(ENV['HADOOP_HOME'], 'bin', 'hadoop')
+  end
+
+  def command
+    params.inject([self.class.base_runner]) do |result, param|
+      if param.respond_to? :hadoop_opts
+        result + param.hadoop_opts
+      else
+        result << param
+      end
+    end
   end
 end
 
 class Hive < Base
+  class << self; attr_writer :base_runner; end
+
   def initialize(params, hive_aux_jars_path='')
     super(params)
     @hive_aux_jars_path = hive_aux_jars_path
@@ -57,10 +81,6 @@ module Hadupils::Runners
     @base_runner || ::File.join(ENV['HIVE_HOME'], 'bin', 'hive')
   end
 
-  def self.base_runner=(runner_path)
-    @base_runner = runner_path
-  end
-
   def command
     params.inject([env_overrides, self.class.base_runner]) do |result, param|
       if param.respond_to? :hive_opts
@@ -72,14 +92,20 @@ module Hadupils::Runners
   end
 
   def env_overrides
-    e = {}
+    env = {}
     settings = [@hive_aux_jars_path, ::ENV['HIVE_AUX_JARS_PATH']].reject do |val|
-      val.nil? or val.strip == ''
+      val.nil? || val.strip.empty?
     end
     if settings.length > 0
-      e['HIVE_AUX_JARS_PATH'] = settings.join(',')
+      env['HIVE_AUX_JARS_PATH'] = settings.join(',')
     end
-    e
+    env
+  end
+end
+
+class Subcommand < Base
+  def command
+    params
   end
 end
 end
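A minimal sketch of the runner changes above (assumes `$HADOOP_HOME` is set; per `Hadoop#command`, any param responding to `:hadoop_opts` is expanded in place; the hadoop-ext path is hypothetical):

```ruby
require 'hadupils'

conf = Hadupils::Extensions::HadoopConf::Static.new('hadoop-ext/hadoop.xml') # hypothetical path

# Base.run instantiates the runner and waits, returning the exit status
# (255 when the underlying invocation yields a nil result).
status = Hadupils::Runners::Hadoop.run ['fs', conf, '-ls', '/tmp']
# assembled command: $HADOOP_HOME/bin/hadoop fs -conf <expanded hadoop.xml path> -ls /tmp

# Subcommand#command is just its params, so a leading env hash passes
# straight through to the underlying system call (as the tests below expect):
Hadupils::Runners::Subcommand.run [{'HADUPILS_TMPDIR_PATH' => '/tmp/example'}, 'env | grep HADUPILS']
```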
data/lib/hadupils/util.rb CHANGED
@@ -1,7 +1,8 @@
+require 'rubygems/package'
+require 'zlib'
+
 module Hadupils::Util
   def self.read_archive(archive_path)
-    require 'rubygems/package'
-    require 'zlib'
     Zlib::GzipReader.open(archive_path) do |zlib|
       Gem::Package::TarReader.new(zlib) do |tar|
         tar.rewind
data/lib/hadupils.rb CHANGED
@@ -8,3 +8,4 @@ require 'hadupils/extensions'
 require 'hadupils/runners'
 require 'hadupils/search'
 require 'hadupils/util'
+require 'hadupils/hacks'
@@ -4,6 +4,7 @@ require 'test/unit'
 require 'shoulda-context'
 require 'mocha/setup'
 require 'tempfile'
+require 'pathname'
 require 'hadupils'
 
 # Add tempdir niceties to Test::Unit::TestCase
@@ -35,7 +36,7 @@ class Test::Unit::TestCase
   def self.tempdir_context(name, &block)
     context name do
       setup do
-        @tempdir = Test::Unit::TestCase::DirWrapper.new(::File.expand_path(::Dir.mktmpdir))
+        @tempdir = Test::Unit::TestCase::DirWrapper.new(::Pathname.new(::File.expand_path(::Dir.mktmpdir)).realpath.to_s)
      end
 
      teardown do
@@ -1,3 +1,5 @@
+using Hadupils::Hacks unless RUBY_VERSION < '2.0'
+
 class Hadupils::CommandsTest < Test::Unit::TestCase
   context Hadupils::Commands do
     context 'run singleton method' do
@@ -26,16 +28,100 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
       end
     end
 
+    context 'Hadoop' do
+      setup do
+        @klass = Hadupils::Commands::Hadoop
+      end
+
+      should 'register with :hadoop name' do
+        handlers = [:hadoop]
+        run_handler_assertions_for handlers
+      end
+
+      should 'have a #run singleton method that dispatches to an instance #run' do
+        @klass.expects(:new).with.returns(instance = mock())
+        instance.expects(:run).with(params = mock()).returns(result = mock())
+        assert_equal result, @klass.run(params)
+      end
+
+      should 'have a Static extension based on a search for hadoop-ext' do
+        Hadupils::Search.expects(:hadoop_assets).with.returns(conf = mock())
+        Hadupils::Extensions::Static.expects(:new).with(conf).returns(extension = mock())
+        hadoop_ext = Hadupils::Extensions::Static.new(Hadupils::Search.hadoop_assets)
+        cmd = @klass.new
+        cmd.stubs(:hadoop_ext).with.returns(hadoop_ext)
+        assert_equal extension, cmd.hadoop_ext
+        # This should cause failure if the previous result wasn't
+        # cached internally (by breaking expectations).
+        cmd.hadoop_ext
+      end
+
+      should 'have a Static extension based on user config' do
+        Hadupils::Search.expects(:user_config).with.returns(conf = mock())
+        Hadupils::Extensions::Static.expects(:new).with(conf).returns(extension = mock())
+        cmd = @klass.new
+        assert_equal extension, cmd.user_config
+        # Fails on expectations if previous result wasn't cached.
+        cmd.user_config
+      end
+
+      context '#run' do
+        setup do
+          @command = @klass.new
+          @command.stubs(:user_config).with.returns(@user_config = mock())
+          @command.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
+          @runner_class = Hadupils::Runners::Hadoop
+        end
+
+        context 'with user config and hadoop_confs' do
+          setup do
+            @user_config.stubs(:hadoop_confs).returns(@user_config_hadoop_confs = [mock(), mock()])
+            @hadoop_ext.stubs(:hadoop_confs).returns(@hadoop_ext_hadoop_confs = [mock(), mock(), mock()])
+          end
+
+          should 'apply hadoop_conf options to hadoop runner call' do
+            @runner_class.expects(:run).with(@user_config_hadoop_confs +
+                                             @hadoop_ext_hadoop_confs).returns(result = mock())
+            assert_equal result, @command.run([])
+          end
+
+          should 'insert hadoop_conf options into position 1 of given params array to hadoop runner call' do
+            params = [mock(), mock()]
+            @runner_class.expects(:run).with(params[0...1] +
+                                             @user_config_hadoop_confs +
+                                             @hadoop_ext_hadoop_confs +
+                                             params[1..-1]).returns(result = mock())
+            assert_equal result, @command.run(params)
+          end
+        end
+
+        context 'without hadoop_confs' do
+          setup do
+            @user_config.stubs(:hadoop_confs).returns([])
+            @hadoop_ext.stubs(:hadoop_confs).returns([])
+          end
+
+          should 'pass params unchanged through to hadoop runner call' do
+            @runner_class.expects(:run).with(params = [mock(), mock()]).returns(result = mock())
+            assert_equal result, @command.run(params)
+          end
+
+          should 'handle empty params' do
+            @runner_class.expects(:run).with([]).returns(result = mock())
+            assert_equal result, @command.run([])
+          end
+        end
+      end
+    end
+
     context 'Hive' do
       setup do
         @klass = Hadupils::Commands::Hive
       end
 
       should 'register with :hive name' do
-        assert_same @klass, Hadupils::Commands.handler_for(:hive)
-        assert_same @klass, Hadupils::Commands.handler_for(:HivE)
-        assert_same @klass, Hadupils::Commands.handler_for('hive')
-        assert_same @klass, Hadupils::Commands.handler_for('hIVe')
+        handlers = [:hive]
+        run_handler_assertions_for handlers
       end
 
       should 'have a #run singleton method that dispatches to an instance #run' do
@@ -207,21 +293,21 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
 
       should 'produce a valid set of parameters and hivercs' do
         Kernel.stubs(:system).with() do |*args|
-          args[0] == {'HIVE_AUX_JARS_PATH' => @hive_aux_jars_path_val} and
-          args[1] == @hive_prog and
-          args[2] == '-i' and
-          File.open(args[3], 'r').read == @static_hiverc_content and
-          args[4] == '-i' and
-          File.open(args[5], 'r').read == @dynamic_hiverc_content and
-          args[6] == '-i' and
-          File.open(args[7], 'r').read == @hive_exts[:one][:dynamic_hiverc_content] and
-          args[8] == '-i' and
-          File.open(args[9], 'r').read == @hive_exts[:one][:static_hiverc_content] and
-          args[10] == '-i' and
-          File.open(args[11], 'r').read == @hive_exts[:two][:dynamic_hiverc_content] and
-          args[12] == '-i' and
-          File.open(args[13], 'r').read == @hive_exts[:two][:static_hiverc_content] and
-          args[14] == '--hiveconf' and
+          args[0] == {'HIVE_AUX_JARS_PATH' => @hive_aux_jars_path_val} &&
+          args[1] == @hive_prog &&
+          args[2] == '-i' &&
+          File.open(args[3], 'r').read == @static_hiverc_content &&
+          args[4] == '-i' &&
+          File.open(args[5], 'r').read == @dynamic_hiverc_content &&
+          args[6] == '-i' &&
+          File.open(args[7], 'r').read == @hive_exts[:one][:dynamic_hiverc_content] &&
+          args[8] == '-i' &&
+          File.open(args[9], 'r').read == @hive_exts[:one][:static_hiverc_content] &&
+          args[10] == '-i' &&
+          File.open(args[11], 'r').read == @hive_exts[:two][:dynamic_hiverc_content] &&
+          args[12] == '-i' &&
+          File.open(args[13], 'r').read == @hive_exts[:two][:static_hiverc_content] &&
+          args[14] == '--hiveconf' &&
           args[15] == 'my.foo=your.fu'
         end
         Hadupils::Commands.run 'hive', ['--hiveconf', 'my.foo=your.fu']
@@ -231,7 +317,149 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
         ::Dir.chdir @pwd
       end
     end
+    context 'MkTempFile' do
+      setup do
+        @klass = Hadupils::Commands::MkTmpFile
+      end
+
+      should 'register with :mktemp name' do
+        handlers = [:mktemp]
+        run_handler_assertions_for handlers
+      end
+
+      should 'have a #run singleton method that dispatches to an instance #run' do
+        @klass.expects(:new).with.returns(instance = mock())
+        instance.expects(:run).with(params = mock()).returns(result = mock())
+        assert_equal result, @klass.run(params)
+      end
+
+      context '#run' do
+        setup do
+          @command = @klass.new
+          Hadupils::Runners::Hadoop.stubs(:base_runner).returns(@hadoop_path = mock().to_s + '-hadoop')
+        end
+
+        should 'provide invocation for bare mktemp if given empty parameters' do
+          tmpdir_path = mock().to_s
+          Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+          Kernel.expects(:system).with(@hadoop_path, 'fs', '-touchz', tmpdir_path).returns(0)
+          Kernel.expects(:system).with(@hadoop_path, 'fs', '-chmod', '700', tmpdir_path).returns(0)
+          assert_equal 0, @command.run([])
+        end
+
+        should 'provide invocation for mktemp if given with -d flag parameter' do
+          tmpdir_path = mock().to_s
+          Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+          Kernel.expects(:system).with(@hadoop_path, 'fs', '-mkdir', tmpdir_path).returns(0)
+          Kernel.expects(:system).with(@hadoop_path, 'fs', '-chmod', '700', tmpdir_path).returns(0)
+          assert_equal 0, @command.run(['-d'])
+        end
+      end
+    end
+
+    context 'RmFile' do
+      setup do
+        @klass = Hadupils::Commands::RmFile
+      end
+
+      should 'register with :rm name' do
+        handlers = [:rm]
+        run_handler_assertions_for handlers
+      end
+
+      should 'have a #run singleton method that dispatches to an instance #run' do
+        @klass.expects(:new).with.returns(instance = mock())
+        instance.expects(:run).with(params = mock()).returns(result = mock())
+        assert_equal result, @klass.run(params)
+      end
+
+      context '#run' do
+        setup do
+          @command = @klass.new
+          Hadupils::Runners::Hadoop.stubs(:base_runner).returns(@hadoop_path = mock().to_s + '-hadoop')
+        end
+
+        should 'provide invocation for bare rm if given empty parameters' do
+          assert_equal 255, @klass.run([])
+        end
+
+        should 'provide invocation for rm if just tmpdir_path parameter' do
+          tmpdir_path = mock().to_s
+          Kernel.expects(:system).with(@hadoop_path, 'fs', '-rm', tmpdir_path).returns(0)
+          assert_equal 0, @klass.run([tmpdir_path])
+        end
+
+        should 'provide invocation for hadoop if just tmpdir_path with -r flag parameter' do
+          tmpdir_path = mock().to_s
+          Kernel.expects(:system).with(@hadoop_path, 'fs', '-rmr', tmpdir_path).returns(0)
+          assert_equal 0, @klass.run(['-r', tmpdir_path])
+        end
+      end
+
+      context 'WithTempDir' do
+        setup do
+          @klass = Hadupils::Commands::WithTmpDir
+        end
+
+        should 'register with :withtmpdir name' do
+          handlers = [:withtmpdir]
+          run_handler_assertions_for handlers
+        end
+
+        should 'have a #run singleton method that dispatches to an instance #run' do
+          @klass.expects(:new).with.returns(instance = mock())
+          instance.expects(:run).with(params = mock()).returns(result = mock())
+          assert_equal result, @klass.run(params)
+        end
+
+        context '#run' do
+          setup do
+            @command = @klass.new
+            Hadupils::Runners::Hadoop.stubs(:base_runner).returns(@hadoop_path = mock().to_s + '-hadoop')
+          end
+
+          should 'provide invocation for withtmpdir if given parameters for shell subcommand' do
+            tmpdir_path = mock().to_s
+            run_common_subcommand_assertions_with tmpdir_path
+            Kernel.expects(:system).with({'HADUPILS_TMPDIR_PATH' => tmpdir_path}, '/path/to/my_wonderful_script.sh').returns(0)
+            Kernel.expects(:system).with(@hadoop_path, 'fs', '-rmr', tmpdir_path).returns(0)
+            assert_equal 0, @klass.run(['/path/to/my_wonderful_script.sh'])
+          end
+
+          should 'provide invocation for withtmpdir if given parameters for shell subcommand (another hadupils command)' do
+            tmpdir_path = mock().to_s
+            run_common_subcommand_assertions_with tmpdir_path
+            Kernel.expects(:system).with({'HADUPILS_TMPDIR_PATH' => tmpdir_path}, 'hadupils hadoop ls /tmp').returns(0)
+            Kernel.expects(:system).with(@hadoop_path, 'fs', '-rmr', tmpdir_path).returns('')
+            assert_equal 0, @klass.run(['hadupils hadoop ls /tmp'])
+          end
+
+          should 'provide invocation for withtmpdir if given parameters for shell subcommand with nil result' do
+            tmpdir_path = mock().to_s
+            run_common_subcommand_assertions_with tmpdir_path
+            Kernel.expects(:system).with({'HADUPILS_TMPDIR_PATH' => tmpdir_path}, '/path/to/my_wonderful_script.sh').returns(nil)
+            assert_equal 255, @klass.run(['/path/to/my_wonderful_script.sh'])
+          end
+        end
+      end
+    end
     end
   end
-  end
 
+  def run_common_subcommand_assertions_with(tmpdir_path)
+    Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+    Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+    Kernel.expects(:system).with(@hadoop_path, 'fs', '-mkdir', tmpdir_path).returns(0)
+    Kernel.expects(:system).with(@hadoop_path, 'fs', '-chmod', '700', tmpdir_path).returns(0)
+  end
+
+  def run_handler_assertions_for(handlers)
+    handlers.each do |handler|
+      handler = handler.to_s.downcase
+      assert_same @klass, Hadupils::Commands.handler_for(handler.to_sym)
+      assert_same @klass, Hadupils::Commands.handler_for(handler.randcase.to_sym)
+      assert_same @klass, Hadupils::Commands.handler_for(handler)
+      assert_same @klass, Hadupils::Commands.handler_for(handler.randcase)
    end
+  end
+end
@@ -1,3 +1,6 @@
+require 'rubygems/package'
+require 'zlib'
+
 class Hadupils::ExtensionsTest < Test::Unit::TestCase
   context Hadupils::Extensions::Base do
     context 'initialization with nil path' do
@@ -234,8 +237,6 @@ class Hadupils::ExtensionsTest < Test::Unit::TestCase
 
     context 'ArchivePath extension' do
       setup do
-        require 'rubygems/package'
-        require 'zlib'
         @klass = Hadupils::Extensions::FlatArchivePath
         @bin_archives = %w(one two).collect do |name|
           @tempdir.file("zarchive-with-bin-#{name}.tar.gz") do |f|
@@ -1,3 +1,5 @@
+require 'stringio'
+
 class Hadupils::Extensions::HiveTest < Test::Unit::TestCase
   shared_context :provide_hive_ext do
     setup do
@@ -235,8 +237,6 @@ class Hadupils::Extensions::HiveTest < Test::Unit::TestCase
     end
 
     context 'build_archive singleton method' do
-      require 'stringio'
-
      setup do
        ::Dir.mkdir(@some_dir = @tempdir.full_path('some_stuff'))
        ::Dir.mkdir(@other_dir = @tempdir.full_path('other_stuff'))
@@ -85,6 +85,49 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
     end
   end
 
+  context Hadupils::Runners::Hadoop do
+    setup do
+      @klass = Hadupils::Runners::Hadoop
+    end
+
+    should 'be a runner' do
+      assert_kind_of Hadupils::Runners::Base, @klass.new([])
+    end
+
+    should 'use $HADOOP_HOME/bin/hadoop as the base runner' do
+      ENV.expects(:[]).with('HADOOP_HOME').returns(home = mock().to_s)
+      assert_equal ::File.join(home, 'bin', 'hadoop'),
+                   @klass.base_runner
+    end
+
+    context '#command' do
+      setup do
+        @klass.stubs(:base_runner).returns(@hadoop_path = mock().to_s + '-hadoop')
+      end
+
+      should 'provide invocation for bare hadoop if given empty parameters' do
+        assert_equal [@hadoop_path], @klass.new([]).command
+      end
+
+      should 'provide invocation for hadoop with all given parameters' do
+        params = [mock().to_s, mock().to_s, mock().to_s, mock().to_s]
+        assert_equal [@hadoop_path] + params,
+                     @klass.new(params).command
+      end
+
+      should 'provide args for hadoop with :hadoop_opts on supporting params' do
+        p1 = mock()
+        p1.expects(:hadoop_opts).with.returns(p1_opts = ['-conf', mock().to_s])
+        p2 = mock()
+        p2.expects(:hadoop_opts).with.returns(p2_opts = ['-conf', mock().to_s])
+        s1 = mock().to_s
+        s2 = mock().to_s
+        assert_equal [@hadoop_path, s1] + p1_opts + p2_opts + [s2],
+                     @klass.new([s1, p1, p2, s2]).command
+      end
+    end
+  end
+
   context Hadupils::Runners::Hive do
     setup do
       @klass = Hadupils::Runners::Hive
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: hadupils
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 prerelease:
 platform: ruby
 authors:
@@ -9,24 +9,40 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-08-22 00:00:00.000000000 Z
+date: 2013-10-11 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: uuid
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 2.3.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 2.3.0
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ! '>='
+    - - ~>
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 1.3.5
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ! '>='
+    - - ~>
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 1.3.5
 - !ruby/object:Gem::Dependency
   name: mocha
   requirement: !ruby/object:Gem::Requirement
@@ -48,17 +64,17 @@ dependencies:
   requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ! '>='
+    - - ~>
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 10.1.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ! '>='
+    - - ~>
      - !ruby/object:Gem::Version
-        version: '0'
+        version: 10.1.0
 - !ruby/object:Gem::Dependency
   name: shoulda-context
   requirement: !ruby/object:Gem::Requirement
@@ -87,6 +103,7 @@ files:
 - lib/hadupils/runners.rb
 - lib/hadupils/extensions/hive.rb
 - lib/hadupils/extensions.rb
+- lib/hadupils/hacks.rb
 - lib/hadupils/assets.rb
 - lib/hadupils/util.rb
 - lib/hadupils.rb