hadupils 0.4.0 → 0.5.0

data/CHANGELOG.md CHANGED
@@ -44,3 +44,15 @@
  Helper method for assembling gzipped archives the contents of which
  are hive-ext compatible.
 
+ ### 0.5.0
+ * Introduced a hadoop command that enforces user config and hadoop-ext
+   configuration files (hadoop.xml) and calls the hadoop runner
+ * Introduced the following commands and runners that utilize the hadoop
+   runner (to work against the DFS): mktemp, withtmpdir and rm
+ * Support for hadoop_confs in Hadupils::Extensions
+ * Introduced Hadupils::Extensions::Dfs::TmpFile
+ * Introduced the Hadupils::Hacks module: a String refinement (#randcase)
+   for Ruby 2+ and a monkey patch of the String class for Ruby < 2.0
+ * Some refactoring and a fix for a spec bug on Mac OS X
+ * Tweaked old unit tests and added new ones for the new features
+ * Updated the README with examples
data/Gemfile CHANGED
@@ -1,6 +1,13 @@
- source :rubygems
+ source 'https://rubygems.org'
 
- gem 'mocha', :group => :test
- gem 'rake', '10.1.0', :group => :development
- gem 'shoulda-context', :group => :test
+ gem 'uuid', '~> 2.3.0'
 
+ group :development do
+   gem 'rake', '~> 10.1.0'
+   gem 'pry', '~> 0.9.0'
+ end
+
+ group :test do
+   gem 'mocha'
+   gem 'shoulda-context'
+ end
data/Gemfile.lock CHANGED
@@ -1,16 +1,30 @@
  GEM
-   remote: http://rubygems.org/
+   remote: https://rubygems.org/
    specs:
+     coderay (1.0.9)
+     macaddr (1.6.1)
+       systemu (~> 2.5.0)
      metaclass (0.0.1)
+     method_source (0.8.1)
      mocha (0.14.0)
        metaclass (~> 0.0.1)
+     pry (0.9.12)
+       coderay (~> 1.0.5)
+       method_source (~> 0.8)
+       slop (~> 3.4)
      rake (10.1.0)
      shoulda-context (1.1.5)
+     slop (3.4.4)
+     systemu (2.5.2)
+     uuid (2.3.7)
+       macaddr (~> 1.0)
 
  PLATFORMS
    ruby
 
  DEPENDENCIES
    mocha
-   rake (= 10.1.0)
+   pry (~> 0.9.0)
+   rake (~> 10.1.0)
    shoulda-context
+   uuid (~> 2.3.0)
data/README.md CHANGED
@@ -2,3 +2,28 @@ hadupils
  ========
 
  Operating environment oriented utilities for hadoop (Hadoop + Utils => hadupils)
+
+ ## Shell Environment Variables
+ - $HADUPILS_BASE_TMP_PATH
+   * The base path for DFS temporary file/directory creation
+   * Defaults to '/tmp' on the DFS (only set this if you need another base directory)
+ - $HADUPILS_TMPDIR_PATH
+   * Set when the subcommand is executed in a subshell via the hadupils 'withtmpdir' command
+   * The value comes from the tmp directory that hadupils created for the subcommand
+   * hadupils will clean up (remove) the directory if the subcommand returns an exit status of zero
+
+ ## Hadupils' Commands
+ - hive __command__ _options_
+ - hadoop __command__ _options_
+ - mktemp [-d]
+ - withtmpdir __subshell_command__
+ - rm [-r] __full_path_to_file_or_directory__
+
+ ### Example Usages
+ ``` shell
+ hadupils hive -e 'select a.col from tab1 a'
+ hadupils hadoop fs -ls /tmp
+ hadupils mktemp -d
+ hadupils withtmpdir 'echo $HADUPILS_TMPDIR_PATH'
+ hadupils rm -r /tmp/hadupils-tmp-e341afe01721013128c122000af92329
+ ```
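The CLI is a thin wrapper around `Hadupils::Commands.run`, as the `bin/hadupils` change below shows. A minimal programmatic sketch, assuming the API exactly as it appears in this diff:

```ruby
require 'hadupils'

# Programmatic equivalent of `hadupils mktemp -d`: creates a tmp
# directory on the DFS, prints its path to STDOUT, and returns the
# subprocess exit status (non-zero on failure).
status = Hadupils::Commands.run 'mktemp', ['-d']

# bin/hadupils itself just does: exit Hadupils::Commands.run ARGV[0], ARGV[1..-1]
exit status
```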
data/bin/hadupils CHANGED
@@ -5,4 +5,3 @@
  require 'hadupils'
 
  exit Hadupils::Commands.run ARGV[0], ARGV[1..-1]
-
data/lib/hadupils/commands.rb CHANGED
@@ -1,5 +1,5 @@
  module Hadupils::Commands
-   def self.run(command, params)
+   def self.run(command, params=[])
      handler = handler_for command
      handler.run params
    end
@@ -18,9 +18,13 @@ module Hadupils::Commands
    end
 
    class SimpleCommand
-     def self.run(params)
+     def self.run(params=[])
        self.new.run params
      end
+
+     def successful?(exitstatus)
+       exitstatus == 0
+     end
    end
 
    module HadoopExt
@@ -29,18 +33,42 @@ module Hadupils::Commands
      end
    end
 
+   module HiveExt
+     def hive_ext
+       @hive_ext ||= Hadupils::Extensions::HiveSet.new(Hadupils::Search.hive_extensions)
+     end
+   end
+
    module UserConf
      def user_config
        @user_config ||= Hadupils::Extensions::Static.new(Hadupils::Search.user_config)
      end
    end
 
-   module HiveExt
-     def hive_ext
-       @hive_ext ||= Hadupils::Extensions::HiveSet.new(Hadupils::Search.hive_extensions)
+   class Hadoop < SimpleCommand
+     include HadoopExt
+     include UserConf
+
+     def assemble_parameters(parameters)
+       @hadoop_ext = Hadupils::Extensions::Static.new(Hadupils::Search.hadoop_assets)
+       hadoop_cmd = parameters[0...1]
+       hadoop_cmd_opts = parameters[1..-1] || []
+
+       if %w(fs dfs).include? parameters[0]
+         hadoop_cmd + user_config.hadoop_confs + hadoop_ext.hadoop_confs + hadoop_cmd_opts
+       else
+         # TODO: Assemble command line parameters to pkg assets/code for submitting jobs, e.g.
+         hadoop_cmd + user_config.hadoop_confs + hadoop_ext.hadoop_confs + hadoop_cmd_opts
+       end
+     end
+
+     def run(parameters)
+       Hadupils::Runners::Hadoop.run assemble_parameters(parameters)
      end
    end
 
+   register_handler :hadoop, Hadoop
+
    class Hive < SimpleCommand
      include HadoopExt
      include HiveExt
@@ -56,4 +84,88 @@ module Hadupils::Commands
    end
 
    register_handler :hive, Hive
+
+   class MkTmpFile < SimpleCommand
+     def run(parameters)
+       # Creates a new tmpdir and puts the full tmpdir_path to STDOUT
+       Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile!
+       tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
+
+       # Similar to shell mktemp, but for the Hadoop DFS!
+       # Makes a tmp file by default; a tmp directory with the '-d' flag
+       fs_cmd = parameters[0] == '-d' ? '-mkdir' : '-touchz'
+       exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmpdir_path]
+       if successful? exitstatus
+         exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-chmod', '700', tmpdir_path]
+         if successful? exitstatus
+           puts tmpdir_path
+         else
+           $stderr.puts "Failed to chmod 700 dfs tmpdir: #{tmpdir_path}"
+         end
+       else
+         $stderr.puts "Failed creating dfs tmpdir: #{tmpdir_path}"
+       end
+       exitstatus
+     end
+   end
+
+   register_handler :mktemp, MkTmpFile
+
+   class RmFile < SimpleCommand
+     def run(parameters)
+       # Similar to shell rm, but for the Hadoop DFS!
+       # Removes files by default; removes directories recursively with the '-r' flag
+       fs_cmd, tmp_dirs =
+         if parameters[0] == '-r'
+           ['-rmr', parameters[1..-1]]
+         else
+           ['-rm', parameters[0..-1]]
+         end
+
+       if tmp_dirs.empty?
+         $stderr.puts 'Failed to remove unspecified tmpdir(s), please specify tmpdir_path'
+         255
+       else
+         exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmp_dirs].flatten
+         if successful? exitstatus
+           Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile!
+         else
+           $stderr.puts "Failed to remove dfs tmpdir: #{tmp_dirs.join(' ')}"
+         end
+         exitstatus
+       end
+     end
+   end
+
+   register_handler :rm, RmFile
+
+   class WithTmpDir < SimpleCommand
+     def run(parameters)
+       # Runs the provided subcommand with a tmpdir; cleans up the tmpdir on an exitstatus of zero
+       if parameters.empty?
+         $stderr.puts 'Yeeaaahhh... sooo... you failed to provide a subcommand...'
+         255
+       else
+         # Let's create the tmpdir
+         exitstatus = Hadupils::Commands::MkTmpFile.run ['-d']
+         if successful? exitstatus
+           tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
+           parameters.unshift({'HADUPILS_TMPDIR_PATH' => tmpdir_path})
+
+           # Let's run the shell subcommand!
+           exitstatus = Hadupils::Runners::Subcommand.run parameters
+
+           if successful? exitstatus
+             # Let's attempt to clean up tmpdir_path
+             exitstatus = Hadupils::Commands::RmFile.run ['-r', tmpdir_path]
+           else
+             $stderr.puts "Failed to run shell subcommand: #{parameters}"
+           end
+         end
+         exitstatus
+       end
+     end
+   end
+
+   register_handler :withtmpdir, WithTmpDir
  end
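Every command class above registers itself by name via `register_handler`, and `Commands.run` resolves names through `handler_for`. The tests later in this diff require the lookup to be case-insensitive and to accept both strings and symbols; a minimal sketch of a registry with that behavior (illustrative only, not necessarily the gem's exact implementation):

```ruby
module Hadupils::Commands
  # Illustrative registry: normalize names so that :HivE, 'hive' and
  # 'hIVe' all resolve to the same handler class.
  def self.handlers
    @handlers ||= {}
  end

  def self.register_handler(name, klass)
    handlers[name.to_s.downcase] = klass
  end

  def self.handler_for(name)
    handlers[name.to_s.downcase]
  end
end
```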
data/lib/hadupils/extensions/hive.rb CHANGED
@@ -1,3 +1,6 @@
+ require 'tempfile'
+ require 'fileutils'
+
  module Hadupils::Extensions
 
    # Hive-targeted extensions derived from filesystem layout
@@ -204,8 +207,6 @@ module Hadupils::Extensions
      end
    end
 
-   require 'tempfile'
-   require 'fileutils'
    ::Dir.mktmpdir do |workdir|
      basenames = dist.collect do |src|
        FileUtils.cp src, File.join(workdir, File.basename(src))
data/lib/hadupils/extensions.rb CHANGED
@@ -1,4 +1,55 @@
+ require 'uuid'
+ require 'tempfile'
+
  module Hadupils::Extensions
+   # Tools for managing tmp files in the hadoop dfs
+   module Dfs
+     module TmpFile
+       def self.uuid
+         @uuid ||= UUID.new
+       end
+
+       def self.tmp_path
+         @tmp_path ||= (ENV['HADUPILS_BASE_TMP_PATH'] || '/tmp')
+       end
+
+       def self.tmpfile_path
+         @tmpdir_path ||= ::File.join(tmp_path, "hadupils-tmp-#{uuid.generate(:compact)}")
+       end
+
+       def self.reset_tmpfile!
+         @tmpdir_path = nil
+       end
+     end
+   end
+
+   # Tools for managing hadoop configuration files ("hadoop.xml").
+   module HadoopConf
+     module HadoopOpt
+       def hadoop_opts
+         ['-conf', path]
+       end
+     end
+
+     # Wraps an extant hadoop configuration file and provides
+     # an interface compatible with the critical parts of the
+     # Static sibling class so they may be used interchangeably
+     # by runners when determining hadoop options.
+     class Static
+       attr_reader :path
+
+       include HadoopOpt
+
+       # Given a path, expands it to an absolute path
+       def initialize(path)
+         @path = ::File.expand_path(path)
+       end
+
+       def close
+       end
+     end
+   end
+
    # Tools for managing hive initialization files ("hiverc").
    module HiveRC
      module HiveOpt
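`Dfs::TmpFile` memoizes a single DFS path per process until it is explicitly reset. A short illustrative session, assuming the module exactly as shown above:

```ruby
require 'hadupils'

tmp = Hadupils::Extensions::Dfs::TmpFile

# The first call builds and memoizes a path under
# $HADUPILS_BASE_TMP_PATH (default '/tmp'), e.g.
# "/tmp/hadupils-tmp-<compact uuid>".
path = tmp.tmpfile_path

# Repeated calls return the same memoized path...
tmp.tmpfile_path == path   # => true

# ...until reset_tmpfile! clears the memo, so the next call generates
# a fresh UUID-based path. This is why MkTmpFile resets before creating.
tmp.reset_tmpfile!
tmp.tmpfile_path == path   # => false
```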
@@ -32,7 +83,6 @@ module Hadupils::Extensions
      attr_reader :file
 
      include HiveOpt
-     require 'tempfile'
 
      # This will allow us to change what handles the dynamic files.
      def self.file_handler=(handler)
@@ -87,6 +137,12 @@ module Hadupils::Extensions
      end
    end
 
+   def hadoop_conf(&block)
+     @scope.instance_eval do
+       @hadoop_conf_block = block
+     end
+   end
+
    def hiverc(&block)
      @scope.instance_eval do
        @hiverc_block = block
@@ -110,12 +166,16 @@ module Hadupils::Extensions
      assets
    end
 
+   def hadoop_confs
+     []
+   end
+
    def hivercs
      []
    end
 
    def self.gather_assets(directory)
-     if not directory.nil?
+     if directory
        Hadupils::Assets.assets_in(directory)
      else
        []
@@ -179,12 +239,26 @@ module Hadupils::Extensions
      []
    end
 
+   def hadoop_conf_path
+     ::File.join(path, 'hadoop.xml') if path
+   end
+
    def hiverc_path
-     ::File.join(path, 'hiverc')
+     ::File.join(path, 'hiverc') if path
+   end
+
+   def hadoop_conf?
+     hadoop_conf_path ? ::File.file?(hadoop_conf_path) : false
    end
 
    def hiverc?
-     ::File.file? hiverc_path
+     hiverc_path ? ::File.file?(hiverc_path) : false
+   end
+
+   def hadoop_confs
+     r = []
+     r << Hadupils::Extensions::HadoopConf::Static.new(hadoop_conf_path) if hadoop_conf?
+     r
    end
 
    def hivercs
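With these additions, any extension whose directory contains a `hadoop.xml` exposes it through `hadoop_confs`, and each wrapper contributes a `['-conf', path]` pair via `hadoop_opts`. An illustrative sketch, assuming `Static` inherits this behavior from the class shown above (the `/etc/hadupils` path is invented):

```ruby
# Suppose /etc/hadupils/hadoop-ext contains a hadoop.xml file.
ext = Hadupils::Extensions::Static.new('/etc/hadupils/hadoop-ext')

ext.hadoop_conf?   # => true
conf = ext.hadoop_confs.first
# => #<Hadupils::Extensions::HadoopConf::Static ...>

# Runners splice this into the hadoop argv:
conf.hadoop_opts
# => ["-conf", "/etc/hadupils/hadoop-ext/hadoop.xml"]
```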
data/lib/hadupils/hacks.rb CHANGED
@@ -0,0 +1,29 @@
+ if RUBY_VERSION < '2.0'
+   class String
+     def randcase
+       dup.split('').map do |char|
+         if rand(1..10) > 5
+           char.upcase
+         else
+           char.downcase
+         end
+       end.join
+     end
+   end
+ else
+   module Hadupils
+     module Hacks
+       refine ::String do
+         def randcase
+           dup.split('').map do |char|
+             if rand(1..100) > 50
+               char.upcase
+             else
+               char.downcase
+             end
+           end.join
+         end
+       end
+     end
+   end
+ end
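`randcase` exists to let the test suite exercise case-insensitive handler lookup with randomized input. Usage differs by Ruby version because refinements are lexically scoped; a short sketch assuming the file above:

```ruby
# Ruby >= 2.0: the refinement is inert until activated in each file
# that wants it (as the commands test later in this diff does).
using Hadupils::Hacks

'mktemp'.randcase   # => e.g. "mKTemP"; each character flips a fair coin

# Ruby < 2.0: String is monkey patched globally, so no `using` is
# needed (or available).
```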
data/lib/hadupils/runners.rb CHANGED
@@ -6,6 +6,10 @@ module Hadupils::Runners
      @params = params
    end
 
+   def self.run(*params)
+     self.new(*params).wait!
+   end
+
    def command; end
 
    def execute!
@@ -35,19 +39,39 @@ module Hadupils::Runners
    def wait!
      @last_result = execute!
      @last_status = $?
+     last_exitstatus
+   end
+
+   def last_exitstatus
      if @last_result.nil?
        255
      else
        @last_status.exitstatus
      end
    end
+ end
 
-   def self.run(*params)
-     self.new(*params).wait!
+ class Hadoop < Base
+   class << self; attr_writer :base_runner; end
+
+   def self.base_runner
+     @base_runner || ::File.join(ENV['HADOOP_HOME'], 'bin', 'hadoop')
+   end
+
+   def command
+     params.inject([self.class.base_runner]) do |result, param|
+       if param.respond_to? :hadoop_opts
+         result + param.hadoop_opts
+       else
+         result << param
+       end
+     end
    end
  end
 
  class Hive < Base
+   class << self; attr_writer :base_runner; end
+
    def initialize(params, hive_aux_jars_path='')
      super(params)
      @hive_aux_jars_path = hive_aux_jars_path
@@ -57,10 +81,6 @@ module Hadupils::Runners
      @base_runner || ::File.join(ENV['HIVE_HOME'], 'bin', 'hive')
    end
 
-   def self.base_runner=(runner_path)
-     @base_runner = runner_path
-   end
-
    def command
      params.inject([env_overrides, self.class.base_runner]) do |result, param|
        if param.respond_to? :hive_opts
@@ -72,14 +92,20 @@ module Hadupils::Runners
    end
 
    def env_overrides
-     e = {}
+     env = {}
      settings = [@hive_aux_jars_path, ::ENV['HIVE_AUX_JARS_PATH']].reject do |val|
-       val.nil? or val.strip == ''
+       val.nil? || val.strip.empty?
      end
      if settings.length > 0
-       e['HIVE_AUX_JARS_PATH'] = settings.join(',')
+       env['HIVE_AUX_JARS_PATH'] = settings.join(',')
      end
-     e
+     env
+   end
+ end
+
+ class Subcommand < Base
+   def command
+     params
    end
  end
  end
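Tying the runner together: `Hadoop#command` folds `params` into an argv rooted at `$HADOOP_HOME/bin/hadoop`, splicing in the `['-conf', path]` pairs from any param that responds to `hadoop_opts`. A hypothetical invocation (paths invented for illustration):

```ruby
ENV['HADOOP_HOME'] = '/opt/hadoop'   # hypothetical install location

conf   = Hadupils::Extensions::HadoopConf::Static.new('hadoop-ext/hadoop.xml')
runner = Hadupils::Runners::Hadoop.new(['fs', conf, '-ls', '/tmp'])

runner.command
# => ["/opt/hadoop/bin/hadoop", "fs",
#     "-conf", "<expanded path to>/hadoop-ext/hadoop.xml",
#     "-ls", "/tmp"]

# Hadoop.run(params) builds the runner, waits on it, and returns the
# exit status -- 255 when execution produced no result at all.
```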
data/lib/hadupils/util.rb CHANGED
@@ -1,7 +1,8 @@
+ require 'rubygems/package'
+ require 'zlib'
+
  module Hadupils::Util
    def self.read_archive(archive_path)
-     require 'rubygems/package'
-     require 'zlib'
      Zlib::GzipReader.open(archive_path) do |zlib|
        Gem::Package::TarReader.new(zlib) do |tar|
          tar.rewind
data/lib/hadupils.rb CHANGED
@@ -8,3 +8,4 @@ require 'hadupils/extensions'
  require 'hadupils/runners'
  require 'hadupils/search'
  require 'hadupils/util'
+ require 'hadupils/hacks'
@@ -4,6 +4,7 @@ require 'test/unit'
  require 'shoulda-context'
  require 'mocha/setup'
  require 'tempfile'
+ require 'pathname'
  require 'hadupils'
 
  # Add tempdir niceties to Test::Unit::TestCase
@@ -35,7 +36,7 @@ class Test::Unit::TestCase
    def self.tempdir_context(name, &block)
      context name do
        setup do
-         @tempdir = Test::Unit::TestCase::DirWrapper.new(::File.expand_path(::Dir.mktmpdir))
+         @tempdir = Test::Unit::TestCase::DirWrapper.new(::Pathname.new(::File.expand_path(::Dir.mktmpdir)).realpath.to_s)
        end
 
        teardown do
@@ -1,3 +1,5 @@
+ using Hadupils::Hacks unless RUBY_VERSION < '2.0'
+
  class Hadupils::CommandsTest < Test::Unit::TestCase
    context Hadupils::Commands do
      context 'run singleton method' do
@@ -26,16 +28,100 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
      end
    end
 
+   context 'Hadoop' do
+     setup do
+       @klass = Hadupils::Commands::Hadoop
+     end
+
+     should 'register with :hadoop name' do
+       handlers = [:hadoop]
+       run_handler_assertions_for handlers
+     end
+
+     should 'have a #run singleton method that dispatches to an instance #run' do
+       @klass.expects(:new).with.returns(instance = mock())
+       instance.expects(:run).with(params = mock()).returns(result = mock())
+       assert_equal result, @klass.run(params)
+     end
+
+     should 'have a Static extension based on a search for hadoop-ext' do
+       Hadupils::Search.expects(:hadoop_assets).with.returns(conf = mock())
+       Hadupils::Extensions::Static.expects(:new).with(conf).returns(extension = mock())
+       hadoop_ext = Hadupils::Extensions::Static.new(Hadupils::Search.hadoop_assets)
+       cmd = @klass.new
+       cmd.stubs(:hadoop_ext).with.returns(hadoop_ext)
+       assert_equal extension, cmd.hadoop_ext
+       # This should cause failure if the previous result wasn't
+       # cached internally (by breaking expectations).
+       cmd.hadoop_ext
+     end
+
+     should 'have a Static extension based on user config' do
+       Hadupils::Search.expects(:user_config).with.returns(conf = mock())
+       Hadupils::Extensions::Static.expects(:new).with(conf).returns(extension = mock())
+       cmd = @klass.new
+       assert_equal extension, cmd.user_config
+       # Fails on expectations if previous result wasn't cached.
+       cmd.user_config
+     end
+
+     context '#run' do
+       setup do
+         @command = @klass.new
+         @command.stubs(:user_config).with.returns(@user_config = mock())
+         @command.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
+         @runner_class = Hadupils::Runners::Hadoop
+       end
+
+       context 'with user config and hadoop_confs' do
+         setup do
+           @user_config.stubs(:hadoop_confs).returns(@user_config_hadoop_confs = [mock(), mock()])
+           @hadoop_ext.stubs(:hadoop_confs).returns(@hadoop_ext_hadoop_confs = [mock(), mock(), mock()])
+         end
+
+         should 'apply hadoop_conf options to hadoop runner call' do
+           @runner_class.expects(:run).with(@user_config_hadoop_confs +
+                                            @hadoop_ext_hadoop_confs).returns(result = mock())
+           assert_equal result, @command.run([])
+         end
+
+         should 'insert hadoop_conf options into position 1 of given params array to hadoop runner call' do
+           params = [mock(), mock()]
+           @runner_class.expects(:run).with(params[0...1] +
+                                            @user_config_hadoop_confs +
+                                            @hadoop_ext_hadoop_confs +
+                                            params[1..-1]).returns(result = mock())
+           assert_equal result, @command.run(params)
+         end
+       end
+
+       context 'without hadoop_confs' do
+         setup do
+           @user_config.stubs(:hadoop_confs).returns([])
+           @hadoop_ext.stubs(:hadoop_confs).returns([])
+         end
+
+         should 'pass params unchanged through to hadoop runner call' do
+           @runner_class.expects(:run).with(params = [mock(), mock()]).returns(result = mock())
+           assert_equal result, @command.run(params)
+         end
+
+         should 'handle empty params' do
+           @runner_class.expects(:run).with([]).returns(result = mock())
+           assert_equal result, @command.run([])
+         end
+       end
+     end
+   end
+
    context 'Hive' do
      setup do
        @klass = Hadupils::Commands::Hive
      end
 
      should 'register with :hive name' do
-       assert_same @klass, Hadupils::Commands.handler_for(:hive)
-       assert_same @klass, Hadupils::Commands.handler_for(:HivE)
-       assert_same @klass, Hadupils::Commands.handler_for('hive')
-       assert_same @klass, Hadupils::Commands.handler_for('hIVe')
+       handlers = [:hive]
+       run_handler_assertions_for handlers
      end
 
      should 'have a #run singleton method that dispatches to an instance #run' do
@@ -207,21 +293,21 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
 
      should 'produce a valid set of parameters and hivercs' do
        Kernel.stubs(:system).with() do |*args|
-         args[0] == {'HIVE_AUX_JARS_PATH' => @hive_aux_jars_path_val} and
-         args[1] == @hive_prog and
-         args[2] == '-i' and
-         File.open(args[3], 'r').read == @static_hiverc_content and
-         args[4] == '-i' and
-         File.open(args[5], 'r').read == @dynamic_hiverc_content and
-         args[6] == '-i' and
-         File.open(args[7], 'r').read == @hive_exts[:one][:dynamic_hiverc_content] and
-         args[8] == '-i' and
-         File.open(args[9], 'r').read == @hive_exts[:one][:static_hiverc_content] and
-         args[10] == '-i' and
-         File.open(args[11], 'r').read == @hive_exts[:two][:dynamic_hiverc_content] and
-         args[12] == '-i' and
-         File.open(args[13], 'r').read == @hive_exts[:two][:static_hiverc_content] and
-         args[14] == '--hiveconf' and
+         args[0] == {'HIVE_AUX_JARS_PATH' => @hive_aux_jars_path_val} &&
+         args[1] == @hive_prog &&
+         args[2] == '-i' &&
+         File.open(args[3], 'r').read == @static_hiverc_content &&
+         args[4] == '-i' &&
+         File.open(args[5], 'r').read == @dynamic_hiverc_content &&
+         args[6] == '-i' &&
+         File.open(args[7], 'r').read == @hive_exts[:one][:dynamic_hiverc_content] &&
+         args[8] == '-i' &&
+         File.open(args[9], 'r').read == @hive_exts[:one][:static_hiverc_content] &&
+         args[10] == '-i' &&
+         File.open(args[11], 'r').read == @hive_exts[:two][:dynamic_hiverc_content] &&
+         args[12] == '-i' &&
+         File.open(args[13], 'r').read == @hive_exts[:two][:static_hiverc_content] &&
+         args[14] == '--hiveconf' &&
          args[15] == 'my.foo=your.fu'
        end
        Hadupils::Commands.run 'hive', ['--hiveconf', 'my.foo=your.fu']
@@ -231,7 +317,149 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
        ::Dir.chdir @pwd
      end
    end
+   context 'MkTempFile' do
+     setup do
+       @klass = Hadupils::Commands::MkTmpFile
+     end
+
+     should 'register with :mktemp name' do
+       handlers = [:mktemp]
+       run_handler_assertions_for handlers
+     end
+
+     should 'have a #run singleton method that dispatches to an instance #run' do
+       @klass.expects(:new).with.returns(instance = mock())
+       instance.expects(:run).with(params = mock()).returns(result = mock())
+       assert_equal result, @klass.run(params)
+     end
+
+     context '#run' do
+       setup do
+         @command = @klass.new
+         Hadupils::Runners::Hadoop.stubs(:base_runner).returns(@hadoop_path = mock().to_s + '-hadoop')
+       end
+
+       should 'provide invocation for bare mktemp if given empty parameters' do
+         tmpdir_path = mock().to_s
+         Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+         Kernel.expects(:system).with(@hadoop_path, 'fs', '-touchz', tmpdir_path).returns(0)
+         Kernel.expects(:system).with(@hadoop_path, 'fs', '-chmod', '700', tmpdir_path).returns(0)
+         assert_equal 0, @command.run([])
+       end
+
+       should 'provide invocation for mktemp if given the -d flag parameter' do
+         tmpdir_path = mock().to_s
+         Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+         Kernel.expects(:system).with(@hadoop_path, 'fs', '-mkdir', tmpdir_path).returns(0)
+         Kernel.expects(:system).with(@hadoop_path, 'fs', '-chmod', '700', tmpdir_path).returns(0)
+         assert_equal 0, @command.run(['-d'])
+       end
+     end
+   end
+
+   context 'RmFile' do
+     setup do
+       @klass = Hadupils::Commands::RmFile
+     end
+
+     should 'register with :rm name' do
+       handlers = [:rm]
+       run_handler_assertions_for handlers
+     end
+
+     should 'have a #run singleton method that dispatches to an instance #run' do
+       @klass.expects(:new).with.returns(instance = mock())
+       instance.expects(:run).with(params = mock()).returns(result = mock())
+       assert_equal result, @klass.run(params)
+     end
+
+     context '#run' do
+       setup do
+         @command = @klass.new
+         Hadupils::Runners::Hadoop.stubs(:base_runner).returns(@hadoop_path = mock().to_s + '-hadoop')
+       end
+
+       should 'provide invocation for bare rm if given empty parameters' do
+         assert_equal 255, @klass.run([])
+       end
+
+       should 'provide invocation for rm if given just the tmpdir_path parameter' do
+         tmpdir_path = mock().to_s
+         Kernel.expects(:system).with(@hadoop_path, 'fs', '-rm', tmpdir_path).returns(0)
+         assert_equal 0, @klass.run([tmpdir_path])
+       end
+
+       should 'provide invocation for hadoop if given tmpdir_path with the -r flag parameter' do
+         tmpdir_path = mock().to_s
+         Kernel.expects(:system).with(@hadoop_path, 'fs', '-rmr', tmpdir_path).returns(0)
+         assert_equal 0, @klass.run(['-r', tmpdir_path])
+       end
+     end
+
+     context 'WithTempDir' do
+       setup do
+         @klass = Hadupils::Commands::WithTmpDir
+       end
+
+       should 'register with :withtmpdir name' do
+         handlers = [:withtmpdir]
+         run_handler_assertions_for handlers
+       end
+
+       should 'have a #run singleton method that dispatches to an instance #run' do
+         @klass.expects(:new).with.returns(instance = mock())
+         instance.expects(:run).with(params = mock()).returns(result = mock())
+         assert_equal result, @klass.run(params)
+       end
+
+       context '#run' do
+         setup do
+           @command = @klass.new
+           Hadupils::Runners::Hadoop.stubs(:base_runner).returns(@hadoop_path = mock().to_s + '-hadoop')
+         end
+
+         should 'provide invocation for withtmpdir if given parameters for a shell subcommand' do
+           tmpdir_path = mock().to_s
+           run_common_subcommand_assertions_with tmpdir_path
+           Kernel.expects(:system).with({'HADUPILS_TMPDIR_PATH' => tmpdir_path}, '/path/to/my_wonderful_script.sh').returns(0)
+           Kernel.expects(:system).with(@hadoop_path, 'fs', '-rmr', tmpdir_path).returns(0)
+           assert_equal 0, @klass.run(['/path/to/my_wonderful_script.sh'])
+         end
+
+         should 'provide invocation for withtmpdir if given parameters for a shell subcommand (another hadupils command)' do
+           tmpdir_path = mock().to_s
+           run_common_subcommand_assertions_with tmpdir_path
+           Kernel.expects(:system).with({'HADUPILS_TMPDIR_PATH' => tmpdir_path}, 'hadupils hadoop ls /tmp').returns(0)
+           Kernel.expects(:system).with(@hadoop_path, 'fs', '-rmr', tmpdir_path).returns('')
+           assert_equal 0, @klass.run(['hadupils hadoop ls /tmp'])
+         end
+
+         should 'provide invocation for withtmpdir if given parameters for a shell subcommand with nil result' do
+           tmpdir_path = mock().to_s
+           run_common_subcommand_assertions_with tmpdir_path
+           Kernel.expects(:system).with({'HADUPILS_TMPDIR_PATH' => tmpdir_path}, '/path/to/my_wonderful_script.sh').returns(nil)
+           assert_equal 255, @klass.run(['/path/to/my_wonderful_script.sh'])
+         end
+       end
+     end
+   end
    end
  end
- end
 
+ def run_common_subcommand_assertions_with(tmpdir_path)
+   # tmpfile_path is expected twice: once by MkTmpFile and once by WithTmpDir.
+   Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+   Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+   Kernel.expects(:system).with(@hadoop_path, 'fs', '-mkdir', tmpdir_path).returns(0)
+   Kernel.expects(:system).with(@hadoop_path, 'fs', '-chmod', '700', tmpdir_path).returns(0)
+ end
+
+ def run_handler_assertions_for(handlers)
+   handlers.each do |handler|
+     handler = handler.to_s.downcase
+     assert_same @klass, Hadupils::Commands.handler_for(handler.to_sym)
+     assert_same @klass, Hadupils::Commands.handler_for(handler.randcase.to_sym)
+     assert_same @klass, Hadupils::Commands.handler_for(handler)
+     assert_same @klass, Hadupils::Commands.handler_for(handler.randcase)
+   end
+ end
+ end
@@ -1,3 +1,6 @@
+ require 'rubygems/package'
+ require 'zlib'
+
  class Hadupils::ExtensionsTest < Test::Unit::TestCase
    context Hadupils::Extensions::Base do
      context 'initialization with nil path' do
@@ -234,8 +237,6 @@ class Hadupils::ExtensionsTest < Test::Unit::TestCase
 
    context 'ArchivePath extension' do
      setup do
-       require 'rubygems/package'
-       require 'zlib'
        @klass = Hadupils::Extensions::FlatArchivePath
        @bin_archives = %w(one two).collect do |name|
          @tempdir.file("zarchive-with-bin-#{name}.tar.gz") do |f|
@@ -1,3 +1,5 @@
+ require 'stringio'
+
  class Hadupils::Extensions::HiveTest < Test::Unit::TestCase
    shared_context :provide_hive_ext do
      setup do
@@ -235,8 +237,6 @@ class Hadupils::Extensions::HiveTest < Test::Unit::TestCase
    end
 
    context 'build_archive singleton method' do
-     require 'stringio'
-
      setup do
        ::Dir.mkdir(@some_dir = @tempdir.full_path('some_stuff'))
        ::Dir.mkdir(@other_dir = @tempdir.full_path('other_stuff'))
@@ -85,6 +85,49 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
    end
  end
 
+ context Hadupils::Runners::Hadoop do
+   setup do
+     @klass = Hadupils::Runners::Hadoop
+   end
+
+   should 'be a runner' do
+     assert_kind_of Hadupils::Runners::Base, @klass.new([])
+   end
+
+   should 'use $HADOOP_HOME/bin/hadoop as the base runner' do
+     ENV.expects(:[]).with('HADOOP_HOME').returns(home = mock().to_s)
+     assert_equal ::File.join(home, 'bin', 'hadoop'),
+                  @klass.base_runner
+   end
+
+   context '#command' do
+     setup do
+       @klass.stubs(:base_runner).returns(@hadoop_path = mock().to_s + '-hadoop')
+     end
+
+     should 'provide invocation for bare hadoop if given empty parameters' do
+       assert_equal [@hadoop_path], @klass.new([]).command
+     end
+
+     should 'provide invocation for hadoop with all given parameters' do
+       params = [mock().to_s, mock().to_s, mock().to_s, mock().to_s]
+       assert_equal [@hadoop_path] + params,
+                    @klass.new(params).command
+     end
+
+     should 'provide args for hadoop with :hadoop_opts on supporting params' do
+       p1 = mock()
+       p1.expects(:hadoop_opts).with.returns(p1_opts = ['-conf', mock().to_s])
+       p2 = mock()
+       p2.expects(:hadoop_opts).with.returns(p2_opts = ['-conf', mock().to_s])
+       s1 = mock().to_s
+       s2 = mock().to_s
+       assert_equal [@hadoop_path, s1] + p1_opts + p2_opts + [s2],
+                    @klass.new([s1, p1, p2, s2]).command
+     end
+   end
+ end
+
  context Hadupils::Runners::Hive do
    setup do
      @klass = Hadupils::Runners::Hive
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: hadupils
  version: !ruby/object:Gem::Version
-   version: 0.4.0
+   version: 0.5.0
  prerelease:
  platform: ruby
  authors:
@@ -9,24 +9,40 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-08-22 00:00:00.000000000 Z
+ date: 2013-10-11 00:00:00.000000000 Z
  dependencies:
+ - !ruby/object:Gem::Dependency
+   name: uuid
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 2.3.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 2.3.0
  - !ruby/object:Gem::Dependency
    name: bundler
    requirement: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 1.3.5
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 1.3.5
  - !ruby/object:Gem::Dependency
    name: mocha
    requirement: !ruby/object:Gem::Requirement
@@ -48,17 +64,17 @@ dependencies:
    requirement: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 10.1.0
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 10.1.0
  - !ruby/object:Gem::Dependency
    name: shoulda-context
    requirement: !ruby/object:Gem::Requirement
@@ -87,6 +103,7 @@ files:
  - lib/hadupils/runners.rb
  - lib/hadupils/extensions/hive.rb
  - lib/hadupils/extensions.rb
+ - lib/hadupils/hacks.rb
  - lib/hadupils/assets.rb
  - lib/hadupils/util.rb
  - lib/hadupils.rb