hadupils 0.5.0 → 0.6.0
- data/CHANGELOG.md +13 -0
- data/README.md +14 -2
- data/bin/hadupils +1 -1
- data/lib/hadupils/commands/options.rb +20 -0
- data/lib/hadupils/commands.rb +145 -38
- data/lib/hadupils/extensions/hive.rb +6 -1
- data/lib/hadupils/extensions.rb +82 -2
- data/lib/hadupils/helpers.rb +81 -0
- data/lib/hadupils/runners.rb +8 -7
- data/lib/hadupils.rb +3 -1
- data/test/unit/commands_test.rb +185 -122
- data/test/unit/runners_test.rb +12 -7
- metadata +4 -2
data/CHANGELOG.md CHANGED

@@ -53,6 +53,19 @@
 * Introduced Hadupils::Extensions::Dfs::TmpFile
 * Introduced Hadupils::Hacks module for String Refinements (self.randcase)
   for Ruby 2+ and Monkey Patching for the String class for Ruby < 2.0
+* Introduced $HADUPILS_BASE_TMP_PATH and $HADUPILS_TMPDIR_PATH for use with
+  commands: mktemp, withtmpdir and rm
 * Some refactoring and fixed a bug with the specs for Mac OS X
 * Tweaked old unit tests and added new ones for the new features
 * Updated the README with examples
+
+### 0.6.0
+* Renamed $HADUPILS_BASE_TMP_PATH to $HADUPILS_TMP_PATH (less typing)
+* Introduced $HADUPILS_TMP_TTL for use with command: cleanup
+* Introduced Hadupils::Commands::Cleanup to identify and remove old hadupils tmp DFS
+  directories/files where all files within any hadupils-tmpdir* in $HADUPILS_TMP_PATH
+  are older than $HADUPILS_TMP_TTL, the TTL (Time.now.utc - $HADUPILS_TMP_TTL)
+* The Hadupils::Runners::Base.new.execute! method now uses Open3.capture2 or Kernel.system
+* Fixed 1.8.7 compatibility bug with the Kernel.system call in
+  Hadupils::Extensions::Hive::AuxJarsPath.build_archive
+* Some refactoring
data/README.md CHANGED

@@ -4,20 +4,31 @@ hadupils
 Operating environment oriented utilities for hadoop (Hadoop + Utils => hadupils)
 
 ## Shell Environment Variables
-- $HADUPILS_BASE_TMP_PATH
+- $HADUPILS_TMP_PATH
   * This is the base path for DFS temporary file/directory creation
   * Defaults to '/tmp' on the DFS (only set this if you need another base directory)
+  * Command 'cleanup' will use this ENV var for the base tmp_path to look for /hadupils-tmp*/
+    tmpdirs if the tmp_path isn't set through the command line
+  * Other commands that use this are: mktemp, withtmpdir
+
 - $HADUPILS_TMPDIR_PATH
   * Set when the subcommand is executed in a subshell via the hadupils 'withtmpdir' command
   * The value comes from the tmp directory that hadupils created for the subcommand
   * It will cleanup (remove) the directory if the subcommand returns an exitstatus of zero
+- $HADUPILS_TMP_TTL
+  * This is the Time-To-Live for hadupils DFS temporary files/directories (hadupils-tmp*)
+  * Defaults to '86400' (24 hours)
+  * Command 'cleanup' will use this ENV var to remove any /hadupils-tmp*/ tmpdirs within
+    $HADUPILS_TMP_PATH where all files within are older than the TTL (Time.now.utc - $HADUPILS_TMP_TTL)
+    if the ttl isn't set through the command line
 
 ## Hadupils' Commands
 - hive __command__ _options_
 - hadoop __command__ _options_
 - mktemp [-d]
 - withtmpdir __subshell_command__
-- rm [-r] __full_path_to_file_or_directory__
+- rm [-rR] __full_path_to_file_or_directory__
+- cleanup [-n] __full_path_to_tmp_dir__ __ttl__
 
 ### Example Usages
 ``` shell

@@ -26,4 +37,5 @@ hadupils hadoop fs -ls /tmp
 hadupils mktemp -d
 hadupils withtmpdir 'echo $HADUPILS_TMPDIR_PATH'
 hadupils rm -r /tmp/hadupils-tmp-e341afe01721013128c122000af92329
+hadupils cleanup -n
 ```
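The TTL arithmetic above is easy to get backwards, so here is a small illustrative Ruby sketch (not gem code; the mtime value is hypothetical) of the rule cleanup applies: a tmpdir qualifies for removal only when every file inside it is older than the cutoff `Time.now.utc - ttl`.

```ruby
require 'time'

# TTL in seconds; defaults to 24 hours, as the README documents.
ttl    = Integer(ENV['HADUPILS_TMP_TTL'] || '86400')
cutoff = Time.now.utc - ttl

# Hypothetical modification times gathered from `hadoop fs -ls` output.
file_mtimes = [Time.parse('2013-10-24 16:23Z')]

# Removable only when ALL files are older than the cutoff.
removable = file_mtimes.all? { |mtime| mtime < cutoff }
puts removable
```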
data/lib/hadupils/commands/options.rb ADDED

@@ -0,0 +1,20 @@
+module Hadupils::Commands
+  module Options
+    # NOTE: Only a single option per command (known limitation for now)
+    module Directory
+      def perform_directory?
+        %w(-d --directory).include? params[0]
+      end
+    end
+    module DryRun
+      def perform_dry_run?
+        %w(-n --dry-run).include? params[0]
+      end
+    end
+    module Recursive
+      def perform_recursive?
+        %w(-r -R --recursive).include? params[0]
+      end
+    end
+  end
+end
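For context, these Options modules are plain mixins that inspect `params[0]` from the host command. A minimal usage sketch (the `DemoCommand` class is hypothetical, not part of the gem):

```ruby
require 'hadupils'

# Hypothetical command: mixing in Options::DryRun gives it the
# perform_dry_run? predicate over its first CLI parameter.
class DemoCommand < Hadupils::Commands::SimpleCommand
  include Hadupils::Commands::Options::DryRun
end

DemoCommand.new(['-n']).perform_dry_run?        # => true
DemoCommand.new(['--dry-run']).perform_dry_run? # => true
DemoCommand.new([]).perform_dry_run?            # => false
```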
data/lib/hadupils/commands.rb CHANGED

@@ -1,3 +1,5 @@
+require 'hadupils/commands/options'
+
 module Hadupils::Commands
   def self.run(command, params=[])
     handler = handler_for command

@@ -18,8 +20,14 @@ module Hadupils::Commands
   end
 
   class SimpleCommand
+    attr_reader :params
+
+    def initialize(params=[])
+      @params = params
+    end
+
     def self.run(params=[])
-      self.new.run
+      self.new(params).run
     end
 
     def successful?(exitstatus)

@@ -50,9 +58,9 @@ module Hadupils::Commands
     include UserConf
 
     def assemble_parameters(parameters)
-      @hadoop_ext
+      @hadoop_ext = Hadupils::Extensions::Static.new(Hadupils::Search.hadoop_assets)
       hadoop_cmd = parameters[0...1]
-      hadoop_cmd_opts
+      hadoop_cmd_opts = parameters[1..-1] || []
 
       if %w(fs dfs).include? parameters[0]
         hadoop_cmd + user_config.hadoop_confs + hadoop_ext.hadoop_confs + hadoop_cmd_opts

@@ -62,8 +70,8 @@ module Hadupils::Commands
       end
     end
 
-    def run
-      Hadupils::Runners::Hadoop.run assemble_parameters(
+    def run
+      Hadupils::Runners::Hadoop.run assemble_parameters(params)
     end
   end
 

@@ -78,61 +86,71 @@ module Hadupils::Commands
       user_config.hivercs + hadoop_ext.hivercs + hive_ext.hivercs + parameters
     end
 
-    def run
-      Hadupils::Runners::Hive.run assemble_parameters(
+    def run
+      Hadupils::Runners::Hive.run assemble_parameters(params), hive_ext.hive_aux_jars_path
     end
   end
 
   register_handler :hive, Hive
 
   class MkTmpFile < SimpleCommand
-
-
+    include Options::Directory
+
+    attr_reader :tmpdir_path
+
+    def initialize(params)
+      super(params)
       Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile!
-      tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
+      @tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
+    end
 
+    def run
       # Similar to shell mktemp, but for Hadoop DFS!
+      # Creates a new tmpdir and puts the full tmpdir_path to STDOUT
       # Makes a tmp file by default; a tmp directory with '-d' flag
-      fs_cmd =
-      exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmpdir_path]
+      fs_cmd = perform_directory? ? '-mkdir' : '-touchz'
+      stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmpdir_path]
       if successful? exitstatus
-        exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-chmod', '700', tmpdir_path]
+        stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-chmod', '700', tmpdir_path]
         if successful? exitstatus
           puts tmpdir_path
         else
-          $stderr.puts "Failed to chmod 700 dfs tmpdir: #{tmpdir_path}"
+          $stderr.puts "Failed to dfs -chmod 700 dfs tmpdir: #{tmpdir_path}"
         end
       else
         $stderr.puts "Failed creating dfs tmpdir: #{tmpdir_path}"
       end
-      exitstatus
+      [nil, exitstatus]
     end
   end
 
   register_handler :mktemp, MkTmpFile
 
   class RmFile < SimpleCommand
-
+    include Hadupils::Helpers::TextHelper
+    include Options::Recursive
+
+    def assemble_parameters(parameters)
+      perform_recursive? ? ['-rmr', parameters[1..-1]] : ['-rm', parameters[0..-1]]
+    end
+
+    def run
       # Similar to shell rm, but for Hadoop DFS!
       # Removes files by default; removes directories recursively with '-r' flag
-      fs_cmd, tmp_dirs =
-        if parameters[0] == '-r'
-          ['-rmr', parameters[1..-1]]
-        else
-          ['-rm', parameters[0..-1]]
-        end
+      fs_cmd, tmp_dirs = assemble_parameters(params)
 
       if tmp_dirs.empty?
         $stderr.puts 'Failed to remove unspecified tmpdir(s), please specify tmpdir_path'
-        255
+        [nil, 255]
       else
-        exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmp_dirs].flatten
-
-
-
+        stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmp_dirs].flatten
+        unless successful? exitstatus
+          $stderr.puts "Failed to remove #{pluralize(tmp_dirs.length, 'tmpdir', 'tmpdirs')}"
+          tmp_dirs.each do |tmp_dir|
+            $stderr.puts tmp_dir
+          end
        end
-        exitstatus
+        [nil, exitstatus]
      end
    end
  end

@@ -140,32 +158,121 @@ module Hadupils::Commands
   register_handler :rm, RmFile
 
   class WithTmpDir < SimpleCommand
-    def run
+    def run
       # Runs provided subcommand with tmpdir and cleans up tmpdir on an exitstatus of zero
-      if
+      if params.empty?
         $stderr.puts 'Yeeaaahhh... sooo... you failed to provide a subcommand...'
-        255
+        [nil, 255]
       else
         # Let's create the tmpdir
-        exitstatus = Hadupils::Commands::MkTmpFile.run ['-d']
+        stdout, exitstatus = Hadupils::Commands::MkTmpFile.run ['-d']
         if successful? exitstatus
           tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
-
+          params.unshift({'HADUPILS_TMPDIR_PATH' => tmpdir_path})
 
           # Let's run the shell subcommand!
-          exitstatus = Hadupils::Runners::Subcommand.run
+          stdout, exitstatus = Hadupils::Runners::Subcommand.run params
 
           if successful? exitstatus
             # Let's attempt to cleanup tmpdir_path
-            exitstatus = Hadupils::Commands::RmFile.run ['-r', tmpdir_path]
+            stdout, exitstatus = Hadupils::Commands::RmFile.run ['-r', tmpdir_path]
           else
-            $stderr.puts "Failed to run shell subcommand: #{
+            $stderr.puts "Failed to run shell subcommand: #{params}"
           end
         end
-
+        Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile!
+        [nil, exitstatus]
       end
     end
   end
 
   register_handler :withtmpdir, WithTmpDir
+
+  class Cleanup < SimpleCommand
+    include Hadupils::Extensions::Dfs
+    include Hadupils::Extensions::Runners
+    include Hadupils::Helpers::Dfs
+    include Hadupils::Helpers::TextHelper
+    include Options::DryRun
+
+    attr_accessor :expired_exitstatuses
+    attr_accessor :rm_exitstatuses
+    attr_reader :tmp_path
+    attr_reader :tmp_ttl
+
+    def initialize(params)
+      super(params)
+      @expired_exitstatuses = []
+      @rm_exitstatuses = []
+      @tmp_path = (perform_dry_run? ? params[1] : params[0]) || TmpFile.tmp_path
+      @tmp_ttl = ((perform_dry_run? ? params[2] : params[1]) || TmpFile.tmp_ttl).to_i
+    end
+
+    def run
+      # Removes old hadupils tmp files/dirs where all files within a tmpdir are also older than the TTL
+      # User configurable by setting the ENV variable $HADUPILS_TMP_TTL, defaults to 86400 (last 24 hours)
+      # User may also perform a dry-run via a -n or a --dry-run flag
+
+      # Silence the Runner's shell STDOUT noise
+      Shell.silence_stdout = true
+
+      # Get candidate directories
+      stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-ls', tmp_path]
+      if successful? exitstatus
+        rm_array = []
+        dir_candidates(hadupils_tmpfiles(parse_ls(stdout)), tmp_ttl).each do |dir_candidate|
+          next unless has_expired? dir_candidate, tmp_ttl
+          rm_array << dir_candidate
+        end
+
+        exitstatus = expired_exitstatuses.all? {|expired_exitstatus| expired_exitstatus == 0} ? 0 : 255
+        if successful? exitstatus
+          puts "Found #{pluralize(rm_array.length, 'item', 'items')} to be removed recursively"
+          rm_array.each {|rm_item| puts rm_item }
+
+          unless perform_dry_run?
+            # Now want the user to see the Runner's shell STDOUT
+            Shell.silence_stdout = false
+
+            puts 'Removing...'
+            rm_array.each do |dir|
+              rm_stdout, rm_exitstatus = Hadupils::Commands::RmFile.run ['-r', dir]
+              rm_exitstatuses << rm_exitstatus
+              $stderr.puts "Failed to recursively remove: #{dir}" unless successful? rm_exitstatus
+            end
+          end
+          exitstatus = rm_exitstatuses.all? {|rm_exitstatus| rm_exitstatus == 0} ? 0 : 255
+        end
+      end
+      [nil, exitstatus]
+    end
+
+    def has_expired?(dir_candidate, ttl)
+      stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-count', dir_candidate]
+      expired_exitstatuses << exitstatus
+      if successful? exitstatus
+        parsed_count = parse_count(stdout)
+        if parsed_count.empty?
+          $stderr.puts "Failed to parse dfs -count for stdout: #{stdout}"
+          expired_exitstatuses << 255
+        elsif dir_empty? parsed_count[:file_count]
+          true
+        else
+          stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-ls', File.join(dir_candidate, '**', '*')]
+          expired_exitstatuses << exitstatus
+          if successful? exitstatus
+            all_expired? parse_ls(stdout), ttl
+          else
+            $stderr.puts "Failed to perform dfs -ls on path: #{File.join(dir_candidate, '**', '*')}"
+            false
+          end
+        end
+      else
+        $stderr.puts "Failed to perform dfs -count on path: #{dir_candidate}"
+        false
+      end
+    end
+  end
+
+  register_handler :cleanup, Cleanup
 end
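The visible theme of this file's changes is the new return convention: every handler now returns a `[stdout, exitstatus]` pair instead of a bare integer, so callers destructure the result. A minimal caller sketch, assuming a working `hadoop` CLI on the PATH:

```ruby
require 'hadupils'

# Dispatches to the registered Hadoop handler; the handler returns
# [stdout, exitstatus], where stdout may be nil unless capture is enabled.
stdout, exitstatus = Hadupils::Commands.run 'hadoop', ['fs', '-ls', '/tmp']

puts "exit status: #{exitstatus}"
puts stdout unless stdout.nil?
```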
data/lib/hadupils/extensions/hive.rb CHANGED

@@ -223,7 +223,12 @@ module Hadupils::Extensions
       end
 
       ::Dir.chdir(workdir) do |p|
-
+        Open3.popen3('tar', 'cz', *basenames) do |i, o, e|
+          stderr = e.read
+          stdout = o.read
+          $stderr.puts stderr unless stderr.empty?
+          io << stdout
+        end
       end
     end
     true
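The replacement streams the archive through `Open3.popen3` rather than `Kernel.system`, which is what fixes the 1.8.7 issue noted in the changelog. A standalone sketch of the same pattern, with hypothetical paths:

```ruby
require 'open3'

# Stream `tar cz` output into an IO handle while surfacing stderr.
# /tmp/archive.tar.gz and /tmp/assets/jars are illustrative only.
File.open('/tmp/archive.tar.gz', 'wb') do |io|
  Dir.chdir('/tmp/assets') do
    Open3.popen3('tar', 'cz', 'jars') do |i, o, e|
      stderr = e.read
      stdout = o.read
      $stderr.puts stderr unless stderr.empty?
      io << stdout
    end
  end
end
```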
data/lib/hadupils/extensions.rb CHANGED

@@ -1,7 +1,83 @@
 require 'uuid'
+require 'open3'
 require 'tempfile'
 
 module Hadupils::Extensions
+  # Tools for managing shell commands/output performed by the runners
+  module Runners
+    module Shell
+      def self.command(*command_list)
+        opts = {}
+        stdout = nil
+        stderr = nil
+        status = nil
+
+        begin
+          if RUBY_VERSION < '1.9'
+            Open3.popen3(*command_list) do |i, o, e|
+              stdout = o.read
+              stderr = e.read
+            end
+            status = $?
+            $stdout.puts stdout unless stdout.nil? || stdout.empty? || Shell.silence_stdout?
+            $stderr.puts stderr unless stderr.nil? || stderr.empty?
+            stdout = nil unless capture_stdout?
+            stderr = nil unless capture_stderr?
+          else
+            stdout_rd, stdout_wr = IO.pipe if capture_stdout?
+            stderr_rd, stderr_wr = IO.pipe if capture_stderr?
+            opts[:out] = stdout_wr if capture_stdout?
+            opts[:err] = stderr_wr if capture_stderr?
+
+            # NOTE: eval prevents Ruby 1.8.7 from throwing a syntax error on Ruby 1.9+ syntax
+            result = eval 'Kernel.system(*command_list, opts)'
+            status = result ? $? : nil
+            if capture_stdout?
+              stdout_wr.close
+              stdout = stdout_rd.read
+              stdout_rd.close
+              $stdout.puts stdout unless stdout.nil? || stdout.empty? || Shell.silence_stdout?
+            end
+            if capture_stderr?
+              stderr_wr.close
+              stderr = stderr_rd.read
+              stderr_rd.close
+              $stderr.puts stderr unless stderr.nil? || stderr.empty?
+            end
+          end
+          [stdout, stderr, status]
+        rescue Errno::ENOENT => e
+          $stderr.puts e
+          [stdout, stderr, nil]
+        end
+      end
+
+      def self.capture_stderr?
+        @capture_stderr
+      end
+
+      def self.capture_stderr=(value)
+        @capture_stderr = value
+      end
+
+      def self.capture_stdout?
+        @capture_stdout || Shell.silence_stdout?
+      end
+
+      def self.capture_stdout=(value)
+        @capture_stdout = value
+      end
+
+      def self.silence_stdout?
+        @silence_stdout
+      end
+
+      def self.silence_stdout=(value)
+        @silence_stdout = value
+      end
+    end
+  end
+
   # Tools for managing tmp files in the hadoop dfs
   module Dfs
     module TmpFile

@@ -9,12 +85,16 @@ module Hadupils::Extensions
         @uuid ||= UUID.new
       end
 
+      def self.tmp_ttl
+        @tmp_ttl ||= (ENV['HADUPILS_TMP_TTL'] || '86400').to_i
+      end
+
       def self.tmp_path
-        @tmp_path ||= (ENV['HADUPILS_BASE_TMP_PATH'] || '/tmp')
+        @tmp_path ||= (ENV['HADUPILS_TMP_PATH'] || '/tmp')
       end
 
       def self.tmpfile_path
-        @
+        @tmpfile_path ||= ::File.join(tmp_path, "hadupils-tmp-#{uuid.generate(:compact)}")
       end
 
       def self.reset_tmpfile!
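A small usage sketch of the new Shell module (`echo` stands in for a real hadoop invocation): the capture flags decide whether stdout/stderr come back as strings, and `silence_stdout` suppresses terminal echo.

```ruby
require 'hadupils'

# Alias the module locally for brevity; this is not gem code.
Shell = Hadupils::Extensions::Runners::Shell

Shell.capture_stdout = true
stdout, stderr, status = Shell.command('echo', 'hello')

puts stdout            # => "hello\n" (captured via the IO.pipe branch)
puts status.exitstatus # => 0
```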
data/lib/hadupils/helpers.rb ADDED

@@ -0,0 +1,81 @@
+require 'time'
+
+module Hadupils::Helpers
+  module TextHelper
+    def pluralize(count, singular, plural=nil)
+      if count == 1
+        "1 #{singular}"
+      elsif plural
+        "#{count} #{plural}"
+      else
+        "#{count} #{singular}s"
+      end
+    end
+  end
+
+  module Dfs
+    def parse_count(stdout)
+      parsed_count = {}
+      if stdout
+        result = stdout.squeeze(' ').split
+        parsed_count =
+          begin
+            { :dir_count    => result[0],
+              :file_count   => result[1],
+              :content_size => result[2],
+              :file_name    => result[3] }
+          end if result.length == 4 # Check for proper # of dfs -count columns
+      end
+      parsed_count
+    end
+
+    def parse_ls(stdout)
+      parsed_ls = []
+      if stdout
+        result = stdout.split(/\n/)
+        parsed_ls =
+          result[1..-1].map do |line|
+            l = line.squeeze(' ').split
+            begin
+              l = l[-3..-1]
+              [Time.parse("#{l[0]} #{l[1]}Z"), l[2]]
+            rescue ArgumentError
+              nil
+            end if l.length == 8 # Check for proper # of dfs -ls columns
+          end.compact unless result.empty?
+      end
+      parsed_ls
+    end
+
+    def hadupils_tmpfile?(parsed_line)
+      parsed_line.match(/hadupils-tmp/)
+    end
+
+    def dir_candidates(parsed_ls, ttl)
+      parsed_ls.inject([]) do |dir_candidates, (file_time, file_path)|
+        if file_time < (Time.now.utc - ttl)
+          dir_candidates << file_path
+        end
+        dir_candidates
+      end
+    end
+
+    def dir_empty?(count)
+      count.to_i == 0
+    end
+
+    def all_expired?(parsed_ls, ttl)
+      parsed_ls.all? {|file_time, file_path| file_time < (Time.now.utc - ttl)}
+    end
+
+    def hadupils_tmpfiles(parsed_ls)
+      parsed_ls.map do |time, file_path|
+        if hadupils_tmpfile? file_path
+          [time, file_path]
+        else
+          nil
+        end
+      end.compact
+    end
+  end
+end
data/lib/hadupils/runners.rb CHANGED

@@ -1,6 +1,7 @@
 module Hadupils::Runners
   class Base
-
+    include Hadupils::Extensions::Runners
+    attr_reader :params, :last_stdout, :last_stderr, :last_status
 
     def initialize(params)
       @params = params

@@ -14,6 +15,7 @@ module Hadupils::Runners
 
     def execute!
       command_list = command
+
       if RUBY_VERSION < '1.9' and command_list[0].kind_of? Hash
         deletes = []
         overrides = {}

@@ -26,24 +28,23 @@ module Hadupils::Runners
           end
           ::ENV[key] = val
         end
-
+        Shell.command(*command_list[1..-1])
       ensure
         overrides.each {|key, val| ::ENV[key] = val }
         deletes.each {|key| ::ENV.delete key }
       end
       else
-
+        Shell.command(*command_list)
       end
     end
 
     def wait!
-      @
-      @
-      last_exitstatus
+      @last_stdout, @last_stderr, @last_status = execute!
+      [@last_stdout, last_exitstatus]
     end
 
     def last_exitstatus
-      if @
+      if @last_status.nil?
         255
       else
         @last_status.exitstatus
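With `wait!` now returning the recorded pair, any Base subclass that defines `#command` gets the `[stdout, exitstatus]` interface for free. A hypothetical subclass sketch (not part of the gem):

```ruby
require 'hadupils'

# Hypothetical runner: Base subclasses supply #command, and #wait!
# returns the [stdout, exitstatus] pair captured from Shell.command.
class EchoRunner < Hadupils::Runners::Base
  def command
    ['echo'] + params  # assembled argv handed to Shell.command
  end
end

stdout, exitstatus = EchoRunner.new(['hello']).wait!
puts exitstatus  # => 0 when echo succeeds
```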
data/lib/hadupils.rb CHANGED

@@ -3,9 +3,11 @@ module Hadupils
 end
 
 require 'hadupils/assets'
-require 'hadupils/commands'
+require 'hadupils/helpers'
 require 'hadupils/extensions'
 require 'hadupils/runners'
 require 'hadupils/search'
 require 'hadupils/util'
 require 'hadupils/hacks'
+
+require 'hadupils/commands'
data/test/unit/commands_test.rb CHANGED

@@ -39,8 +39,9 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
     end
 
     should 'have a #run singleton method that dispatches to an instance #run' do
-
-
+      params = mock()
+      @klass.expects(:new).with(params).returns(instance = mock())
+      instance.expects(:run).with.returns(result = mock())
       assert_equal result, @klass.run(params)
     end
 

@@ -67,9 +68,8 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
 
     context '#run' do
       setup do
-        @
-        @
-        @command.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
+        @klass.any_instance.stubs(:user_config).with.returns(@user_config = mock())
+        @klass.any_instance.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
         @runner_class = Hadupils::Runners::Hadoop
       end
 

@@ -82,7 +82,7 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
       should 'apply hadoop_conf options to hadoop runner call' do
         @runner_class.expects(:run).with(@user_config_hadoop_confs +
           @hadoop_ext_hadoop_confs).returns(result = mock())
-        assert_equal result, @
+        assert_equal result, @klass.new([]).run
       end
 
       should 'insert hadoop_conf options into position 1 of given params array to hadoop runner call' do

@@ -91,7 +91,7 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
           @user_config_hadoop_confs +
           @hadoop_ext_hadoop_confs +
           params[1..-1]).returns(result = mock())
-        assert_equal result, @
+        assert_equal result, @klass.new(params).run
       end
     end
 

@@ -103,12 +103,12 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
 
       should 'pass params unchanged through to hadoop runner call' do
         @runner_class.expects(:run).with(params = [mock(), mock()]).returns(result = mock())
-        assert_equal result, @
+        assert_equal result, @klass.new(params).run
       end
 
       should 'handle empty params' do
         @runner_class.expects(:run).with([]).returns(result = mock())
-        assert_equal result, @
+        assert_equal result, @klass.new([]).run
       end
     end
   end

@@ -125,8 +125,9 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
     end
 
     should 'have a #run singleton method that dispatches to an instance #run' do
-
-
+      params = mock()
+      @klass.expects(:new).with(params).returns(instance = mock())
+      instance.expects(:run).with.returns(result = mock())
       assert_equal result, @klass.run(params)
     end
 

@@ -160,10 +161,9 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
 
     context '#run' do
       setup do
-        @
-        @
-        @
-        @command.stubs(:hive_ext).with.returns(@hive_ext = mock)
+        @klass.any_instance.stubs(:user_config).with.returns(@user_config = mock())
+        @klass.any_instance.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
+        @klass.any_instance.stubs(:hive_ext).with.returns(@hive_ext = mock)
         @runner_class = Hadupils::Runners::Hive
       end
 

@@ -180,7 +180,7 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
           @hadoop_ext_hivercs +
           @hive_ext_hivercs,
           @hive_aux_jars_path).returns(result = mock())
-        assert_equal result, @
+        assert_equal result, @klass.new([]).run
       end
 
       should 'prepend hiverc options before given params to hive runner call' do

@@ -190,7 +190,7 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
           @hive_ext_hivercs +
           params,
           @hive_aux_jars_path).returns(result = mock())
-        assert_equal result, @
+        assert_equal result, @klass.new(params).run
      end
    end
 

@@ -204,12 +204,12 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
 
       should 'pass params unchanged through to hive runner call along with aux jars path' do
         @runner_class.expects(:run).with(params = [mock(), mock()], '').returns(result = mock())
-        assert_equal result, @
+        assert_equal result, @klass.new(params).run
       end
 
       should 'handle empty params' do
         @runner_class.expects(:run).with([], '').returns(result = mock())
-        assert_equal result, @
+        assert_equal result, @klass.new([]).run
       end
     end
   end

@@ -317,140 +317,203 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
         ::Dir.chdir @pwd
       end
     end
-
-
-
-
+  end
+
+  context 'MkTempFile' do
+    setup do
+      @klass = Hadupils::Commands::MkTmpFile
+    end
+
+    should 'register with :mktemp name' do
+      handlers = [:mktemp]
+      run_handler_assertions_for handlers
+    end
 
-
-
-
+    should 'have a #run singleton method that dispatches to an instance #run' do
+      params = mock()
+      @klass.expects(:new).with(params).returns(instance = mock())
+      instance.expects(:run).with.returns(result = mock())
+      assert_equal result, @klass.run(params)
+    end
+
+    context '#run' do
+      should 'provide invocation for bare mktemp if given empty parameters' do
+        tmpdir_path = mock().to_s
+        Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+        Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-touchz', tmpdir_path]).returns(['', 0])
+        Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-chmod', '700', tmpdir_path]).returns(['', 0])
+        assert_equal [nil, 0], @klass.new([]).run
       end
 
-      should '
-
-
+      should 'provide invocation for mktemp if given with -d flag parameter' do
+        tmpdir_path = mock().to_s
+        Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
+        Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-mkdir', tmpdir_path]).returns(['', 0])
+        Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-chmod', '700', tmpdir_path]).returns(['', 0])
+        assert_equal [nil, 0], @klass.new(['-d']).run
       end
+    end
+  end
 
-
-
-
-
-      end
+  context 'RmFile' do
+    setup do
+      @klass = Hadupils::Commands::RmFile
+    end
 
-
-
-
-
-        Kernel.expects(:system).with(@hadoop_path, 'fs', '-chmod', '700', tmpdir_path).returns(0)
-        assert_equal 0, @command.run([])
-      end
+    should 'register with :rm name' do
+      handlers = [:rm]
+      run_handler_assertions_for handlers
+    end
 
-
-
-
-
-
-        assert_equal 0, @command.run(['-d'])
-      end
-    end
+    should 'have a #run singleton method that dispatches to an instance #run' do
+      params = mock()
+      @klass.expects(:new).with(params).returns(instance = mock())
+      instance.expects(:run).with.returns(result = mock())
+      assert_equal result, @klass.run(params)
     end
 
-    context '
-
-      @klass
+    context '#run' do
+      should 'provide invocation for bare rm if given empty parameters' do
+        assert_equal [nil, 255], @klass.new([]).run
      end
 
-      should '
-
-
+      should 'provide invocation for rm if just tmpdir_path parameter' do
+        tmpdir_path = mock().to_s
+        Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rm', tmpdir_path]).returns(['', 0])
+        assert_equal [nil, 0], @klass.new([tmpdir_path]).run
       end
 
-      should '
-
-
-        assert_equal
+      should 'provide invocation for hadoop if just tmpdir_path with -r flag parameter' do
+        tmpdir_path = mock().to_s
+        Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir_path]).returns(['', 0])
+        assert_equal [nil, 0], @klass.new(['-r', tmpdir_path]).run
       end
+    end
+  end
 
-
-
-
-
-    end
+  context 'WithTempDir' do
+    setup do
+      @klass = Hadupils::Commands::WithTmpDir
+    end
 
-
-
-
+    should 'register with :withtmpdir name' do
+      handlers = [:withtmpdir]
+      run_handler_assertions_for handlers
+    end
 
-
-
-
-
-
+    should 'have a #run singleton method that dispatches to an instance #run' do
+      params = mock()
+      @klass.expects(:new).with(params).returns(instance = mock())
+      instance.expects(:run).with.returns(result = mock())
+      assert_equal result, @klass.run(params)
+    end
 
-
-
-
-
-
+    context '#run' do
+      should 'provide invocation for withtmpdir if given parameters for shell subcommand' do
+        tmpdir_path = mock().to_s
+        run_common_subcommand_assertions_with(tmpdir_path)
+        subcommand_params = [{'HADUPILS_TMPDIR_PATH' => tmpdir_path}, '/path/to/my_wonderful_script.sh']
+        Hadupils::Runners::Subcommand.expects(:run).with(subcommand_params).returns(['', 0])
+        Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir_path]).returns(['', 0])
+        assert_equal [nil, 0], @klass.new(['/path/to/my_wonderful_script.sh']).run
       end
 
-
-
-
-
-
-
-
-
-      end
+      should 'provide invocation for withtmpdir if given parameters for shell subcommand (another hadupils command)' do
+        tmpdir_path = mock().to_s
+        run_common_subcommand_assertions_with(tmpdir_path)
+        subcommand_params = [{'HADUPILS_TMPDIR_PATH' => tmpdir_path}, 'hadupils hadoop ls /tmp']
+        Hadupils::Runners::Subcommand.expects(:run).with(subcommand_params).returns(['', 0])
+        Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir_path]).returns(['', 0])
+        assert_equal [nil, 0], @klass.new(['hadupils hadoop ls /tmp']).run
+      end
 
-
-
-
-
-
+      should 'provide invocation for withtmpdir if given parameters for shell subcommand with nil result' do
+        tmpdir_path = mock().to_s
+        subcommand_params = [{'HADUPILS_TMPDIR_PATH' => tmpdir_path}, '/path/to/my_wonderful_script.sh']
+        run_common_subcommand_assertions_with(tmpdir_path)
+        Hadupils::Runners::Subcommand.expects(:run).with(subcommand_params).returns(['', 255])
+        assert_equal [nil, 255], @klass.new(['/path/to/my_wonderful_script.sh']).run
+      end
+    end
+  end
+
 
-
-
-
-
-      end
+  context 'Cleanup' do
+    setup do
+      @klass = Hadupils::Commands::Cleanup
+    end
 
-
-
-
-
-        Kernel.expects(:system).with(@hadoop_path, 'fs', '-rmr', tmpdir_path).returns(0)
-        assert_equal 0, @klass.run(['/path/to/my_wonderful_script.sh'])
-      end
+    should 'register with :cleanup name' do
+      handlers = [:cleanup]
+      run_handler_assertions_for handlers
+    end
 
-
-
-
-
-
-
-      end
+    should 'have a #run singleton method that dispatches to an instance #run' do
+      params = mock()
+      @klass.expects(:new).with(params).returns(instance = mock())
+      instance.expects(:run).with.returns(result = mock())
+      assert_equal result, @klass.run(params)
+    end
 
-
-
-
-
-
-
-
-
-
+    context '#run' do
+      should 'provide invocation for bare cleanup if given empty parameters' do
+        tmp_path = '/tmp'
+        tmpdir1 = File.join(tmp_path, 'hadupils-tmp-064708701f180131f7ef3c0754617b34')
+        tmpdir2 = File.join(tmp_path, 'hadupils-tmp-0e5175901f180131f7f03c0754617b34')
+
+        run_common_cleanup_assertions_with(tmp_path, tmpdir1, tmpdir2)
+        instance = @klass.new([])
+        assert_equal [nil, 0], instance.run
+        assert_equal 86400, instance.tmp_ttl
+        assert_equal '/tmp', instance.tmp_path
+      end
+
+      should 'provide invocation for cleanup if just tmp_path parameter' do
+        tmp_path = mock().to_s
+        tmpdir1 = File.join(tmp_path, 'hadupils-tmp-064708701f180131f7ef3c0754617b34')
+        tmpdir2 = File.join(tmp_path, 'hadupils-tmp-0e5175901f180131f7f03c0754617b34')
+
+        run_common_cleanup_assertions_with(tmp_path, tmpdir1, tmpdir2)
+        instance = @klass.new([tmp_path])
+        assert_equal [nil, 0], instance.run
+        assert_equal 86400, instance.tmp_ttl
+        assert_equal tmp_path, instance.tmp_path
+      end
+
+      should 'provide invocation for cleanup with tmp_path and ttl parameter' do
+        tmp_path = mock().to_s
+        tmpdir1 = File.join(tmp_path, 'hadupils-tmp-064708701f180131f7ef3c0754617b34')
+        tmpdir2 = File.join(tmp_path, 'hadupils-tmp-0e5175901f180131f7f03c0754617b34')
+
+        run_common_cleanup_assertions_with(tmp_path, tmpdir1, tmpdir2)
+        instance = @klass.new([tmp_path, '0'])
+        assert_equal [nil, 0], instance.run
+        assert_equal 0, instance.tmp_ttl
+        assert_equal tmp_path, instance.tmp_path
+      end
     end
   end
 
   def run_common_subcommand_assertions_with(tmpdir_path)
     Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
     Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
-
-
+    Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-mkdir', tmpdir_path]).returns(['', 0])
+    Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-chmod', '700', tmpdir_path]).returns(['', 0])
+  end
+
+  def run_common_cleanup_assertions_with(tmp_path, tmpdir1, tmpdir2)
+    ls_stdout =
+      "Found 2 items\n" +
+      "drwx------ - willdrew supergroup 0 2013-10-24 16:23 #{tmpdir1}\n" +
+      "drwx------ - willdrew supergroup 0 2013-10-24 16:23 #{tmpdir2}\n"
+    count_stdout1 = " 1 0 0 hdfs://localhost:9000#{tmpdir1}\n"
+    count_stdout2 = " 1 1 0 hdfs://localhost:9000#{tmpdir2}\n"
+    Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-ls', tmp_path]).returns([ls_stdout, 0])
+    Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-count', tmpdir1]).returns([count_stdout1, 0])
+    Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-count', tmpdir2]).returns([count_stdout2, 0])
+    Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-ls', File.join(tmpdir2, '**', '*')]).returns(['', 0])
+    Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir1]).returns(['', 0])
+    Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir2]).returns(['', 0])
   end
 
   def run_handler_assertions_for(handlers)
data/test/unit/runners_test.rb CHANGED

@@ -1,4 +1,6 @@
 class Hadupils::RunnersTest < Test::Unit::TestCase
+  include Hadupils::Extensions::Runners
+
   context Hadupils::Runners::Base do
     setup do
       @runner = Hadupils::Runners::Base.new(@params = mock())

@@ -21,20 +23,22 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
     end
 
     should 'assemble system call via command method' do
-      Kernel.expects(:system).with(*@command).returns(true)
       $?.stubs(:exitstatus).with.returns(mock())
+      last_status = $?
+      Shell.stubs(:command).with(*@command).returns([nil, nil, last_status])
       @runner.wait!
     end
 
     should 'return 255 when system returns nil' do
-
-      assert_equal 255, @runner.wait!
+      Shell.stubs(:command).returns([nil, nil, nil])
+      assert_equal [nil, 255], @runner.wait!
     end
 
     should 'return Process::Status#exitstatus when non-nil system result' do
-      Kernel.stubs(:system).returns(true)
       $?.stubs(:exitstatus).with.returns(status = mock())
-
+      last_status = $?
+      Shell.stubs(:command).returns([nil, nil, last_status])
+      assert_equal [nil, status], @runner.wait!
     end
   end
 

@@ -50,7 +54,7 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
 
     should 'handle command without env hash normally' do
       @runner.expects(:command).with.returns(@command)
-
+      Open3.expects(:popen3).with(*@command)
       $?.stubs(:exitstatus).with.returns(mock)
       @runner.wait!
     end

@@ -66,7 +70,8 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
       $?.stubs(:exitstatus).with.returns(mock)
       begin
         # Environment variable is overridden during system call
-
+        last_status = $?
+        matcher = Shell.stubs(:command).returns([nil, nil, last_status]).with do |*args|
           args == @command and ::ENV[var] == replacement and ::ENV[to_be_removed] == removal_val
         end
 
metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: hadupils
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.6.0
 prerelease:
 platform: ruby
 authors:

@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-10-
+date: 2013-10-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: uuid

@@ -102,6 +102,8 @@ files:
 - lib/hadupils/commands.rb
 - lib/hadupils/runners.rb
 - lib/hadupils/extensions/hive.rb
+- lib/hadupils/helpers.rb
+- lib/hadupils/commands/options.rb
 - lib/hadupils/extensions.rb
 - lib/hadupils/hacks.rb
 - lib/hadupils/assets.rb