hadupils 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +13 -0
- data/README.md +14 -2
- data/bin/hadupils +1 -1
- data/lib/hadupils/commands/options.rb +20 -0
- data/lib/hadupils/commands.rb +145 -38
- data/lib/hadupils/extensions/hive.rb +6 -1
- data/lib/hadupils/extensions.rb +82 -2
- data/lib/hadupils/helpers.rb +81 -0
- data/lib/hadupils/runners.rb +8 -7
- data/lib/hadupils.rb +3 -1
- data/test/unit/commands_test.rb +185 -122
- data/test/unit/runners_test.rb +12 -7
- metadata +4 -2
data/CHANGELOG.md
CHANGED
@@ -53,6 +53,19 @@
|
|
53
53
|
* Introduced Hadupils::Extensions::Dfs::TmpFile
|
54
54
|
* Introduced Hadupils::Hacks module for String Refinements (self.randcase)
|
55
55
|
for Ruby 2+ and Monkey Patching for the String class for Ruby < 2.0
|
56
|
+
* Introduced $HADUPILS_BASE_TMP_PATH and $HADUPILS_TMPDIR_PATH for use with
|
57
|
+
commands: mktemp, withtmpdir and rm
|
56
58
|
* Some refactoring and fixed a bug with the specs for Mac OS X
|
57
59
|
* Tweaked old unit tests and added new ones for the new features
|
58
60
|
* Updated the README with examples
|
61
|
+
|
62
|
+
### 0.6.0
|
63
|
+
* Renamed $HADUPILS_BASE_TMP_PATH to $HADUPILS_TMP_PATH (less typing)
|
64
|
+
* Introduced $HADUPILS_TMP_TTL for use with command: cleanup
|
65
|
+
* Introduced Hadupils::Commands::Cleanup to identify and remove old hadupils tmp DFS
|
66
|
+
directories/files where all files within any hadupils-tmpdir* in $HADUPILS_TMP_PATH
|
67
|
+
are older than $HADUPILS_TMP_TTL, the TTL (Time.now.utc - $HADUPILS_TMP_TTL)
|
68
|
+
* The Hadupils::Runners::Base.new.execute! method now uses Open3.capture2 or Kernel.system
|
69
|
+
* Fixed 1.8.7 compatibility bug with the Kernel.system call in
|
70
|
+
Hadupils::Extensions::Hive::AuxJarsPath.build_archive
|
71
|
+
* Some refactoring
|
data/README.md
CHANGED
@@ -4,20 +4,31 @@ hadupils
|
|
4
4
|
Operating environment oriented utilities for hadoop (Hadoop + Utils => hadupils)
|
5
5
|
|
6
6
|
## Shell Environment Variables
|
7
|
-
- $
|
7
|
+
- $HADUPILS_TMP_PATH
|
8
8
|
* This is the base path for DFS temporary file/directory creation
|
9
9
|
* Defaults to '/tmp' on the DFS (only set this if you need another base directory)
|
10
|
+
* Command 'cleanup' will use this ENV var for the base tmp_path to look for /hadupils-tmp*/
|
11
|
+
tmpdirs if the tmp_path isn't set through the command line
|
12
|
+
* Other commands that use this are: mktemp, withtmpdir
|
13
|
+
|
10
14
|
- $HADUPILS_TMPDIR_PATH
|
11
15
|
* Set when the subcommand is executed in a subshell via the hadupils 'withtmpdir' command
|
12
16
|
* The value comes from the tmp directory that hadupils created for the subcommand
|
13
17
|
* It will cleanup (remove) the directory if the subcommand returns an exitstatus of zero
|
18
|
+
- $HADUPILS_TMP_TTL
|
19
|
+
* This is the Time-To-Live for hadupils DFS temporary files/directories (hadupils-tmp*)
|
20
|
+
* Defaults to '86400' (24 hours)
|
21
|
+
* Command 'cleanup' will use this ENV var to remove any /hadupils-tmp*/ tmpdirs within
|
22
|
+
$HADUPILS_TMP_PATH where all files within are older than TTL, (Time.now.utc - $HADUPILS_TMP_TTL)
|
23
|
+
if ttl isn't set through the command line
|
14
24
|
|
15
25
|
## Hadupils' Commands
|
16
26
|
- hive __command__ _options_
|
17
27
|
- hadoop __command__ _options_
|
18
28
|
- mktemp [-d]
|
19
29
|
- withtmpdir __subshell_command__
|
20
|
-
- rm [-
|
30
|
+
- rm [-rR] __full_path_to_file_or_directory__
|
31
|
+
- cleanup [-n] __full_path_to_tmp_dir__ __ttl__
|
21
32
|
|
22
33
|
### Example Usages
|
23
34
|
``` shell
|
@@ -26,4 +37,5 @@ hadupils hadoop fs -ls /tmp
|
|
26
37
|
hadupils mktemp -d
|
27
38
|
hadupils withtmpdir 'echo $HADUPILS_TMPDIR_PATH'
|
28
39
|
hadupils rm -r /tmp/hadupils-tmp-e341afe01721013128c122000af92329
|
40
|
+
hadupils cleanup -n
|
29
41
|
```
|
data/bin/hadupils
CHANGED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Hadupils::Commands
|
2
|
+
module Options
|
3
|
+
# NOTE: Only a single option per command (known limitation for now)
|
4
|
+
module Directory
|
5
|
+
def perform_directory?
|
6
|
+
%w(-d --directory).include? params[0]
|
7
|
+
end
|
8
|
+
end
|
9
|
+
module DryRun
|
10
|
+
def perform_dry_run?
|
11
|
+
%w(-n --dry-run).include? params[0]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
module Recursive
|
15
|
+
def perform_recursive?
|
16
|
+
%w(-r -R --recursive).include? params[0]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/hadupils/commands.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'hadupils/commands/options'
|
2
|
+
|
1
3
|
module Hadupils::Commands
|
2
4
|
def self.run(command, params=[])
|
3
5
|
handler = handler_for command
|
@@ -18,8 +20,14 @@ module Hadupils::Commands
|
|
18
20
|
end
|
19
21
|
|
20
22
|
class SimpleCommand
|
23
|
+
attr_reader :params
|
24
|
+
|
25
|
+
def initialize(params=[])
|
26
|
+
@params = params
|
27
|
+
end
|
28
|
+
|
21
29
|
def self.run(params=[])
|
22
|
-
self.new.run
|
30
|
+
self.new(params).run
|
23
31
|
end
|
24
32
|
|
25
33
|
def successful?(exitstatus)
|
@@ -50,9 +58,9 @@ module Hadupils::Commands
|
|
50
58
|
include UserConf
|
51
59
|
|
52
60
|
def assemble_parameters(parameters)
|
53
|
-
@hadoop_ext
|
61
|
+
@hadoop_ext = Hadupils::Extensions::Static.new(Hadupils::Search.hadoop_assets)
|
54
62
|
hadoop_cmd = parameters[0...1]
|
55
|
-
hadoop_cmd_opts
|
63
|
+
hadoop_cmd_opts = parameters[1..-1] || []
|
56
64
|
|
57
65
|
if %w(fs dfs).include? parameters[0]
|
58
66
|
hadoop_cmd + user_config.hadoop_confs + hadoop_ext.hadoop_confs + hadoop_cmd_opts
|
@@ -62,8 +70,8 @@ module Hadupils::Commands
|
|
62
70
|
end
|
63
71
|
end
|
64
72
|
|
65
|
-
def run
|
66
|
-
Hadupils::Runners::Hadoop.run assemble_parameters(
|
73
|
+
def run
|
74
|
+
Hadupils::Runners::Hadoop.run assemble_parameters(params)
|
67
75
|
end
|
68
76
|
end
|
69
77
|
|
@@ -78,61 +86,71 @@ module Hadupils::Commands
|
|
78
86
|
user_config.hivercs + hadoop_ext.hivercs + hive_ext.hivercs + parameters
|
79
87
|
end
|
80
88
|
|
81
|
-
def run
|
82
|
-
Hadupils::Runners::Hive.run assemble_parameters(
|
89
|
+
def run
|
90
|
+
Hadupils::Runners::Hive.run assemble_parameters(params), hive_ext.hive_aux_jars_path
|
83
91
|
end
|
84
92
|
end
|
85
93
|
|
86
94
|
register_handler :hive, Hive
|
87
95
|
|
88
96
|
class MkTmpFile < SimpleCommand
|
89
|
-
|
90
|
-
|
97
|
+
include Options::Directory
|
98
|
+
|
99
|
+
attr_reader :tmpdir_path
|
100
|
+
|
101
|
+
def initialize(params)
|
102
|
+
super(params)
|
91
103
|
Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile!
|
92
|
-
tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
|
104
|
+
@tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
|
105
|
+
end
|
93
106
|
|
107
|
+
def run
|
94
108
|
# Similar to shell mktemp, but for Hadoop DFS!
|
109
|
+
# Creates a new tmpdir and puts the full tmpdir_path to STDOUT
|
95
110
|
# Makes a tmp file by default; a tmp directory with '-d' flag
|
96
|
-
fs_cmd =
|
97
|
-
exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmpdir_path]
|
111
|
+
fs_cmd = perform_directory? ? '-mkdir' : '-touchz'
|
112
|
+
stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmpdir_path]
|
98
113
|
if successful? exitstatus
|
99
|
-
exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-chmod', '700', tmpdir_path]
|
114
|
+
stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-chmod', '700', tmpdir_path]
|
100
115
|
if successful? exitstatus
|
101
116
|
puts tmpdir_path
|
102
117
|
else
|
103
|
-
$stderr.puts "Failed to chmod 700 dfs tmpdir: #{tmpdir_path}"
|
118
|
+
$stderr.puts "Failed to dfs -chmod 700 dfs tmpdir: #{tmpdir_path}"
|
104
119
|
end
|
105
120
|
else
|
106
121
|
$stderr.puts "Failed creating dfs tmpdir: #{tmpdir_path}"
|
107
122
|
end
|
108
|
-
exitstatus
|
123
|
+
[nil, exitstatus]
|
109
124
|
end
|
110
125
|
end
|
111
126
|
|
112
127
|
register_handler :mktemp, MkTmpFile
|
113
128
|
|
114
129
|
class RmFile < SimpleCommand
|
115
|
-
|
130
|
+
include Hadupils::Helpers::TextHelper
|
131
|
+
include Options::Recursive
|
132
|
+
|
133
|
+
def assemble_parameters(parameters)
|
134
|
+
perform_recursive? ? ['-rmr', parameters[1..-1]] : ['-rm', parameters[0..-1]]
|
135
|
+
end
|
136
|
+
|
137
|
+
def run
|
116
138
|
# Similar to shell rm, but for Hadoop DFS!
|
117
139
|
# Removes files by default; removes directories recursively with '-r' flag
|
118
|
-
fs_cmd, tmp_dirs =
|
119
|
-
if parameters[0] == '-r'
|
120
|
-
['-rmr', parameters[1..-1]]
|
121
|
-
else
|
122
|
-
['-rm', parameters[0..-1]]
|
123
|
-
end
|
140
|
+
fs_cmd, tmp_dirs = assemble_parameters(params)
|
124
141
|
|
125
142
|
if tmp_dirs.empty?
|
126
143
|
$stderr.puts 'Failed to remove unspecified tmpdir(s), please specify tmpdir_path'
|
127
|
-
255
|
144
|
+
[nil, 255]
|
128
145
|
else
|
129
|
-
exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmp_dirs].flatten
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
146
|
+
stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', fs_cmd, tmp_dirs].flatten
|
147
|
+
unless successful? exitstatus
|
148
|
+
$stderr.puts "Failed to remove #{pluralize(tmp_dirs.length, 'tmpdir', 'tmpdirs')}"
|
149
|
+
tmp_dirs.each do |tmp_dir|
|
150
|
+
$stderr.puts tmp_dir
|
151
|
+
end
|
134
152
|
end
|
135
|
-
exitstatus
|
153
|
+
[nil, exitstatus]
|
136
154
|
end
|
137
155
|
end
|
138
156
|
end
|
@@ -140,32 +158,121 @@ module Hadupils::Commands
|
|
140
158
|
register_handler :rm, RmFile
|
141
159
|
|
142
160
|
class WithTmpDir < SimpleCommand
|
143
|
-
def run
|
161
|
+
def run
|
144
162
|
# Runs provided subcommand with tmpdir and cleans up tmpdir on an exitstatus of zero
|
145
|
-
if
|
163
|
+
if params.empty?
|
146
164
|
$stderr.puts 'Yeeaaahhh... sooo... you failed to provide a subcommand...'
|
147
|
-
255
|
165
|
+
[nil, 255]
|
148
166
|
else
|
149
167
|
# Let's create the tmpdir
|
150
|
-
exitstatus = Hadupils::Commands::MkTmpFile.run ['-d']
|
168
|
+
stdout, exitstatus = Hadupils::Commands::MkTmpFile.run ['-d']
|
151
169
|
if successful? exitstatus
|
152
170
|
tmpdir_path = Hadupils::Extensions::Dfs::TmpFile.tmpfile_path
|
153
|
-
|
171
|
+
params.unshift({'HADUPILS_TMPDIR_PATH' => tmpdir_path})
|
154
172
|
|
155
173
|
# Let's run the shell subcommand!
|
156
|
-
exitstatus = Hadupils::Runners::Subcommand.run
|
174
|
+
stdout, exitstatus = Hadupils::Runners::Subcommand.run params
|
157
175
|
|
158
176
|
if successful? exitstatus
|
159
177
|
# Let's attempt to cleanup tmpdir_path
|
160
|
-
exitstatus = Hadupils::Commands::RmFile.run ['-r', tmpdir_path]
|
178
|
+
stdout, exitstatus = Hadupils::Commands::RmFile.run ['-r', tmpdir_path]
|
161
179
|
else
|
162
|
-
$stderr.puts "Failed to run shell subcommand: #{
|
180
|
+
$stderr.puts "Failed to run shell subcommand: #{params}"
|
163
181
|
end
|
164
182
|
end
|
165
|
-
|
183
|
+
Hadupils::Extensions::Dfs::TmpFile.reset_tmpfile!
|
184
|
+
[nil, exitstatus]
|
166
185
|
end
|
167
186
|
end
|
168
187
|
end
|
169
188
|
|
170
189
|
register_handler :withtmpdir, WithTmpDir
|
190
|
+
|
191
|
+
class Cleanup < SimpleCommand
|
192
|
+
include Hadupils::Extensions::Dfs
|
193
|
+
include Hadupils::Extensions::Runners
|
194
|
+
include Hadupils::Helpers::Dfs
|
195
|
+
include Hadupils::Helpers::TextHelper
|
196
|
+
include Options::DryRun
|
197
|
+
|
198
|
+
attr_accessor :expired_exitstatuses
|
199
|
+
attr_accessor :rm_exitstatuses
|
200
|
+
attr_reader :tmp_path
|
201
|
+
attr_reader :tmp_ttl
|
202
|
+
|
203
|
+
def initialize(params)
|
204
|
+
super(params)
|
205
|
+
@expired_exitstatuses = []
|
206
|
+
@rm_exitstatuses = []
|
207
|
+
@tmp_path = (perform_dry_run? ? params[1] : params[0]) || TmpFile.tmp_path
|
208
|
+
@tmp_ttl = ((perform_dry_run? ? params[2] : params[1]) || TmpFile.tmp_ttl).to_i
|
209
|
+
end
|
210
|
+
|
211
|
+
def run
|
212
|
+
# Removes old hadupils tmp files/dirs where all files within a tmpdir are also older than the TTL
|
213
|
+
# User configurable by setting the ENV variable $HADUPILS_TMP_TTL, defaults to 86400 (last 24 hours)
|
214
|
+
# User may also perform a dry-run via a -n or a --dry-run flag
|
215
|
+
|
216
|
+
# Silence the Runner's shell STDOUT noise
|
217
|
+
Shell.silence_stdout = true
|
218
|
+
|
219
|
+
# Get candidate directories
|
220
|
+
stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-ls', tmp_path]
|
221
|
+
if successful? exitstatus
|
222
|
+
rm_array = []
|
223
|
+
dir_candidates(hadupils_tmpfiles(parse_ls(stdout)), tmp_ttl).each do |dir_candidate|
|
224
|
+
next unless has_expired? dir_candidate, tmp_ttl
|
225
|
+
rm_array << dir_candidate
|
226
|
+
end
|
227
|
+
|
228
|
+
exitstatus = expired_exitstatuses.all? {|expired_exitstatus| expired_exitstatus == 0} ? 0 : 255
|
229
|
+
if successful? exitstatus
|
230
|
+
puts "Found #{pluralize(rm_array.length, 'item', 'items')} to be removed recursively"
|
231
|
+
rm_array.each {|rm_item| puts rm_item }
|
232
|
+
|
233
|
+
unless perform_dry_run?
|
234
|
+
# Now want the user to see the Runner's shell STDOUT
|
235
|
+
Shell.silence_stdout = false
|
236
|
+
|
237
|
+
puts 'Removing...'
|
238
|
+
rm_array.each do |dir|
|
239
|
+
rm_stdout, rm_exitstatus = Hadupils::Commands::RmFile.run ['-r', dir]
|
240
|
+
rm_exitstatuses << rm_exitstatus
|
241
|
+
$stderr.puts "Failed to recursively remove: #{dir}" unless successful? rm_exitstatus
|
242
|
+
end
|
243
|
+
end
|
244
|
+
exitstatus = rm_exitstatuses.all? {|rm_exitstatus| rm_exitstatus == 0} ? 0 : 255
|
245
|
+
end
|
246
|
+
end
|
247
|
+
[nil, exitstatus]
|
248
|
+
end
|
249
|
+
|
250
|
+
def has_expired?(dir_candidate, ttl)
|
251
|
+
stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-count', dir_candidate]
|
252
|
+
expired_exitstatuses << exitstatus
|
253
|
+
if successful? exitstatus
|
254
|
+
parsed_count = parse_count(stdout)
|
255
|
+
if parsed_count.empty?
|
256
|
+
$stderr.puts "Failed to parse dfs -count for stdout: #{stdout}"
|
257
|
+
expired_exitstatuses << 255
|
258
|
+
elsif dir_empty? parsed_count[:file_count]
|
259
|
+
true
|
260
|
+
else
|
261
|
+
stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-ls', File.join(dir_candidate, '**', '*')]
|
262
|
+
expired_exitstatuses << exitstatus
|
263
|
+
if successful? exitstatus
|
264
|
+
all_expired? parse_ls(stdout), ttl
|
265
|
+
else
|
266
|
+
$stderr.puts "Failed to perform dfs -ls on path: #{File.join(dir_candidate, '**', '*')}"
|
267
|
+
false
|
268
|
+
end
|
269
|
+
end
|
270
|
+
else
|
271
|
+
$stderr.puts "Failed to perform dfs -count on path: #{dir_candidate}"
|
272
|
+
false
|
273
|
+
end
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
register_handler :cleanup, Cleanup
|
171
278
|
end
|
@@ -223,7 +223,12 @@ module Hadupils::Extensions
|
|
223
223
|
end
|
224
224
|
|
225
225
|
::Dir.chdir(workdir) do |p|
|
226
|
-
|
226
|
+
Open3.popen3('tar', 'cz', *basenames) do |i, o, e|
|
227
|
+
stderr = e.read
|
228
|
+
stdout = o.read
|
229
|
+
$stderr.puts stderr unless stderr.empty?
|
230
|
+
io << stdout
|
231
|
+
end
|
227
232
|
end
|
228
233
|
end
|
229
234
|
true
|
data/lib/hadupils/extensions.rb
CHANGED
@@ -1,7 +1,83 @@
|
|
1
1
|
require 'uuid'
|
2
|
+
require 'open3'
|
2
3
|
require 'tempfile'
|
3
4
|
|
4
5
|
module Hadupils::Extensions
|
6
|
+
# Tools for managing shell commands/output performed by the runners
|
7
|
+
module Runners
|
8
|
+
module Shell
|
9
|
+
def self.command(*command_list)
|
10
|
+
opts = {}
|
11
|
+
stdout = nil
|
12
|
+
stderr = nil
|
13
|
+
status = nil
|
14
|
+
|
15
|
+
begin
|
16
|
+
if RUBY_VERSION < '1.9'
|
17
|
+
Open3.popen3(*command_list) do |i, o, e|
|
18
|
+
stdout = o.read
|
19
|
+
stderr = e.read
|
20
|
+
end
|
21
|
+
status = $?
|
22
|
+
$stdout.puts stdout unless stdout.nil? || stdout.empty? || Shell.silence_stdout?
|
23
|
+
$stderr.puts stderr unless stderr.nil? || stderr.empty?
|
24
|
+
stdout = nil unless capture_stdout?
|
25
|
+
stderr = nil unless capture_stderr?
|
26
|
+
else
|
27
|
+
stdout_rd, stdout_wr = IO.pipe if capture_stdout?
|
28
|
+
stderr_rd, stderr_wr = IO.pipe if capture_stderr?
|
29
|
+
opts[:out] = stdout_wr if capture_stdout?
|
30
|
+
opts[:err] = stderr_wr if capture_stderr?
|
31
|
+
|
32
|
+
# NOTE: eval prevents Ruby 1.8.7 from throwing a syntax error on Ruby 1.9+ syntax
|
33
|
+
result = eval 'Kernel.system(*command_list, opts)'
|
34
|
+
status = result ? $? : nil
|
35
|
+
if capture_stdout?
|
36
|
+
stdout_wr.close
|
37
|
+
stdout = stdout_rd.read
|
38
|
+
stdout_rd.close
|
39
|
+
$stdout.puts stdout unless stdout.nil? || stdout.empty? || Shell.silence_stdout?
|
40
|
+
end
|
41
|
+
if capture_stderr?
|
42
|
+
stderr_wr.close
|
43
|
+
stderr = stderr_rd.read
|
44
|
+
stderr_rd.close
|
45
|
+
$stderr.puts stderr unless stderr.nil? || stderr.empty?
|
46
|
+
end
|
47
|
+
end
|
48
|
+
[stdout, stderr, status]
|
49
|
+
rescue Errno::ENOENT => e
|
50
|
+
$stderr.puts e
|
51
|
+
[stdout, stderr, nil]
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def self.capture_stderr?
|
56
|
+
@capture_stderr
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.capture_stderr=(value)
|
60
|
+
@capture_stderr = value
|
61
|
+
end
|
62
|
+
|
63
|
+
def self.capture_stdout?
|
64
|
+
@capture_stdout || Shell.silence_stdout?
|
65
|
+
end
|
66
|
+
|
67
|
+
def self.capture_stdout=(value)
|
68
|
+
@capture_stdout = value
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.silence_stdout?
|
72
|
+
@silence_stdout
|
73
|
+
end
|
74
|
+
|
75
|
+
def self.silence_stdout=(value)
|
76
|
+
@silence_stdout = value
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
5
81
|
# Tools for managing tmp files in the hadoop dfs
|
6
82
|
module Dfs
|
7
83
|
module TmpFile
|
@@ -9,12 +85,16 @@ module Hadupils::Extensions
|
|
9
85
|
@uuid ||= UUID.new
|
10
86
|
end
|
11
87
|
|
88
|
+
def self.tmp_ttl
|
89
|
+
@tmp_ttl ||= (ENV['HADUPILS_TMP_TTL'] || '86400').to_i
|
90
|
+
end
|
91
|
+
|
12
92
|
def self.tmp_path
|
13
|
-
@tmp_path ||= (ENV['
|
93
|
+
@tmp_path ||= (ENV['HADUPILS_TMP_PATH'] || '/tmp')
|
14
94
|
end
|
15
95
|
|
16
96
|
def self.tmpfile_path
|
17
|
-
@
|
97
|
+
@tmpfile_path ||= ::File.join(tmp_path, "hadupils-tmp-#{uuid.generate(:compact)}")
|
18
98
|
end
|
19
99
|
|
20
100
|
def self.reset_tmpfile!
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module Hadupils::Helpers
|
4
|
+
module TextHelper
|
5
|
+
def pluralize(count, singular, plural=nil)
|
6
|
+
if count == 1
|
7
|
+
"1 #{singular}"
|
8
|
+
elsif plural
|
9
|
+
"#{count} #{plural}"
|
10
|
+
else
|
11
|
+
"#{count} #{singular}s"
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
module Dfs
|
17
|
+
def parse_count(stdout)
|
18
|
+
parsed_count = {}
|
19
|
+
if stdout
|
20
|
+
result = stdout.squeeze(' ').split
|
21
|
+
parsed_count =
|
22
|
+
begin
|
23
|
+
{ :dir_count => result[0],
|
24
|
+
:file_count => result[1],
|
25
|
+
:content_size => result[2],
|
26
|
+
:file_name => result[3] }
|
27
|
+
end if result.length == 4 # Check for proper # of dfs -count columns
|
28
|
+
end
|
29
|
+
parsed_count
|
30
|
+
end
|
31
|
+
|
32
|
+
def parse_ls(stdout)
|
33
|
+
parsed_ls = []
|
34
|
+
if stdout
|
35
|
+
result = stdout.split(/\n/)
|
36
|
+
parsed_ls =
|
37
|
+
result[1..-1].map do |line|
|
38
|
+
l = line.squeeze(' ').split
|
39
|
+
begin
|
40
|
+
l = l[-3..-1]
|
41
|
+
[Time.parse("#{l[0]} #{l[1]}Z"), l[2]]
|
42
|
+
rescue ArgumentError
|
43
|
+
nil
|
44
|
+
end if l.length == 8 # Check for proper # of dfs -ls columns
|
45
|
+
end.compact unless result.empty?
|
46
|
+
end
|
47
|
+
parsed_ls
|
48
|
+
end
|
49
|
+
|
50
|
+
def hadupils_tmpfile?(parsed_line)
|
51
|
+
parsed_line.match(/hadupils-tmp/)
|
52
|
+
end
|
53
|
+
|
54
|
+
def dir_candidates(parsed_ls, ttl)
|
55
|
+
parsed_ls.inject([]) do |dir_candidates, (file_time, file_path)|
|
56
|
+
if file_time < (Time.now.utc - ttl)
|
57
|
+
dir_candidates << file_path
|
58
|
+
end
|
59
|
+
dir_candidates
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def dir_empty?(count)
|
64
|
+
count.to_i == 0
|
65
|
+
end
|
66
|
+
|
67
|
+
def all_expired?(parsed_ls, ttl)
|
68
|
+
parsed_ls.all? {|file_time, file_path| file_time < (Time.now.utc - ttl)}
|
69
|
+
end
|
70
|
+
|
71
|
+
def hadupils_tmpfiles(parsed_ls)
|
72
|
+
parsed_ls.map do |time, file_path|
|
73
|
+
if hadupils_tmpfile? file_path
|
74
|
+
[time, file_path]
|
75
|
+
else
|
76
|
+
nil
|
77
|
+
end
|
78
|
+
end.compact
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/hadupils/runners.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
module Hadupils::Runners
|
2
2
|
class Base
|
3
|
-
|
3
|
+
include Hadupils::Extensions::Runners
|
4
|
+
attr_reader :params, :last_stdout, :last_stderr, :last_status
|
4
5
|
|
5
6
|
def initialize(params)
|
6
7
|
@params = params
|
@@ -14,6 +15,7 @@ module Hadupils::Runners
|
|
14
15
|
|
15
16
|
def execute!
|
16
17
|
command_list = command
|
18
|
+
|
17
19
|
if RUBY_VERSION < '1.9' and command_list[0].kind_of? Hash
|
18
20
|
deletes = []
|
19
21
|
overrides = {}
|
@@ -26,24 +28,23 @@ module Hadupils::Runners
|
|
26
28
|
end
|
27
29
|
::ENV[key] = val
|
28
30
|
end
|
29
|
-
|
31
|
+
Shell.command(*command_list[1..-1])
|
30
32
|
ensure
|
31
33
|
overrides.each {|key, val| ::ENV[key] = val }
|
32
34
|
deletes.each {|key| ::ENV.delete key }
|
33
35
|
end
|
34
36
|
else
|
35
|
-
|
37
|
+
Shell.command(*command_list)
|
36
38
|
end
|
37
39
|
end
|
38
40
|
|
39
41
|
def wait!
|
40
|
-
@
|
41
|
-
@
|
42
|
-
last_exitstatus
|
42
|
+
@last_stdout, @last_stderr, @last_status = execute!
|
43
|
+
[@last_stdout, last_exitstatus]
|
43
44
|
end
|
44
45
|
|
45
46
|
def last_exitstatus
|
46
|
-
if @
|
47
|
+
if @last_status.nil?
|
47
48
|
255
|
48
49
|
else
|
49
50
|
@last_status.exitstatus
|
data/lib/hadupils.rb
CHANGED
@@ -3,9 +3,11 @@ module Hadupils
|
|
3
3
|
end
|
4
4
|
|
5
5
|
require 'hadupils/assets'
|
6
|
-
require 'hadupils/
|
6
|
+
require 'hadupils/helpers'
|
7
7
|
require 'hadupils/extensions'
|
8
8
|
require 'hadupils/runners'
|
9
9
|
require 'hadupils/search'
|
10
10
|
require 'hadupils/util'
|
11
11
|
require 'hadupils/hacks'
|
12
|
+
|
13
|
+
require 'hadupils/commands'
|
data/test/unit/commands_test.rb
CHANGED
@@ -39,8 +39,9 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
39
39
|
end
|
40
40
|
|
41
41
|
should 'have a #run singleton method that dispatches to an instance #run' do
|
42
|
-
|
43
|
-
|
42
|
+
params = mock()
|
43
|
+
@klass.expects(:new).with(params).returns(instance = mock())
|
44
|
+
instance.expects(:run).with.returns(result = mock())
|
44
45
|
assert_equal result, @klass.run(params)
|
45
46
|
end
|
46
47
|
|
@@ -67,9 +68,8 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
67
68
|
|
68
69
|
context '#run' do
|
69
70
|
setup do
|
70
|
-
@
|
71
|
-
@
|
72
|
-
@command.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
|
71
|
+
@klass.any_instance.stubs(:user_config).with.returns(@user_config = mock())
|
72
|
+
@klass.any_instance.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
|
73
73
|
@runner_class = Hadupils::Runners::Hadoop
|
74
74
|
end
|
75
75
|
|
@@ -82,7 +82,7 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
82
82
|
should 'apply hadoop_conf options to hadoop runner call' do
|
83
83
|
@runner_class.expects(:run).with(@user_config_hadoop_confs +
|
84
84
|
@hadoop_ext_hadoop_confs).returns(result = mock())
|
85
|
-
assert_equal result, @
|
85
|
+
assert_equal result, @klass.new([]).run
|
86
86
|
end
|
87
87
|
|
88
88
|
should 'insert hadoop_conf options into position 1 of given params array to hadoop runner call' do
|
@@ -91,7 +91,7 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
91
91
|
@user_config_hadoop_confs +
|
92
92
|
@hadoop_ext_hadoop_confs +
|
93
93
|
params[1..-1]).returns(result = mock())
|
94
|
-
assert_equal result, @
|
94
|
+
assert_equal result, @klass.new(params).run
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
@@ -103,12 +103,12 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
103
103
|
|
104
104
|
should 'pass params unchanged through to hadoop runner call' do
|
105
105
|
@runner_class.expects(:run).with(params = [mock(), mock()]).returns(result = mock())
|
106
|
-
assert_equal result, @
|
106
|
+
assert_equal result, @klass.new(params).run
|
107
107
|
end
|
108
108
|
|
109
109
|
should 'handle empty params' do
|
110
110
|
@runner_class.expects(:run).with([]).returns(result = mock())
|
111
|
-
assert_equal result, @
|
111
|
+
assert_equal result, @klass.new([]).run
|
112
112
|
end
|
113
113
|
end
|
114
114
|
end
|
@@ -125,8 +125,9 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
125
125
|
end
|
126
126
|
|
127
127
|
should 'have a #run singleton method that dispatches to an instance #run' do
|
128
|
-
|
129
|
-
|
128
|
+
params = mock()
|
129
|
+
@klass.expects(:new).with(params).returns(instance = mock())
|
130
|
+
instance.expects(:run).with.returns(result = mock())
|
130
131
|
assert_equal result, @klass.run(params)
|
131
132
|
end
|
132
133
|
|
@@ -160,10 +161,9 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
160
161
|
|
161
162
|
context '#run' do
|
162
163
|
setup do
|
163
|
-
@
|
164
|
-
@
|
165
|
-
@
|
166
|
-
@command.stubs(:hive_ext).with.returns(@hive_ext = mock)
|
164
|
+
@klass.any_instance.stubs(:user_config).with.returns(@user_config = mock())
|
165
|
+
@klass.any_instance.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
|
166
|
+
@klass.any_instance.stubs(:hive_ext).with.returns(@hive_ext = mock)
|
167
167
|
@runner_class = Hadupils::Runners::Hive
|
168
168
|
end
|
169
169
|
|
@@ -180,7 +180,7 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
180
180
|
@hadoop_ext_hivercs +
|
181
181
|
@hive_ext_hivercs,
|
182
182
|
@hive_aux_jars_path).returns(result = mock())
|
183
|
-
assert_equal result, @
|
183
|
+
assert_equal result, @klass.new([]).run
|
184
184
|
end
|
185
185
|
|
186
186
|
should 'prepend hiverc options before given params to hive runner call' do
|
@@ -190,7 +190,7 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
190
190
|
@hive_ext_hivercs +
|
191
191
|
params,
|
192
192
|
@hive_aux_jars_path).returns(result = mock())
|
193
|
-
assert_equal result, @
|
193
|
+
assert_equal result, @klass.new(params).run
|
194
194
|
end
|
195
195
|
end
|
196
196
|
|
@@ -204,12 +204,12 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
204
204
|
|
205
205
|
should 'pass params unchanged through to hive runner call along with aux jars path' do
|
206
206
|
@runner_class.expects(:run).with(params = [mock(), mock()], '').returns(result = mock())
|
207
|
-
assert_equal result, @
|
207
|
+
assert_equal result, @klass.new(params).run
|
208
208
|
end
|
209
209
|
|
210
210
|
should 'handle empty params' do
|
211
211
|
@runner_class.expects(:run).with([], '').returns(result = mock())
|
212
|
-
assert_equal result, @
|
212
|
+
assert_equal result, @klass.new([]).run
|
213
213
|
end
|
214
214
|
end
|
215
215
|
end
|
@@ -317,140 +317,203 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
|
|
317
317
|
::Dir.chdir @pwd
|
318
318
|
end
|
319
319
|
end
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
320
|
+
end
|
321
|
+
|
322
|
+
context 'MkTempFile' do
|
323
|
+
setup do
|
324
|
+
@klass = Hadupils::Commands::MkTmpFile
|
325
|
+
end
|
326
|
+
|
327
|
+
should 'register with :mktemp name' do
|
328
|
+
handlers = [:mktemp]
|
329
|
+
run_handler_assertions_for handlers
|
330
|
+
end
|
324
331
|
|
325
|
-
|
326
|
-
|
327
|
-
|
332
|
+
should 'have a #run singleton method that dispatches to an instance #run' do
|
333
|
+
params = mock()
|
334
|
+
@klass.expects(:new).with(params).returns(instance = mock())
|
335
|
+
instance.expects(:run).with.returns(result = mock())
|
336
|
+
assert_equal result, @klass.run(params)
|
337
|
+
end
|
338
|
+
|
339
|
+
context '#run' do
|
340
|
+
should 'provide invocation for bare mktemp if given empty parameters' do
|
341
|
+
tmpdir_path = mock().to_s
|
342
|
+
Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
|
343
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-touchz', tmpdir_path]).returns(['', 0])
|
344
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-chmod', '700', tmpdir_path]).returns(['', 0])
|
345
|
+
assert_equal [nil, 0], @klass.new([]).run
|
328
346
|
end
|
329
347
|
|
330
|
-
should '
|
331
|
-
|
332
|
-
|
333
|
-
|
348
|
+
should 'provide invocation for mktemp if given with -d flag parameter' do
|
349
|
+
tmpdir_path = mock().to_s
|
350
|
+
Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
|
351
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-mkdir', tmpdir_path]).returns(['', 0])
|
352
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-chmod', '700', tmpdir_path]).returns(['', 0])
|
353
|
+
assert_equal [nil, 0], @klass.new(['-d']).run
|
334
354
|
end
|
355
|
+
end
|
356
|
+
end
|
335
357
|
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
end
|
358
|
+
context 'RmFile' do
|
359
|
+
setup do
|
360
|
+
@klass = Hadupils::Commands::RmFile
|
361
|
+
end
|
341
362
|
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
Kernel.expects(:system).with(@hadoop_path, 'fs', '-chmod', '700', tmpdir_path).returns(0)
|
347
|
-
assert_equal 0, @command.run([])
|
348
|
-
end
|
363
|
+
should 'register with :rm name' do
|
364
|
+
handlers = [:rm]
|
365
|
+
run_handler_assertions_for handlers
|
366
|
+
end
|
349
367
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
assert_equal 0, @command.run(['-d'])
|
356
|
-
end
|
357
|
-
end
|
368
|
+
should 'have a #run singleton method that dispatches to an instance #run' do
|
369
|
+
params = mock()
|
370
|
+
@klass.expects(:new).with(params).returns(instance = mock())
|
371
|
+
instance.expects(:run).with.returns(result = mock())
|
372
|
+
assert_equal result, @klass.run(params)
|
358
373
|
end
|
359
374
|
|
360
|
-
context '
|
361
|
-
|
362
|
-
@klass
|
375
|
+
context '#run' do
|
376
|
+
should 'provide invocation for bare rm if given empty parameters' do
|
377
|
+
assert_equal [nil, 255], @klass.new([]).run
|
363
378
|
end
|
364
379
|
|
365
|
-
should '
|
366
|
-
|
367
|
-
|
380
|
+
should 'provide invocation for rm if just tmpdir_path parameter' do
|
381
|
+
tmpdir_path = mock().to_s
|
382
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rm', tmpdir_path]).returns(['', 0])
|
383
|
+
assert_equal [nil, 0], @klass.new([tmpdir_path]).run
|
368
384
|
end
|
369
385
|
|
370
|
-
should '
|
371
|
-
|
372
|
-
|
373
|
-
assert_equal
|
386
|
+
should 'provide invocation for hadoop if just tmpdir_path with -r flag parameter' do
|
387
|
+
tmpdir_path = mock().to_s
|
388
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir_path]).returns(['', 0])
|
389
|
+
assert_equal [nil, 0], @klass.new(['-r', tmpdir_path]).run
|
374
390
|
end
|
391
|
+
end
|
392
|
+
end
|
375
393
|
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
end
|
394
|
+
context 'WithTempDir' do
|
395
|
+
setup do
|
396
|
+
@klass = Hadupils::Commands::WithTmpDir
|
397
|
+
end
|
381
398
|
|
382
|
-
|
383
|
-
|
384
|
-
|
399
|
+
should 'register with :withtmpdir name' do
|
400
|
+
handlers = [:withtmpdir]
|
401
|
+
run_handler_assertions_for handlers
|
402
|
+
end
|
385
403
|
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
404
|
+
should 'have a #run singleton method that dispatches to an instance #run' do
|
405
|
+
params = mock()
|
406
|
+
@klass.expects(:new).with(params).returns(instance = mock())
|
407
|
+
instance.expects(:run).with.returns(result = mock())
|
408
|
+
assert_equal result, @klass.run(params)
|
409
|
+
end
|
391
410
|
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
411
|
+
context '#run' do
|
412
|
+
should 'provide invocation for withtmpdir if given parameters for shell subcommand' do
|
413
|
+
tmpdir_path = mock().to_s
|
414
|
+
run_common_subcommand_assertions_with(tmpdir_path)
|
415
|
+
subcommand_params = [{'HADUPILS_TMPDIR_PATH' => tmpdir_path}, '/path/to/my_wonderful_script.sh']
|
416
|
+
Hadupils::Runners::Subcommand.expects(:run).with(subcommand_params).returns(['', 0])
|
417
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir_path]).returns(['', 0])
|
418
|
+
assert_equal [nil, 0], @klass.new(['/path/to/my_wonderful_script.sh']).run
|
397
419
|
end
|
398
420
|
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
end
|
421
|
+
should 'provide invocation for withtmpdir if given parameters for shell subcommand (another hadupils command)' do
|
422
|
+
tmpdir_path = mock().to_s
|
423
|
+
run_common_subcommand_assertions_with(tmpdir_path)
|
424
|
+
subcommand_params = [{'HADUPILS_TMPDIR_PATH' => tmpdir_path}, 'hadupils hadoop ls /tmp']
|
425
|
+
Hadupils::Runners::Subcommand.expects(:run).with(subcommand_params).returns(['', 0])
|
426
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir_path]).returns(['', 0])
|
427
|
+
assert_equal [nil, 0], @klass.new(['hadupils hadoop ls /tmp']).run
|
428
|
+
end
|
408
429
|
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
430
|
+
should 'provide invocation for withtmpdir if given parameters for shell subcommand with nil result' do
|
431
|
+
tmpdir_path = mock().to_s
|
432
|
+
subcommand_params = [{'HADUPILS_TMPDIR_PATH' => tmpdir_path}, '/path/to/my_wonderful_script.sh']
|
433
|
+
run_common_subcommand_assertions_with(tmpdir_path)
|
434
|
+
Hadupils::Runners::Subcommand.expects(:run).with(subcommand_params).returns(['', 255])
|
435
|
+
assert_equal [nil, 255], @klass.new(['/path/to/my_wonderful_script.sh']).run
|
436
|
+
end
|
437
|
+
end
|
438
|
+
end
|
439
|
+
end
|
414
440
|
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
end
|
441
|
+
context 'Cleanup' do
|
442
|
+
setup do
|
443
|
+
@klass = Hadupils::Commands::Cleanup
|
444
|
+
end
|
420
445
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
Kernel.expects(:system).with(@hadoop_path, 'fs', '-rmr', tmpdir_path).returns(0)
|
426
|
-
assert_equal 0, @klass.run(['/path/to/my_wonderful_script.sh'])
|
427
|
-
end
|
446
|
+
should 'register with :cleanup name' do
|
447
|
+
handlers = [:cleanup]
|
448
|
+
run_handler_assertions_for handlers
|
449
|
+
end
|
428
450
|
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
end
|
451
|
+
should 'have a #run singleton method that dispatches to an instance #run' do
|
452
|
+
params = mock()
|
453
|
+
@klass.expects(:new).with(params).returns(instance = mock())
|
454
|
+
instance.expects(:run).with.returns(result = mock())
|
455
|
+
assert_equal result, @klass.run(params)
|
456
|
+
end
|
436
457
|
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
458
|
+
context '#run' do
|
459
|
+
should 'provide invocation for bare cleanup if given empty parameters' do
|
460
|
+
tmp_path = '/tmp'
|
461
|
+
tmpdir1 = File.join(tmp_path, 'hadupils-tmp-064708701f180131f7ef3c0754617b34')
|
462
|
+
tmpdir2 = File.join(tmp_path, 'hadupils-tmp-0e5175901f180131f7f03c0754617b34')
|
463
|
+
|
464
|
+
run_common_cleanup_assertions_with(tmp_path, tmpdir1, tmpdir2)
|
465
|
+
instance = @klass.new([])
|
466
|
+
assert_equal [nil, 0], instance.run
|
467
|
+
assert_equal 86400, instance.tmp_ttl
|
468
|
+
assert_equal '/tmp', instance.tmp_path
|
469
|
+
end
|
470
|
+
|
471
|
+
should 'provide invocation for cleanup if just tmp_path parameter' do
|
472
|
+
tmp_path = mock().to_s
|
473
|
+
tmpdir1 = File.join(tmp_path, 'hadupils-tmp-064708701f180131f7ef3c0754617b34')
|
474
|
+
tmpdir2 = File.join(tmp_path, 'hadupils-tmp-0e5175901f180131f7f03c0754617b34')
|
475
|
+
|
476
|
+
run_common_cleanup_assertions_with(tmp_path, tmpdir1, tmpdir2)
|
477
|
+
instance = @klass.new([tmp_path])
|
478
|
+
assert_equal [nil, 0], instance.run
|
479
|
+
assert_equal 86400, instance.tmp_ttl
|
480
|
+
assert_equal tmp_path, instance.tmp_path
|
481
|
+
end
|
482
|
+
|
483
|
+
should 'provide invocation for cleanup with tmp_path and ttl parameter' do
|
484
|
+
tmp_path = mock().to_s
|
485
|
+
tmpdir1 = File.join(tmp_path, 'hadupils-tmp-064708701f180131f7ef3c0754617b34')
|
486
|
+
tmpdir2 = File.join(tmp_path, 'hadupils-tmp-0e5175901f180131f7f03c0754617b34')
|
487
|
+
|
488
|
+
run_common_cleanup_assertions_with(tmp_path, tmpdir1, tmpdir2)
|
489
|
+
instance = @klass.new([tmp_path, '0'])
|
490
|
+
assert_equal [nil, 0], instance.run
|
491
|
+
assert_equal 0, instance.tmp_ttl
|
492
|
+
assert_equal tmp_path, instance.tmp_path
|
493
|
+
end
|
446
494
|
end
|
447
495
|
end
|
448
496
|
|
449
497
|
def run_common_subcommand_assertions_with(tmpdir_path)
|
450
498
|
Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
|
451
499
|
Hadupils::Extensions::Dfs::TmpFile.expects(:tmpfile_path).returns(tmpdir_path)
|
452
|
-
|
453
|
-
|
500
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-mkdir', tmpdir_path]).returns(['', 0])
|
501
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-chmod', '700', tmpdir_path]).returns(['', 0])
|
502
|
+
end
|
503
|
+
|
504
|
+
def run_common_cleanup_assertions_with(tmp_path, tmpdir1, tmpdir2)
|
505
|
+
ls_stdout =
|
506
|
+
"Found 2 items\n" +
|
507
|
+
"drwx------ - willdrew supergroup 0 2013-10-24 16:23 #{tmpdir1}\n" +
|
508
|
+
"drwx------ - willdrew supergroup 0 2013-10-24 16:23 #{tmpdir2}\n"
|
509
|
+
count_stdout1 = " 1 0 0 hdfs://localhost:9000#{tmpdir1}\n"
|
510
|
+
count_stdout2 = " 1 1 0 hdfs://localhost:9000#{tmpdir2}\n"
|
511
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-ls', tmp_path]).returns([ls_stdout, 0])
|
512
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-count', tmpdir1]).returns([count_stdout1, 0])
|
513
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-count', tmpdir2]).returns([count_stdout2, 0])
|
514
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-ls', File.join(tmpdir2, '**', '*')]).returns(['', 0])
|
515
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir1]).returns(['', 0])
|
516
|
+
Hadupils::Runners::Hadoop.expects(:run).with(['fs', '-rmr', tmpdir2]).returns(['', 0])
|
454
517
|
end
|
455
518
|
|
456
519
|
def run_handler_assertions_for(handlers)
|
data/test/unit/runners_test.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
class Hadupils::RunnersTest < Test::Unit::TestCase
|
2
|
+
include Hadupils::Extensions::Runners
|
3
|
+
|
2
4
|
context Hadupils::Runners::Base do
|
3
5
|
setup do
|
4
6
|
@runner = Hadupils::Runners::Base.new(@params = mock())
|
@@ -21,20 +23,22 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
|
|
21
23
|
end
|
22
24
|
|
23
25
|
should 'assemble system call via command method' do
|
24
|
-
Kernel.expects(:system).with(*@command).returns(true)
|
25
26
|
$?.stubs(:exitstatus).with.returns(mock())
|
27
|
+
last_status = $?
|
28
|
+
Shell.stubs(:command).with(*@command).returns([nil, nil, last_status])
|
26
29
|
@runner.wait!
|
27
30
|
end
|
28
31
|
|
29
32
|
should 'return 255 when system returns nil' do
|
30
|
-
|
31
|
-
assert_equal 255, @runner.wait!
|
33
|
+
Shell.stubs(:command).returns([nil, nil, nil])
|
34
|
+
assert_equal [nil, 255], @runner.wait!
|
32
35
|
end
|
33
36
|
|
34
37
|
should 'return Process::Status#exitstatus when non-nil system result' do
|
35
|
-
Kernel.stubs(:system).returns(true)
|
36
38
|
$?.stubs(:exitstatus).with.returns(status = mock())
|
37
|
-
|
39
|
+
last_status = $?
|
40
|
+
Shell.stubs(:command).returns([nil, nil, last_status])
|
41
|
+
assert_equal [nil, status], @runner.wait!
|
38
42
|
end
|
39
43
|
end
|
40
44
|
|
@@ -50,7 +54,7 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
|
|
50
54
|
|
51
55
|
should 'handle command without env hash normally' do
|
52
56
|
@runner.expects(:command).with.returns(@command)
|
53
|
-
|
57
|
+
Open3.expects(:popen3).with(*@command)
|
54
58
|
$?.stubs(:exitstatus).with.returns(mock)
|
55
59
|
@runner.wait!
|
56
60
|
end
|
@@ -66,7 +70,8 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
|
|
66
70
|
$?.stubs(:exitstatus).with.returns(mock)
|
67
71
|
begin
|
68
72
|
# Environment variable is overridden during system call
|
69
|
-
|
73
|
+
last_status = $?
|
74
|
+
matcher = Shell.stubs(:command).returns([nil, nil, last_status]).with do |*args|
|
70
75
|
args == @command and ::ENV[var] == replacement and ::ENV[to_be_removed] == removal_val
|
71
76
|
end
|
72
77
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hadupils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-10-
|
12
|
+
date: 2013-10-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: uuid
|
@@ -102,6 +102,8 @@ files:
|
|
102
102
|
- lib/hadupils/commands.rb
|
103
103
|
- lib/hadupils/runners.rb
|
104
104
|
- lib/hadupils/extensions/hive.rb
|
105
|
+
- lib/hadupils/helpers.rb
|
106
|
+
- lib/hadupils/commands/options.rb
|
105
107
|
- lib/hadupils/extensions.rb
|
106
108
|
- lib/hadupils/hacks.rb
|
107
109
|
- lib/hadupils/assets.rb
|