rubydoop 1.1.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rubydoop.jar +0 -0
- data/lib/rubydoop.rb +1 -43
- data/lib/rubydoop/dsl.rb +108 -29
- data/lib/rubydoop/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 82bf7b52baa55faaa4787d61890c72ad3eb7a9e1
+  data.tar.gz: f48b41c4268d3bc970ef942b845bcab414a6714f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 30b7c232ed09dc1425d1798f5828da8f9b109769f5fcc3dd9a1c97a71e35aad3605f49c3a84b3a52b879119df37357e9ee8bb81f6d85ba3f051f82c71b4b4624
+  data.tar.gz: 218dc633c5038de6b8964bcfc57c2d8d582032d2a3216e715ac755807aa1585946ceac944417d738376b24b757a441d99f7d9e695b66c8cc606c8f4a46314a0a
data/lib/rubydoop.jar
CHANGED
Binary file
data/lib/rubydoop.rb
CHANGED
@@ -10,49 +10,7 @@ require 'hadoop'
 # {Package} for the packaging documentation, or the {file:README.md README}
 # for a getting started guide.
 module Rubydoop
-  # @private
-  def self.create_mapper(conf)
-    create_instance(conf.get(MAPPER_KEY))
-  end
-
-  # @private
-  def self.create_reducer(conf)
-    create_instance(conf.get(REDUCER_KEY))
-  end
-
-  # @private
-  def self.create_combiner(conf)
-    create_instance(conf.get(COMBINER_KEY))
-  end
-
-  # @private
-  def self.create_partitioner(conf)
-    create_instance(conf.get(PARTITIONER_KEY))
-  end
-
-  # @private
-  def self.create_grouping_comparator(conf)
-    create_instance(conf.get(GROUPING_COMPARATOR_KEY))
-  end
-
-  # @private
-  def self.create_sort_comparator(conf)
-    create_instance(conf.get(SORT_COMPARATOR_KEY))
-  end
-
-  private
-
-  MAPPER_KEY = 'rubydoop.mapper'.freeze
-  REDUCER_KEY = 'rubydoop.reducer'.freeze
-  COMBINER_KEY = 'rubydoop.combiner'.freeze
-  PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
-  GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
-  SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze
-
-  def self.create_instance(const_path)
-    cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
-    cls.new
-  end
+  include_package 'rubydoop'
 end
 
 require 'rubydoop/dsl'
data/lib/rubydoop/dsl.rb
CHANGED
@@ -62,6 +62,14 @@ module Rubydoop
       job.instance_exec(&block)
       job
     end
+
+    def parallel(&block)
+      @context.parallel(&block)
+    end
+
+    def sequence(&block)
+      @context.sequence(&block)
+    end
   end
 
   # Job configuration DSL.
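The new `parallel` and `sequence` methods expose 1.2.0's job composition in the top-level configure block: jobs declared inside `parallel` run concurrently, jobs inside `sequence` run one after another. A sketch with illustrative job names, assuming the `Rubydoop.configure` entry point from the project README:

    Rubydoop.configure do |input_path, output_path|
      parallel do
        job 'word count' do
          # input/output/mapper/reducer as usual
        end
        job 'letter count' do
          # ...
        end
      end
    end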
@@ -94,11 +102,15 @@
         class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "InputFormat"
         format = Hadoop::Mapreduce::Lib::Input.const_get(class_name)
       end
+      unless format <= Hadoop::Mapreduce::InputFormat
+        @job.configuration.set(Rubydoop::InputFormatProxy::RUBY_CLASS_KEY, format.name)
+        format = Rubydoop::InputFormatProxy
+      end
       format.set_input_paths(@job, paths)
       @job.set_input_format_class(format)
     end
 
-    # Sets the output path of the job.
+    # Sets or gets the output path of the job.
     #
     # Calls `setOutputFormatClass` on the Hadoop job and uses the static
     # `setOutputPath` on the output format to set the job's output path.
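With this change a class passed to `input` via `:format` no longer has to be a Hadoop `InputFormat`: anything else is registered under `RUBY_CLASS_KEY` and wrapped in `Rubydoop::InputFormatProxy`, which delegates to the Ruby class at runtime. A hypothetical sketch (the class name is illustrative, and the Ruby class is assumed to implement the InputFormat contract of splits plus record readers):

    class CustomInputFormat
      # behaves like org.apache.hadoop.mapreduce.InputFormat, but is
      # written in Ruby; InputFormatProxy forwards the Java calls to it
    end

    job 'import' do
      input 'raw/*', format: CustomInputFormat
      # ...
    end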
@@ -108,14 +120,27 @@
     # @param [String] dir The output path
     # @param [Hash] options
     # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
-    def output(dir, options={})
-      format = options.fetch(:format, :text)
-      unless format.is_a?(Class)
-        class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
-        format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
+    def output(dir=nil, options={})
+      if dir
+        if dir.is_a?(Hash)
+          options = dir
+          if options[:intermediate]
+            dir = @job.job_name
+          else
+            raise ArgumentError, sprintf('neither dir nor intermediate: true was specified')
+          end
+        end
+        dir = sprintf('%s-%010d-%05d', dir, Time.now, rand(1e5)) if options[:intermediate]
+        @output_dir = dir
+        format = options.fetch(:format, :text)
+        unless format.is_a?(Class)
+          class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
+          format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
+        end
+        format.set_output_path(@job, Hadoop::Fs::Path.new(@output_dir))
+        @job.set_output_format_class(format)
       end
-      format.set_output_path(@job, Hadoop::Fs::Path.new(dir))
-      @job.set_output_format_class(format)
+      @output_dir
     end
 
     # Sets a job property.
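`output` is now a getter as well as a setter, and the `:intermediate` option generates a throwaway directory name from the job name, the current time and a random suffix (the `'%s-%010d-%05d'` pattern above), which is useful when chaining jobs. A sketch with illustrative names and paths:

    job 'tokenize' do
      input 'books'
      output intermediate: true        # e.g. "tokenize-1434067200-04711"
      # ...
    end

    job 'count' do
      # ...
      output 'word-counts', format: :sequence_file  # SequenceFileOutputFormat
    end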
@@ -161,8 +186,8 @@
     # @param [Class] cls The (Ruby) mapper class.
     def mapper(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_mapper_class(
+        @job.configuration.set(Rubydoop::MapperProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_mapper_class(Rubydoop::MapperProxy)
         @mapper = cls
       end
       @mapper
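The pattern is the same for all of the component setters that follow: the Ruby class name goes into the configuration under the proxy's `RUBY_CLASS_KEY`, and the Java-side proxy (here `Rubydoop::MapperProxy`, reachable thanks to `include_package`) is what Hadoop actually instantiates. The mapper itself stays a plain Ruby class with a `map` method, along the lines of the README's word count (sketch):

    class WordCountMapper
      def map(key, value, context)
        value.to_s.split.each do |word|
          context.write(Hadoop::Io::Text.new(word), Hadoop::Io::IntWritable.new(1))
        end
      end
    end

    # in a job definition:
    #   mapper WordCountMapper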
@@ -187,8 +212,8 @@
     # @param [Class] cls The (Ruby) reducer class.
     def reducer(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_reducer_class(
+        @job.configuration.set(Rubydoop::ReducerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_reducer_class(Rubydoop::ReducerProxy)
         @reducer = cls
       end
       @reducer
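A matching reducer sketch, again in the README's style (the accumulation is illustrative):

    class WordCountReducer
      def reduce(key, values, context)
        sum = 0
        values.each { |value| sum += value.get }
        context.write(key, Hadoop::Io::IntWritable.new(sum))
      end
    end

    # in a job definition:
    #   reducer WordCountReducer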
@@ -208,8 +233,8 @@
     # @param [Class] cls The (Ruby) combiner class.
     def combiner(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_combiner_class(
+        @job.configuration.set(Rubydoop::CombinerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_combiner_class(Rubydoop::CombinerProxy)
         @combiner = cls
       end
       @combiner
@@ -230,8 +255,8 @@
     # @param [Class] cls The (Ruby) partitioner class.
     def partitioner(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_partitioner_class(
+        @job.configuration.set(Rubydoop::PartitionerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_partitioner_class(Rubydoop::PartitionerProxy)
         @partitioner = cls
       end
       @partitioner
@@ -249,8 +274,8 @@
     # @param [Class] cls The (Ruby) comparator class.
     def grouping_comparator(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_grouping_comparator_class(
+        @job.configuration.set(Rubydoop::GroupingComparatorProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_grouping_comparator_class(Rubydoop::GroupingComparatorProxy)
         @grouping_comparator = cls
       end
       @grouping_comparator
@@ -268,8 +293,8 @@
     # @param [Class] cls The (Ruby) comparator class.
     def sort_comparator(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_sort_comparator_class(
+        @job.configuration.set(Rubydoop::SortComparatorProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_sort_comparator_class(Rubydoop::SortComparatorProxy)
         @sort_comparator = cls
       end
       @sort_comparator
@@ -292,13 +317,13 @@
     def self.class_setter(dsl_name)
       define_method(dsl_name) do |cls|
         if cls
-          @job.send("set_#{dsl_name}_class", cls.
+          @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
           instance_variable_set(:"@#{dsl_name}", cls)
         end
         instance_variable_get(:"@#{dsl_name}")
       end
       define_method("#{dsl_name}=") do |cls|
-        @job.send("set_#{dsl_name}_class", cls.
+        @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
       end
     end
 
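`class_setter` generates the DSL methods that hand a Java class straight to the Hadoop job; the added `cls.to_java(Java::JavaLang::Class)` converts the JRuby class proxy into a `java.lang.Class` explicitly. Assuming the usual setters defined with it elsewhere in this file (e.g. `output_key` and `output_value`), usage looks like this sketch:

    job 'word_count' do
      # ...
      output_key Hadoop::Io::Text
      output_value Hadoop::Io::IntWritable
    end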
@@ -343,23 +368,77 @@
 
   # @private
   class Context
-    attr_reader :
+    attr_reader :arguments
 
-    def initialize(conf,
+    def initialize(conf, arguments)
       @conf = conf
-      @proxy_classes = proxy_classes
       @arguments = arguments.to_a
-      @
+      @job_stack = [Jobs::Sequence.new]
     end
 
     def create_job(name)
       hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
-      @
+      @job_stack.last.add(hadoop_job)
       hadoop_job
     end
 
-    def
-      @
+    def wait_for_completion(verbose)
+      @job_stack.first.wait_for_completion(verbose)
+    end
+
+    def parallel
+      push(Jobs::Parallel.new)
+      if block_given?
+        yield
+        pop
+      end
+    end
+
+    def sequence
+      push(Jobs::Sequence.new)
+      if block_given?
+        yield
+        pop
+      end
+    end
+
+    def push(job_list)
+      @job_stack.last.add(job_list)
+      @job_stack.push(job_list)
+    end
+
+    def pop
+      @job_stack.pop
+    end
+
+    class Jobs
+      attr_reader :jobs
+
+      def initialize
+        @jobs = []
+      end
+
+      def add(job)
+        @jobs.push(job)
+      end
+
+      class Sequence < Jobs
+        def wait_for_completion(verbose)
+          @jobs.all? do |job|
+            job.wait_for_completion(verbose)
+          end
+        end
+      end
+
+      class Parallel < Jobs
+        def wait_for_completion(verbose)
+          @jobs.map do |job|
+            Thread.new do
+              job.wait_for_completion(verbose)
+            end
+          end.map!(&:value).all?
+        end
+      end
     end
   end
 end
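The Context now manages a stack of job lists instead of a flat list, and the composite is simple: `Jobs::Sequence#wait_for_completion` runs its jobs in order and stops at the first failure (`all?` short-circuits), while `Jobs::Parallel` starts one thread per job and joins them via `Thread#value`. Since nested `parallel`/`sequence` blocks push a new list onto the stack, compositions like this sketch (illustrative job names, inside a configure block) become possible:

    # the root of the stack is already a Jobs::Sequence
    job 'prepare' do
      # ...
    end
    parallel do        # runs after 'prepare'; 'stats' and 'index' run concurrently
      job 'stats' do
        # ...
      end
      job 'index' do
        # ...
      end
    end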
data/lib/rubydoop/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rubydoop
 version: !ruby/object:Gem::Version
-  version: 1.1.3
+  version: 1.2.0
 platform: ruby
 authors:
 - Theo Hultberg
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2015-06-12 00:00:00.000000000 Z
 dependencies: []
 description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
 email:
@@ -42,7 +42,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   version: '0'
 requirements: []
 rubyforge_project: rubydoop
-rubygems_version: 2.
+rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
 summary: Write Hadoop jobs in Ruby