rubydoop 1.0.0-java → 2.0.0.pre1-java
- checksums.yaml +7 -0
- data/lib/rubydoop.jar +0 -0
- data/lib/rubydoop.rb +5 -49
- data/lib/rubydoop/dsl.rb +152 -71
- data/lib/rubydoop/job_runner.rb +50 -0
- data/lib/rubydoop/package.rb +29 -69
- data/lib/rubydoop/version.rb +2 -2
- metadata +27 -14
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 0e1f088d9982e3f4cfa2083e3c9817e96a9f586f
+  data.tar.gz: b909d0ee95492428d0b2a29418fe84cb67fa8adb
+SHA512:
+  metadata.gz: 2f49a006f7d5ed891af12f3e1da488e375bc4730da15f8488e920a433fbf45ba9d4d3a3a318e8eacac3fc726b524402ce266abc970bd9b18e37474f0068986ba
+  data.tar.gz: 4e740a1ad1a7294f98e0710ec57160b3dce03c1d3b179805d6ce18c3a5040de82e4fa06b175f83a9531b2e98631752dea6717d4513a4804d48a331ebec500c16
data/lib/rubydoop.jar
CHANGED
Binary file
data/lib/rubydoop.rb
CHANGED
@@ -1,58 +1,14 @@
 # encoding: utf-8
 
-$LOAD_PATH << File.expand_path('..', __FILE__)
-
-
 require 'hadoop'
+require 'rubydoop.jar'
 
-
-#
-# {Package} for the packaging documentation, or the {file:README.md README}
+# See {Rubydoop.run} for the job configuration DSL documentation,
+# {Package} for the packaging documentation, or the {file:README.md README}
 # for a getting started guide.
 module Rubydoop
-
-  def self.create_mapper(conf)
-    create_instance(conf.get(MAPPER_KEY))
-  end
-
-  # @private
-  def self.create_reducer(conf)
-    create_instance(conf.get(REDUCER_KEY))
-  end
-
-  # @private
-  def self.create_combiner(conf)
-    create_instance(conf.get(COMBINER_KEY))
-  end
-
-  # @private
-  def self.create_partitioner(conf)
-    create_instance(conf.get(PARTITIONER_KEY))
-  end
-
-  # @private
-  def self.create_grouping_comparator(conf)
-    create_instance(conf.get(GROUPING_COMPARATOR_KEY))
-  end
-
-  # @private
-  def self.create_sort_comparator(conf)
-    create_instance(conf.get(SORT_COMPARATOR_KEY))
-  end
-
-  private
-
-  MAPPER_KEY = 'rubydoop.mapper'.freeze
-  REDUCER_KEY = 'rubydoop.reducer'.freeze
-  COMBINER_KEY = 'rubydoop.combiner'.freeze
-  PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
-  GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
-  SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze
-
-  def self.create_instance(const_path)
-    cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
-    cls.new
-  end
+  include_package 'rubydoop'
 end
 
 require 'rubydoop/dsl'
+require 'rubydoop/job_runner'
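In 2.0, lib/rubydoop.rb is a thin bootstrap: it loads the bundled Java extension and includes its `rubydoop` Java package into the Rubydoop module, so the proxy classes that dsl.rb references below resolve as Ruby constants. A small sketch of that lookup, using only class names that appear elsewhere in this diff:

    require 'rubydoop'

    # `include_package 'rubydoop'` makes JRuby resolve missing constants in the
    # module against the Java package bundled in rubydoop.jar:
    Rubydoop::MapperProxy                    # the Java shim passed to Job#setMapperClass
    Rubydoop::MapperProxy::RUBY_CLASS_KEY    # configuration key holding the Ruby mapper class name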
data/lib/rubydoop/dsl.rb
CHANGED
@@ -3,9 +3,9 @@
 module Rubydoop
   # Main entrypoint into the configuration DSL.
   #
-  # @example
+  # @example Running a job
   #
-  #     Rubydoop.
+  #     Rubydoop.run do |*args|
   #       job 'word_count' do
   #         input args[0]
   #         output args[1]
@@ -18,67 +18,69 @@ module Rubydoop
   #       end
   #     end
   #
-  # Within a
-  #
-  #
-  #
-  #
-  #
+  # Within a run block you can specify one or more jobs, the `job` blocks
+  # are run in the context of a {JobDefinition} instance, so look at that
+  # class for documentation about the available properties. The `run` block
+  # is run within the context of a {ConfigurationDefinition} instance. The
+  # arguments to the `run` block is the command line arguments, minus those
+  # handled by Hadoop's `ToolRunner`.
   #
   # @yieldparam [Array<String>] *arguments The command line arguments
   #
-
-
-
-  # block is not run (this is a feature, it means that the configuration
-  # block doesn't run in mappers and reducers).
-  #
-  def self.configure(impl=ConfigurationDefinition, &block)
-    impl.new($rubydoop_context, &block) if $rubydoop_context
+  def self.run(args=ARGV, &block)
+    return if $rubydoop_embedded
+    JobRunner.run(args, &block)
   end
 
-  #
-
-
+  # @ see {Rubydoop.run}
+  def self.configure(&block)
+    run(&block)
+  end
+
+  # Configuration DSL.
   #
-  #
-  #
-  #     # same DSL as shown in the documentation for Rubydoop.configure
-  #   end
+  # `Rubydoop.run` blocks are run within the context of an instance of this
+  # class. These are the methods available in those blocks.
   #
   class ConfigurationDefinition
-
+    # @private
+    def initialize(context)
       @context = context
-      instance_exec(*arguments, &block) if @context && block_given?
-    end
-
-    def arguments
-      @context.arguments
     end
 
     def job(name, &block)
-
-      job = JobDefinition.new(@context, @context.create_job(name))
+      job = JobDefinition.new(@context.create_job(name))
       job.instance_exec(&block)
       job
     end
+
+    def parallel(&block)
+      @context.parallel(&block)
+    end
+
+    def sequence(&block)
+      @context.sequence(&block)
+    end
+
+    def wait_for_completion(verbose)
+      @context.wait_for_completion(verbose)
+    end
   end
 
   # Job configuration DSL.
   #
-  # `
-  #
+  # `job` blocks are run within the context of an instance of this
+  # class. These are the methods available in those blocks.
   #
   class JobDefinition
     # @private
-    def initialize(
-      @context = context
+    def initialize(job)
       @job = job
     end
 
     # Sets the input paths of the job.
     #
-    # Calls `setInputFormatClass` on the Hadoop job and uses the static
+    # Calls `setInputFormatClass` on the Hadoop job and uses the static
     # `setInputPaths` on the input format to set the job's input path.
     #
     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setInputFormatClass(java.lang.Class) Hadoop's Job#setInputFormatClass
@@ -89,12 +91,20 @@ module Rubydoop
     # @option options [JavaClass] :format The input format to use, defaults to `TextInputFormat`
     def input(paths, options={})
       paths = paths.join(',') if paths.is_a?(Enumerable)
-      format = options
+      format = options.fetch(:format, :text)
+      unless format.is_a?(Class)
+        class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "InputFormat"
+        format = Hadoop::Mapreduce::Lib::Input.const_get(class_name)
+      end
+      unless format <= Hadoop::Mapreduce::InputFormat
+        @job.configuration.set(Rubydoop::InputFormatProxy::RUBY_CLASS_KEY, format.name)
+        format = Rubydoop::InputFormatProxy
+      end
       format.set_input_paths(@job, paths)
       @job.set_input_format_class(format)
     end
 
-    # Sets the output path of the job.
+    # Sets or gets the output path of the job.
     #
     # Calls `setOutputFormatClass` on the Hadoop job and uses the static
     # `setOutputPath` on the output format to set the job's output path.
@@ -104,15 +114,35 @@ module Rubydoop
     # @param [String] dir The output path
     # @param [Hash] options
     # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
-    def output(dir, options={})
-
-
-
+    def output(dir=nil, options={})
+      if dir
+        if dir.is_a?(Hash)
+          options = dir
+          if options[:intermediate]
+            dir = @job.job_name
+          else
+            raise ArgumentError, sprintf('neither dir nor intermediate: true was specified')
+          end
+        end
+        dir = sprintf('%s-%010d-%05d', dir, Time.now, rand(1e5)) if options[:intermediate]
+        @output_dir = dir
+        format = options.fetch(:format, :text)
+        unless format.is_a?(Class)
+          class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
+          format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
+        end
+        format.set_output_path(@job, Hadoop::Fs::Path.new(@output_dir))
+        @job.set_output_format_class(format)
+        if options[:lazy]
+          Hadoop::Mapreduce::Lib::Output::LazyOutputFormat.set_output_format_class(@job, format)
+        end
+      end
+      @output_dir
     end
 
     # Sets a job property.
     #
-    # Calls `set`/`setBoolean`/`setLong`/`setFloat` on the Hadoop Job's
+    # Calls `set`/`setBoolean`/`setLong`/`setFloat` on the Hadoop Job's
     # configuration (exact method depends on the type of the value).
     #
     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String,%20java.lang.String) Hadoop's Configuration#set
@@ -144,7 +174,7 @@ module Rubydoop
     # The class only needs to implement the method `map`, which will be called
     # exactly like a Java mapper class' `map` method would be called.
     #
-    # You can optionally implement `setup` and `cleanup`, which mirrors the
+    # You can optionally implement `setup` and `cleanup`, which mirrors the
     # methods of the same name in Java mappers.
     #
     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Mapper.html Hadoop's Mapper
@@ -153,8 +183,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) mapper class.
     def mapper(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_mapper_class(
+        @job.configuration.set(Rubydoop::MapperProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_mapper_class(Rubydoop::MapperProxy)
         @mapper = cls
       end
       @mapper
@@ -170,7 +200,7 @@ module Rubydoop
     # The class only needs to implement the method `reduce`, which will be called
     # exactly like a Java reducer class' `reduce` method would be called.
     #
-    # You can optionally implement `setup` and `cleanup`, which mirrors the
+    # You can optionally implement `setup` and `cleanup`, which mirrors the
     # methods of the same name in Java reducers.
     #
     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Reducer.html Hadoop's Reducer
@@ -179,8 +209,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) reducer class.
    def reducer(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_reducer_class(
+        @job.configuration.set(Rubydoop::ReducerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_reducer_class(Rubydoop::ReducerProxy)
         @reducer = cls
       end
       @reducer
@@ -200,8 +230,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) combiner class.
     def combiner(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_combiner_class(
+        @job.configuration.set(Rubydoop::CombinerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_combiner_class(Rubydoop::CombinerProxy)
         @combiner = cls
       end
       @combiner
@@ -222,8 +252,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) partitioner class.
     def partitioner(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_partitioner_class(
+        @job.configuration.set(Rubydoop::PartitionerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_partitioner_class(Rubydoop::PartitionerProxy)
         @partitioner = cls
       end
       @partitioner
@@ -232,7 +262,7 @@ module Rubydoop
 
     # Sets a custom grouping comparator.
     #
-    # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
+    # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
     # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
     # it in a way that works with Hadoop.
     #
@@ -241,8 +271,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) comparator class.
     def grouping_comparator(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_grouping_comparator_class(
+        @job.configuration.set(Rubydoop::GroupingComparatorProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_grouping_comparator_class(Rubydoop::GroupingComparatorProxy)
         @grouping_comparator = cls
       end
       @grouping_comparator
@@ -251,7 +281,7 @@ module Rubydoop
 
     # Sets a custom sort comparator.
     #
-    # The equivalent of calling `setSortComparatorClass` on a Hadoop job,
+    # The equivalent of calling `setSortComparatorClass` on a Hadoop job,
     # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
     # it in a way that works with Hadoop.
     #
@@ -260,8 +290,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) comparator class.
     def sort_comparator(cls=nil)
       if cls
-        @job.configuration.set(
-        @job.set_sort_comparator_class(
+        @job.configuration.set(Rubydoop::SortComparatorProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_sort_comparator_class(Rubydoop::SortComparatorProxy)
         @sort_comparator = cls
       end
       @sort_comparator
@@ -284,13 +314,13 @@ module Rubydoop
     def self.class_setter(dsl_name)
       define_method(dsl_name) do |cls|
         if cls
-          @job.send("set_#{dsl_name}_class", cls.
+          @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
           instance_variable_set(:"@#{dsl_name}", cls)
         end
         instance_variable_get(:"@#{dsl_name}")
       end
       define_method("#{dsl_name}=") do |cls|
-        @job.send("set_#{dsl_name}_class", cls.
+        @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
       end
     end
 
@@ -317,7 +347,7 @@ module Rubydoop
     # @!method output_key(cls)
     #
     # Sets the reducer's output key type.
-    #
+    #
     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputKeyClass(java.lang.Class) Hadoop's Job#setOutputKeyClass
     #
     # @param [Class] cls The reducer's output key type
@@ -335,23 +365,74 @@ module Rubydoop
 
   # @private
   class Context
-
-
-    def initialize(conf, proxy_classes, arguments)
+    def initialize(conf)
       @conf = conf
-      @
-      @arguments = arguments
-      @jobs = []
+      @job_stack = [Jobs::Sequence.new]
     end
 
     def create_job(name)
       hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
-      @
+      @job_stack.last.add(hadoop_job)
       hadoop_job
     end
 
-    def
-      @
+    def wait_for_completion(verbose)
+      @job_stack.first.wait_for_completion(verbose)
+    end
+
+    def parallel
+      push(Jobs::Parallel.new)
+      if block_given?
+        yield
+        pop
+      end
+    end
+
+    def sequence
+      push(Jobs::Sequence.new)
+      if block_given?
+        yield
+        pop
+      end
+    end
+
+    def push(job_list)
+      @job_stack.last.add(job_list)
+      @job_stack.push(job_list)
+    end
+
+    def pop
+      @job_stack.pop
+    end
+
+    class Jobs
+      attr_reader :jobs
+
+      def initialize
+        @jobs = []
+      end
+
+      def add(job)
+        @jobs.push(job)
+      end
+
+      class Sequence < Jobs
+        def wait_for_completion(verbose)
+          @jobs.all? do |job|
+            job.wait_for_completion(verbose)
+          end
+        end
+      end
+
+      class Parallel < Jobs
        def wait_for_completion(verbose)
+          @jobs.map do |job|
+            Thread.new do
+              job.wait_for_completion(verbose)
+            end
+          end.map!(&:value).all?
+        end
+      end
     end
   end
 end
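Taken together, these DSL changes mean a job setup script now calls `Rubydoop.run`, can group jobs with `sequence` and `parallel`, and can pass `:format`, `:lazy` and `:intermediate` options to `input`/`output`. A minimal sketch of such a script; the `WordCount::Mapper` and `WordCount::Reducer` class names (and the `word_count` require) are placeholders, not part of this diff:

    require 'rubydoop'
    require 'word_count'  # hypothetical file defining WordCount::Mapper and WordCount::Reducer

    Rubydoop.run do |input_path, output_path|
      sequence do
        job 'word count' do
          input input_path, format: :text   # :text resolves to TextInputFormat
          output output_path, lazy: true    # wraps TextOutputFormat in LazyOutputFormat
          mapper WordCount::Mapper          # Ruby class, recorded under MapperProxy::RUBY_CLASS_KEY
          reducer WordCount::Reducer
        end
      end
    end

Jobs declared inside a `parallel` block are waited on in separate threads, while a `sequence` block runs them one after another and stops at the first failure (see Context::Jobs above).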
data/lib/rubydoop/job_runner.rb
ADDED
@@ -0,0 +1,50 @@
+# encoding: utf-8
+
+
+module Rubydoop
+  # @private
+  class JobRunner < Java::OrgApacheHadoopConf::Configured
+    include Java::OrgApacheHadoopUtil::Tool
+
+    def initialize(setup_script=$0, &block)
+      @setup_script = setup_script
+      @block = block
+    end
+
+    def run(args)
+      conf = Java::OrgApacheHadoopMapred::JobConf.new(get_conf)
+      conf.set(Java::Rubydoop::InstanceContainer::JOB_SETUP_SCRIPT_KEY, File.basename(@setup_script))
+      conf.jar = containing_jar
+      context = Context.new(conf)
+      configuration_definition = ConfigurationDefinition.new(context)
+      begin
+        configuration_definition.instance_exec(*args, &@block)
+      rescue => e
+        raise JobRunnerError, sprintf('Could not load job setup script (%s): %s', @setup_script.inspect, e.message.inspect), e.backtrace
+      end
+      configuration_definition.wait_for_completion(true) ? 0 : 1
+    end
+
+    def self.run(args, &block)
+      Java::JavaLang::System.exit(Java::OrgApacheHadoopUtil::ToolRunner.run(new(&block), args.to_java(:string)))
+    end
+
+    private
+
+    def containing_jar
+      @containing_jar ||= begin
+        relative_setup_script = @setup_script[/(?<=#{PUCK_ROOT}).+\Z/]
+        class_loader = JRuby.runtime.jruby_class_loader
+        if (url = class_loader.get_resources(relative_setup_script).find { |url| url.protocol == 'jar' })
+          path = url.path
+          path.slice!(/\Afile:/)
+          path = Java::JavaNet::URLDecoder.decode(path, 'UTF-8')
+          path.slice!(/!.*\Z/)
+          path
+        end
+      end
+    end
+  end
+
+  JobRunnerError = Class.new(StandardError)
+end
data/lib/rubydoop/package.rb
CHANGED
@@ -1,11 +1,7 @@
 # encoding: utf-8
 
 require 'bundler'
-require '
-require 'ant'
-require 'fileutils'
-require 'set'
-
+require 'puck'
 
 module Rubydoop
   # Utility for making a job JAR that works with Hadoop.
@@ -25,15 +21,14 @@ module Rubydoop
     # @option options [String] :project_base_dir The project's base dir, defaults to the current directory (the assumption is that Package will be used from a Rake task)
     # @option options [String] :project_name The name of the JAR file (minus .jar), defaults to the directory name of the `:project_base_dir`
     # @option options [String] :build_dir The directory to put the final JAR into, defaults to `:project_base_dir + '/build'`
+    # @option options [String] :jruby_jar_path The path to a local copy of `jruby-complete.jar`, unless specified you need to have `jruby-jars` in your `Gemfile`
     # @option options [Array<String>] :gem_groups All gems from these Gemfile groups will be included, defaults to `[:default]` (the top-level group of a Gemfile)
     # @option options [Array<String>] :lib_jars Paths to extra JAR files to include in the JAR's lib directory (where they will be on the classpath when the job is run)
-    # @option options [String] :jruby_version The JRuby version to package, defaults to `JRUBY_VERSION`
-    # @option options [String] :jruby_jar_path The path to a local copy of `jruby-complete.jar`, defaults to downloading and caching a version defined by `:jruby_version`
     def initialize(options={})
       @options = default_options.merge(options)
-      @options[:project_name]
-      @options[:build_dir]
-      @options[:
+      @options[:project_name] ||= File.basename(@options[:project_base_dir])
+      @options[:build_dir] ||= File.join(@options[:project_base_dir], 'build')
+      @options[:jar_path] ||= "#{@options[:project_name]}.jar"
     end
 
     # Create the JAR package, see {Package#initialize} for configuration options.
@@ -42,9 +37,15 @@ module Rubydoop
     # (`jruby-complete.jar`) and locally cached, but if you already have a
     # copy in a local Ivy or Maven repository that will be used instead.
     def create!
-
-
-
+      Puck::Jar.new(
+        app_dir: @options[:project_base_dir],
+        app_name: @options[:project_name],
+        build_dir: @options[:build_dir],
+        jar_name: @options[:jar_path],
+        gem_groups: @options[:gem_groups],
+        extra_files: lib_jars,
+        jruby_complete: @options[:jruby_jar_path]
+      ).create
     end
 
     # A shortcut for `Package.new(options).create!`.
@@ -52,11 +53,25 @@ module Rubydoop
       new(options).create!
     end
 
+    def respond_to?(name)
+      @options.key?(name) or super
+    end
+
+    def method_missing(name, *args)
+      @options[name] or super
+    end
+
+    def lib_jars
+      extra_files = { File.join(rubydoop_base_dir, 'lib/rubydoop.jar') => 'lib/rubydoop.jar' }
+      @options[:lib_jars].each_with_object(extra_files) do |jar, extra_files|
+        extra_files[jar] = File.join('lib', File.basename(jar))
+      end
+    end
+
     private
 
     def default_options
       defaults = {
-        :main_class => 'rubydoop.RubydoopJobRunner',
         :rubydoop_base_dir => File.expand_path('../../..', __FILE__),
         :project_base_dir => Dir.getwd,
         :gem_groups => [:default],
@@ -64,60 +79,5 @@ module Rubydoop
         :jruby_version => JRUBY_VERSION
       }
     end
-
-    def create_directories!
-      FileUtils.mkdir_p(@options[:build_dir])
-    end
-
-    def fetch_jruby!
-      return if File.exists?(@options[:jruby_jar_path])
-
-      local_maven_path = File.expand_path("~/.m2/repository/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar")
-      local_ivy_path = File.expand_path("~/.ivy2/cache/org.jruby/jruby-complete/jars/jruby-complete-#{@options[:jruby_version]}.jar")
-      remote_maven_url = "http://central.maven.org/maven2/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar"
-
-      if File.exists?(local_maven_path)
-        $stderr.puts("Using #{File.basename(local_maven_path)} from local Maven cache")
-        @options[:jruby_jar_path] = local_maven_path
-      elsif File.exists?(local_ivy_path)
-        $stderr.puts("Using #{File.basename(local_maven_path)} from local Ivy2 cache")
-        @options[:jruby_jar_path] = local_ivy_path
-      else
-        $stderr.puts("Downloading #{remote_maven_url} to #{@options[:jruby_jar_path]}")
-        jruby_complete_bytes = open(remote_maven_url).read
-        File.open(@options[:jruby_jar_path], 'wb') do |io|
-          io.write(jruby_complete_bytes)
-        end
-      end
-    end
-
-    def build_jar!
-      # the ant block is instance_exec'ed so instance variables and methods are not in scope
-      options = @options
-      bundled_gems = load_path
-      lib_jars = [options[:jruby_jar_path], *options[:lib_jars]]
-      ant do
-        jar :destfile => "#{options[:build_dir]}/#{options[:project_name]}.jar" do
-          manifest { attribute :name => 'Main-Class', :value => options[:main_class] }
-          zipfileset :src => "#{options[:rubydoop_base_dir]}/lib/rubydoop.jar"
-          fileset :dir => "#{options[:rubydoop_base_dir]}/lib", :includes => '**/*.rb', :excludes => '*.jar'
-          fileset :dir => "#{options[:project_base_dir]}/lib"
-          bundled_gems.each { |path| fileset :dir => path }
-          lib_jars.each { |extra_jar| zipfileset :dir => File.dirname(extra_jar), :includes => File.basename(extra_jar), :prefix => 'lib' }
-        end
-      end
-    end
-
-    def load_path
-      Bundler.definition.specs_for(@options[:gem_groups]).flat_map do |spec|
-        if spec.full_name !~ /^(?:bundler|rubydoop)-\d+/
-          spec.require_paths.map do |rp|
-            "#{spec.full_gem_path}/#{rp}"
-          end
-        else
-          []
-        end
-      end
-    end
   end
 end
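With packaging delegated to Puck, a project's Rake task only needs to hand the options documented above to Package. A minimal Rakefile sketch; the task name and the extra jar path are illustrative, not taken from this diff:

    # Rakefile
    require 'rubydoop/package'

    task :package do
      Rubydoop::Package.create!(
        project_name: 'word_count',            # defaults to the base name of :project_base_dir
        lib_jars: ['jars/extra-library.jar']   # copied into lib/ inside the jar, next to rubydoop.jar
      )
    end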
data/lib/rubydoop/version.rb
CHANGED
metadata
CHANGED
@@ -1,16 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: rubydoop
 version: !ruby/object:Gem::Version
-
-  version: 1.0.0
+  version: 2.0.0.pre1
 platform: java
 authors:
 - Theo Hultberg
 autorequire:
 bindir: bin
 cert_chain: []
-date:
-dependencies:
+date: 2016-01-28 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.2'
+  name: puck
+  prerelease: false
+  type: :runtime
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.2'
 description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
 email:
 - theo@iconara.net
@@ -19,35 +32,35 @@ extensions: []
 extra_rdoc_files: []
 files:
 - lib/hadoop.rb
+- lib/rubydoop.jar
 - lib/rubydoop.rb
 - lib/rubydoop/dsl.rb
+- lib/rubydoop/job_runner.rb
 - lib/rubydoop/package.rb
 - lib/rubydoop/version.rb
-- lib/rubydoop.jar
 homepage: http://github.com/iconara/rubydoop
-licenses:
+licenses:
+- Apache License 2.0
+metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - -
+  - - '>='
     - !ruby/object:Gem::Version
      version: '0'
-  none: false
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - -
+  - - '>'
     - !ruby/object:Gem::Version
-      version:
-  none: false
+      version: 1.3.1
 requirements: []
 rubyforge_project: rubydoop
-rubygems_version:
+rubygems_version: 2.4.5
 signing_key:
-specification_version:
+specification_version: 4
 summary: Write Hadoop jobs in Ruby
 test_files: []
 has_rdoc:
-...