rubydoop 1.0.0-java → 2.0.0.pre1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
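For orientation before the file-by-file diff: the headline changes in 2.0.0.pre1 are that the `Rubydoop.configure` entry point is replaced by `Rubydoop.run` (with `configure` kept as a thin alias), the generated per-job proxy classes are replaced by the `Rubydoop::MapperProxy` family from the bundled `rubydoop.jar`, and JAR packaging is delegated to the `puck` gem. A minimal sketch of a job setup script against the new API, assuming a hypothetical `WordCount` module that defines `Mapper` and `Reducer` classes:

    # word_count.rb (hypothetical setup script for Rubydoop 2.x)
    require 'rubydoop'
    require 'word_count'

    Rubydoop.run do |input_path, output_path|
      job 'word_count' do
        input input_path
        output output_path
        mapper WordCount::Mapper
        reducer WordCount::Reducer
        output_key Hadoop::Io::Text
        output_value Hadoop::Io::IntWritable
      end
    end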
checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 0e1f088d9982e3f4cfa2083e3c9817e96a9f586f
+   data.tar.gz: b909d0ee95492428d0b2a29418fe84cb67fa8adb
+ SHA512:
+   metadata.gz: 2f49a006f7d5ed891af12f3e1da488e375bc4730da15f8488e920a433fbf45ba9d4d3a3a318e8eacac3fc726b524402ce266abc970bd9b18e37474f0068986ba
+   data.tar.gz: 4e740a1ad1a7294f98e0710ec57160b3dce03c1d3b179805d6ce18c3a5040de82e4fa06b175f83a9531b2e98631752dea6717d4513a4804d48a331ebec500c16
Binary file
lib/rubydoop.rb CHANGED
@@ -1,58 +1,14 @@
  # encoding: utf-8

- $LOAD_PATH << File.expand_path('..', __FILE__)
-
-
  require 'hadoop'
+ require 'rubydoop.jar'

-
- # See {Rubydoop.configure} for the job configuration DSL documentation,
- # {Package} for the packaging documentation, or the {file:README.md README}
+ # See {Rubydoop.run} for the job configuration DSL documentation,
+ # {Package} for the packaging documentation, or the {file:README.md README}
  # for a getting started guide.
  module Rubydoop
-   # @private
-   def self.create_mapper(conf)
-     create_instance(conf.get(MAPPER_KEY))
-   end
-
-   # @private
-   def self.create_reducer(conf)
-     create_instance(conf.get(REDUCER_KEY))
-   end
-
-   # @private
-   def self.create_combiner(conf)
-     create_instance(conf.get(COMBINER_KEY))
-   end
-
-   # @private
-   def self.create_partitioner(conf)
-     create_instance(conf.get(PARTITIONER_KEY))
-   end
-
-   # @private
-   def self.create_grouping_comparator(conf)
-     create_instance(conf.get(GROUPING_COMPARATOR_KEY))
-   end
-
-   # @private
-   def self.create_sort_comparator(conf)
-     create_instance(conf.get(SORT_COMPARATOR_KEY))
-   end
-
-   private
-
-   MAPPER_KEY = 'rubydoop.mapper'.freeze
-   REDUCER_KEY = 'rubydoop.reducer'.freeze
-   COMBINER_KEY = 'rubydoop.combiner'.freeze
-   PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
-   GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
-   SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze
-
-   def self.create_instance(const_path)
-     cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
-     cls.new
-   end
+   include_package 'rubydoop'
  end

  require 'rubydoop/dsl'
+ require 'rubydoop/job_runner'
lib/rubydoop/dsl.rb CHANGED
@@ -3,9 +3,9 @@
  module Rubydoop
    # Main entrypoint into the configuration DSL.
    #
-   # @example Configuring a job
+   # @example Running a job
    #
-   #     Rubydoop.configure do |*args|
+   #     Rubydoop.run do |*args|
    #       job 'word_count' do
    #         input args[0]
    #         output args[1]
@@ -18,67 +18,69 @@ module Rubydoop
    #       end
    #     end
    #
-   # Within a configure block you can specify one or more jobs, the `job`
-   # blocks are run in the context of a {JobDefinition} instance, so look
-   # at that class for documentation about the available properties. The
-   # `configure` block is run within the context of a {ConfigurationDefinition}
-   # instance. The arguments to the `configure` block is the command line
-   # arguments, minus those handled by Hadoop's `ToolRunner`.
+   # Within a run block you can specify one or more jobs, the `job` blocks
+   # are run in the context of a {JobDefinition} instance, so look at that
+   # class for documentation about the available properties. The `run` block
+   # is run within the context of a {ConfigurationDefinition} instance. The
+   # arguments to the `run` block is the command line arguments, minus those
+   # handled by Hadoop's `ToolRunner`.
    #
    # @yieldparam [Array<String>] *arguments The command line arguments
    #
-   # @note The tool runner will set the global variable `$rubydoop_context`
-   #   to an object that contains references to the necessary Hadoop
-   #   configuration. Unless this global variable is set the configuration
-   #   block is not run (this is a feature, it means that the configuration
-   #   block doesn't run in mappers and reducers).
-   #
-   def self.configure(impl=ConfigurationDefinition, &block)
-     impl.new($rubydoop_context, &block) if $rubydoop_context
+   def self.run(args=ARGV, &block)
+     return if $rubydoop_embedded
+     JobRunner.run(args, &block)
    end

-   # Lower level API for configuring jobs.
-   #
-   # @example Configuring a job
+   # @see {Rubydoop.run}
+   def self.configure(&block)
+     run(&block)
+   end
+
+   # Configuration DSL.
    #
-   #     cc = ConfigurationDefinition.new
-   #     cc.job 'word_count' do
-   #       # same DSL as shown in the documentation for Rubydoop.configure
-   #     end
+   # `Rubydoop.run` blocks are run within the context of an instance of this
+   # class. These are the methods available in those blocks.
    #
    class ConfigurationDefinition
-     def initialize(context=$rubydoop_context, &block)
+     # @private
+     def initialize(context)
        @context = context
-       instance_exec(*arguments, &block) if @context && block_given?
-     end
-
-     def arguments
-       @context.arguments
      end

      def job(name, &block)
-       return nil unless @context
-       job = JobDefinition.new(@context, @context.create_job(name))
+       job = JobDefinition.new(@context.create_job(name))
        job.instance_exec(&block)
        job
      end
+
+     def parallel(&block)
+       @context.parallel(&block)
+     end
+
+     def sequence(&block)
+       @context.sequence(&block)
+     end
+
+     def wait_for_completion(verbose)
+       @context.wait_for_completion(verbose)
+     end
    end

    # Job configuration DSL.
    #
-   # `Rubydoop.configure` blocks are run within the context of an instance of
-   # this class. These are the methods available in those blocks.
+   # `job` blocks are run within the context of an instance of this
+   # class. These are the methods available in those blocks.
    #
    class JobDefinition
      # @private
-     def initialize(context, job)
-       @context = context
+     def initialize(job)
        @job = job
      end

      # Sets the input paths of the job.
      #
-     # Calls `setInputFormatClass` on the Hadoop job and uses the static
+     # Calls `setInputFormatClass` on the Hadoop job and uses the static
      # `setInputPaths` on the input format to set the job's input path.
      #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setInputFormatClass(java.lang.Class) Hadoop's Job#setInputFormatClass
@@ -89,12 +91,20 @@ module Rubydoop
      # @option options [JavaClass] :format The input format to use, defaults to `TextInputFormat`
      def input(paths, options={})
        paths = paths.join(',') if paths.is_a?(Enumerable)
-       format = options[:format] || Hadoop::Mapreduce::Lib::Input::TextInputFormat
+       format = options.fetch(:format, :text)
+       unless format.is_a?(Class)
+         class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "InputFormat"
+         format = Hadoop::Mapreduce::Lib::Input.const_get(class_name)
+       end
+       unless format <= Hadoop::Mapreduce::InputFormat
+         @job.configuration.set(Rubydoop::InputFormatProxy::RUBY_CLASS_KEY, format.name)
+         format = Rubydoop::InputFormatProxy
+       end
        format.set_input_paths(@job, paths)
        @job.set_input_format_class(format)
      end

-     # Sets the output path of the job.
+     # Sets or gets the output path of the job.
      #
      # Calls `setOutputFormatClass` on the Hadoop job and uses the static
      # `setOutputPath` on the output format to set the job's output path.
@@ -104,15 +114,35 @@
      # @param [String] dir The output path
      # @param [Hash] options
      # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
-     def output(dir, options={})
-       format = options[:format] || Hadoop::Mapreduce::Lib::Output::TextOutputFormat
-       format.set_output_path(@job, Hadoop::Fs::Path.new(dir))
-       @job.set_output_format_class(format)
+     def output(dir=nil, options={})
+       if dir
+         if dir.is_a?(Hash)
+           options = dir
+           if options[:intermediate]
+             dir = @job.job_name
+           else
+             raise ArgumentError, sprintf('neither dir nor intermediate: true was specified')
+           end
+         end
+         dir = sprintf('%s-%010d-%05d', dir, Time.now, rand(1e5)) if options[:intermediate]
+         @output_dir = dir
+         format = options.fetch(:format, :text)
+         unless format.is_a?(Class)
+           class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
+           format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
+         end
+         format.set_output_path(@job, Hadoop::Fs::Path.new(@output_dir))
+         @job.set_output_format_class(format)
+         if options[:lazy]
+           Hadoop::Mapreduce::Lib::Output::LazyOutputFormat.set_output_format_class(@job, format)
+         end
+       end
+       @output_dir
      end

      # Sets a job property.
      #
-     # Calls `set`/`setBoolean`/`setLong`/`setFloat` on the Hadoop Job's
+     # Calls `set`/`setBoolean`/`setLong`/`setFloat` on the Hadoop Job's
      # configuration (exact method depends on the type of the value).
      #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String,%20java.lang.String) Hadoop's Configuration#set
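Worth noting in the rewritten `input` and `output` above: `:format` now also accepts a symbol, which is camel-cased and resolved against the corresponding `Hadoop::Mapreduce::Lib` package, and an input format that is not already a Hadoop `InputFormat` subclass is wrapped in `Rubydoop::InputFormatProxy`. An illustrative sketch of the resolution:

    input 'logs/*', format: :sequence_file
    # :sequence_file resolves to Hadoop::Mapreduce::Lib::Input::SequenceFileInputFormat

    output 'counts', format: :text, lazy: true
    # :text resolves to Hadoop::Mapreduce::Lib::Output::TextOutputFormat,
    # registered through LazyOutputFormat because of lazy: true

`output` can now also be called with no arguments to read back the output directory, or with `intermediate: true` to have a timestamped directory name generated (from the job name when no explicit dir is given).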
@@ -144,7 +174,7 @@
      # The class only needs to implement the method `map`, which will be called
      # exactly like a Java mapper class' `map` method would be called.
      #
-     # You can optionally implement `setup` and `cleanup`, which mirrors the
+     # You can optionally implement `setup` and `cleanup`, which mirrors the
      # methods of the same name in Java mappers.
      #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Mapper.html Hadoop's Mapper
@@ -153,8 +183,8 @@
      # @param [Class] cls The (Ruby) mapper class.
      def mapper(cls=nil)
        if cls
-         @job.configuration.set(MAPPER_KEY, cls.name)
-         @job.set_mapper_class(@context.proxy_class(:mapper))
+         @job.configuration.set(Rubydoop::MapperProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_mapper_class(Rubydoop::MapperProxy)
          @mapper = cls
        end
        @mapper
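As the surrounding doc comments say, the Ruby classes handed to `mapper` (and to `reducer` below) just need `map`/`reduce` methods with the same signatures as their Java counterparts; the proxy classes now referenced here take care of instantiating them inside the embedded runtime. A minimal sketch of such a mapper, with a hypothetical `WordCount::Mapper` assuming `Text` line input:

    module WordCount
      class Mapper
        def map(key, value, context)
          value.to_s.split.each do |word|
            context.write(Hadoop::Io::Text.new(word), Hadoop::Io::IntWritable.new(1))
          end
        end
      end
    end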
@@ -170,7 +200,7 @@
      # The class only needs to implement the method `reduce`, which will be called
      # exactly like a Java reducer class' `reduce` method would be called.
      #
-     # You can optionally implement `setup` and `cleanup`, which mirrors the
+     # You can optionally implement `setup` and `cleanup`, which mirrors the
      # methods of the same name in Java reducers.
      #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Reducer.html Hadoop's Reducer
@@ -179,8 +209,8 @@
      # @param [Class] cls The (Ruby) reducer class.
      def reducer(cls=nil)
        if cls
-         @job.configuration.set(REDUCER_KEY, cls.name)
-         @job.set_reducer_class(@context.proxy_class(:reducer))
+         @job.configuration.set(Rubydoop::ReducerProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_reducer_class(Rubydoop::ReducerProxy)
          @reducer = cls
        end
        @reducer
@@ -200,8 +230,8 @@
      # @param [Class] cls The (Ruby) combiner class.
      def combiner(cls=nil)
        if cls
-         @job.configuration.set(COMBINER_KEY, cls.name)
-         @job.set_combiner_class(@context.proxy_class(:combiner))
+         @job.configuration.set(Rubydoop::CombinerProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_combiner_class(Rubydoop::CombinerProxy)
          @combiner = cls
        end
        @combiner
@@ -222,8 +252,8 @@
      # @param [Class] cls The (Ruby) partitioner class.
      def partitioner(cls=nil)
        if cls
-         @job.configuration.set(PARTITIONER_KEY, cls.name)
-         @job.set_partitioner_class(@context.proxy_class(:partitioner))
+         @job.configuration.set(Rubydoop::PartitionerProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_partitioner_class(Rubydoop::PartitionerProxy)
          @partitioner = cls
        end
        @partitioner
@@ -232,7 +262,7 @@

      # Sets a custom grouping comparator.
      #
-     # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
+     # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
      # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
      # it in a way that works with Hadoop.
      #
@@ -241,8 +271,8 @@
      # @param [Class] cls The (Ruby) comparator class.
      def grouping_comparator(cls=nil)
        if cls
-         @job.configuration.set(GROUPING_COMPARATOR_KEY, cls.name)
-         @job.set_grouping_comparator_class(@context.proxy_class(:grouping_comparator))
+         @job.configuration.set(Rubydoop::GroupingComparatorProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_grouping_comparator_class(Rubydoop::GroupingComparatorProxy)
          @grouping_comparator = cls
        end
        @grouping_comparator
@@ -251,7 +281,7 @@

      # Sets a custom sort comparator.
      #
-     # The equivalent of calling `setSortComparatorClass` on a Hadoop job,
+     # The equivalent of calling `setSortComparatorClass` on a Hadoop job,
      # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
      # it in a way that works with Hadoop.
      #
@@ -260,8 +290,8 @@
      # @param [Class] cls The (Ruby) comparator class.
      def sort_comparator(cls=nil)
        if cls
-         @job.configuration.set(SORT_COMPARATOR_KEY, cls.name)
-         @job.set_sort_comparator_class(@context.proxy_class(:sort_comparator))
+         @job.configuration.set(Rubydoop::SortComparatorProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_sort_comparator_class(Rubydoop::SortComparatorProxy)
          @sort_comparator = cls
        end
        @sort_comparator
@@ -284,13 +314,13 @@ module Rubydoop
      def self.class_setter(dsl_name)
        define_method(dsl_name) do |cls|
          if cls
-           @job.send("set_#{dsl_name}_class", cls.java_class)
+           @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
            instance_variable_set(:"@#{dsl_name}", cls)
          end
          instance_variable_get(:"@#{dsl_name}")
        end
        define_method("#{dsl_name}=") do |cls|
-         @job.send("set_#{dsl_name}_class", cls.java_class)
+         @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
        end
      end

@@ -317,7 +347,7 @@
      # @!method output_key(cls)
      #
      # Sets the reducer's output key type.
-     #
+     #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputKeyClass(java.lang.Class) Hadoop's Job#setOutputKeyClass
      #
      # @param [Class] cls The reducer's output key type
@@ -335,23 +365,74 @@ module Rubydoop

    # @private
    class Context
-     attr_reader :jobs, :arguments
-
-     def initialize(conf, proxy_classes, arguments)
+     def initialize(conf)
        @conf = conf
-       @proxy_classes = proxy_classes
-       @arguments = arguments
-       @jobs = []
+       @job_stack = [Jobs::Sequence.new]
      end

      def create_job(name)
        hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
-       @jobs << hadoop_job
+       @job_stack.last.add(hadoop_job)
        hadoop_job
      end

-     def proxy_class(type)
-       @proxy_classes[type]
+     def wait_for_completion(verbose)
+       @job_stack.first.wait_for_completion(verbose)
+     end
+
+     def parallel
+       push(Jobs::Parallel.new)
+       if block_given?
+         yield
+         pop
+       end
+     end
+
+     def sequence
+       push(Jobs::Sequence.new)
+       if block_given?
+         yield
+         pop
+       end
+     end
+
+     def push(job_list)
+       @job_stack.last.add(job_list)
+       @job_stack.push(job_list)
+     end
+
+     def pop
+       @job_stack.pop
+     end
+
+     class Jobs
+       attr_reader :jobs
+
+       def initialize
+         @jobs = []
+       end
+
+       def add(job)
+         @jobs.push(job)
+       end
+
+       class Sequence < Jobs
+         def wait_for_completion(verbose)
+           @jobs.all? do |job|
+             job.wait_for_completion(verbose)
+           end
+         end
+       end
+
+       class Parallel < Jobs
+         def wait_for_completion(verbose)
+           @jobs.map do |job|
+             Thread.new do
+               job.wait_for_completion(verbose)
+             end
+           end.map!(&:value).all?
+         end
+       end
      end
    end
  end
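The rebuilt `Context` above keeps a stack of job lists: the implicit top level is a `Jobs::Sequence`, whose `wait_for_completion` runs jobs one at a time and stops at the first failure (`all?` short-circuits), while `Jobs::Parallel` waits for its jobs on separate threads and combines their results. In DSL terms (hypothetical job names), the two extract jobs below should run concurrently, and the merge job only starts once both have finished:

    Rubydoop.run do |*args|
      parallel do
        job 'extract_a' do
          # ...
        end
        job 'extract_b' do
          # ...
        end
      end
      job 'merge' do
        # ...
      end
    end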
lib/rubydoop/job_runner.rb ADDED
@@ -0,0 +1,50 @@
+ # encoding: utf-8
+
+
+ module Rubydoop
+   # @private
+   class JobRunner < Java::OrgApacheHadoopConf::Configured
+     include Java::OrgApacheHadoopUtil::Tool
+
+     def initialize(setup_script=$0, &block)
+       @setup_script = setup_script
+       @block = block
+     end
+
+     def run(args)
+       conf = Java::OrgApacheHadoopMapred::JobConf.new(get_conf)
+       conf.set(Java::Rubydoop::InstanceContainer::JOB_SETUP_SCRIPT_KEY, File.basename(@setup_script))
+       conf.jar = containing_jar
+       context = Context.new(conf)
+       configuration_definition = ConfigurationDefinition.new(context)
+       begin
+         configuration_definition.instance_exec(*args, &@block)
+       rescue => e
+         raise JobRunnerError, sprintf('Could not load job setup script (%s): %s', @setup_script.inspect, e.message.inspect), e.backtrace
+       end
+       configuration_definition.wait_for_completion(true) ? 0 : 1
+     end
+
+     def self.run(args, &block)
+       Java::JavaLang::System.exit(Java::OrgApacheHadoopUtil::ToolRunner.run(new(&block), args.to_java(:string)))
+     end
+
+     private
+
+     def containing_jar
+       @containing_jar ||= begin
+         relative_setup_script = @setup_script[/(?<=#{PUCK_ROOT}).+\Z/]
+         class_loader = JRuby.runtime.jruby_class_loader
+         if (url = class_loader.get_resources(relative_setup_script).find { |url| url.protocol == 'jar' })
+           path = url.path
+           path.slice!(/\Afile:/)
+           path = Java::JavaNet::URLDecoder.decode(path, 'UTF-8')
+           path.slice!(/!.*\Z/)
+           path
+         end
+       end
+     end
+   end
+
+   JobRunnerError = Class.new(StandardError)
+ end
lib/rubydoop/package.rb CHANGED
@@ -1,11 +1,7 @@
  # encoding: utf-8

  require 'bundler'
- require 'open-uri'
- require 'ant'
- require 'fileutils'
- require 'set'
-
+ require 'puck'

  module Rubydoop
    # Utility for making a job JAR that works with Hadoop.
@@ -25,15 +21,14 @@ module Rubydoop
      # @option options [String] :project_base_dir The project's base dir, defaults to the current directory (the assumption is that Package will be used from a Rake task)
      # @option options [String] :project_name The name of the JAR file (minus .jar), defaults to the directory name of the `:project_base_dir`
      # @option options [String] :build_dir The directory to put the final JAR into, defaults to `:project_base_dir + '/build'`
+     # @option options [String] :jruby_jar_path The path to a local copy of `jruby-complete.jar`, unless specified you need to have `jruby-jars` in your `Gemfile`
      # @option options [Array<String>] :gem_groups All gems from these Gemfile groups will be included, defaults to `[:default]` (the top-level group of a Gemfile)
      # @option options [Array<String>] :lib_jars Paths to extra JAR files to include in the JAR's lib directory (where they will be on the classpath when the job is run)
-     # @option options [String] :jruby_version The JRuby version to package, defaults to `JRUBY_VERSION`
-     # @option options [String] :jruby_jar_path The path to a local copy of `jruby-complete.jar`, defaults to downloading and caching a version defined by `:jruby_version`
      def initialize(options={})
        @options = default_options.merge(options)
-       @options[:project_name] = File.basename(@options[:project_base_dir]) unless @options[:project_name]
-       @options[:build_dir] = File.join(@options[:project_base_dir], 'build') unless @options[:build_dir]
-       @options[:jruby_jar_path] = File.join(@options[:build_dir], "jruby-complete-#{@options[:jruby_version]}.jar") unless @options[:jruby_jar_path]
+       @options[:project_name] ||= File.basename(@options[:project_base_dir])
+       @options[:build_dir] ||= File.join(@options[:project_base_dir], 'build')
+       @options[:jar_path] ||= "#{@options[:project_name]}.jar"
      end

      # Create the JAR package, see {Package#initialize} for configuration options.
@@ -42,9 +37,15 @@ module Rubydoop
      # (`jruby-complete.jar`) and locally cached, but if you already have a
      # copy in a local Ivy or Maven repository that will be used instead.
      def create!
-       create_directories!
-       fetch_jruby!
-       build_jar!
+       Puck::Jar.new(
+         app_dir: @options[:project_base_dir],
+         app_name: @options[:project_name],
+         build_dir: @options[:build_dir],
+         jar_name: @options[:jar_path],
+         gem_groups: @options[:gem_groups],
+         extra_files: lib_jars,
+         jruby_complete: @options[:jruby_jar_path]
+       ).create
      end

      # A shortcut for `Package.new(options).create!`.
@@ -52,11 +53,25 @@ module Rubydoop
        new(options).create!
      end

+     def respond_to?(name)
+       @options.key?(name) or super
+     end
+
+     def method_missing(name, *args)
+       @options[name] or super
+     end
+
+     def lib_jars
+       extra_files = { File.join(rubydoop_base_dir, 'lib/rubydoop.jar') => 'lib/rubydoop.jar' }
+       @options[:lib_jars].each_with_object(extra_files) do |jar, extra_files|
+         extra_files[jar] = File.join('lib', File.basename(jar))
+       end
+     end
+
      private

      def default_options
        defaults = {
-         :main_class => 'rubydoop.RubydoopJobRunner',
          :rubydoop_base_dir => File.expand_path('../../..', __FILE__),
          :project_base_dir => Dir.getwd,
          :gem_groups => [:default],
@@ -64,60 +79,5 @@ module Rubydoop
          :jruby_version => JRUBY_VERSION
        }
      end
-
-     def create_directories!
-       FileUtils.mkdir_p(@options[:build_dir])
-     end
-
-     def fetch_jruby!
-       return if File.exists?(@options[:jruby_jar_path])
-
-       local_maven_path = File.expand_path("~/.m2/repository/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar")
-       local_ivy_path = File.expand_path("~/.ivy2/cache/org.jruby/jruby-complete/jars/jruby-complete-#{@options[:jruby_version]}.jar")
-       remote_maven_url = "http://central.maven.org/maven2/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar"
-
-       if File.exists?(local_maven_path)
-         $stderr.puts("Using #{File.basename(local_maven_path)} from local Maven cache")
-         @options[:jruby_jar_path] = local_maven_path
-       elsif File.exists?(local_ivy_path)
-         $stderr.puts("Using #{File.basename(local_maven_path)} from local Ivy2 cache")
-         @options[:jruby_jar_path] = local_ivy_path
-       else
-         $stderr.puts("Downloading #{remote_maven_url} to #{@options[:jruby_jar_path]}")
-         jruby_complete_bytes = open(remote_maven_url).read
-         File.open(@options[:jruby_jar_path], 'wb') do |io|
-           io.write(jruby_complete_bytes)
-         end
-       end
-     end
-
-     def build_jar!
-       # the ant block is instance_exec'ed so instance variables and methods are not in scope
-       options = @options
-       bundled_gems = load_path
-       lib_jars = [options[:jruby_jar_path], *options[:lib_jars]]
-       ant do
-         jar :destfile => "#{options[:build_dir]}/#{options[:project_name]}.jar" do
-           manifest { attribute :name => 'Main-Class', :value => options[:main_class] }
-           zipfileset :src => "#{options[:rubydoop_base_dir]}/lib/rubydoop.jar"
-           fileset :dir => "#{options[:rubydoop_base_dir]}/lib", :includes => '**/*.rb', :excludes => '*.jar'
-           fileset :dir => "#{options[:project_base_dir]}/lib"
-           bundled_gems.each { |path| fileset :dir => path }
-           lib_jars.each { |extra_jar| zipfileset :dir => File.dirname(extra_jar), :includes => File.basename(extra_jar), :prefix => 'lib' }
-         end
-       end
-     end
-
-     def load_path
-       Bundler.definition.specs_for(@options[:gem_groups]).flat_map do |spec|
-         if spec.full_name !~ /^(?:bundler|rubydoop)-\d+/
-           spec.require_paths.map do |rp|
-             "#{spec.full_gem_path}/#{rp}"
-           end
-         else
-           []
-         end
-       end
-     end
    end
  end
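Packaging is now a thin wrapper around Puck: instead of fetching `jruby-complete.jar` and driving Ant directly, `create!` delegates to `Puck::Jar`, and the new `lib_jars` method merges the bundled `lib/rubydoop.jar` plus any `:lib_jars` entries into the archive's `lib/` directory. A minimal Rake task sketch with hypothetical project values, assuming `jruby-jars` is in the Gemfile since no `:jruby_jar_path` is given:

    require 'rubydoop/package'

    task :package do
      Rubydoop::Package.create!(project_name: 'word_count', lib_jars: ['ext/extra.jar'])
    end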
lib/rubydoop/version.rb CHANGED
@@ -1,4 +1,4 @@
  module Rubydoop
    # @private
-   VERSION = '1.0.0'
- end
+   VERSION = '2.0.0.pre1'
+ end
metadata CHANGED
@@ -1,16 +1,29 @@
  --- !ruby/object:Gem::Specification
  name: rubydoop
  version: !ruby/object:Gem::Version
-   prerelease:
-   version: 1.0.0
+   version: 2.0.0.pre1
  platform: java
  authors:
  - Theo Hultberg
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2012-10-01 00:00:00.000000000Z
- dependencies: []
+ date: 2016-01-28 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.2'
+   name: puck
+   prerelease: false
+   type: :runtime
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.2'
  description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
  email:
  - theo@iconara.net
@@ -19,35 +32,35 @@ extensions: []
  extra_rdoc_files: []
  files:
  - lib/hadoop.rb
+ - lib/rubydoop.jar
  - lib/rubydoop.rb
  - lib/rubydoop/dsl.rb
+ - lib/rubydoop/job_runner.rb
  - lib/rubydoop/package.rb
  - lib/rubydoop/version.rb
- - lib/rubydoop.jar
  homepage: http://github.com/iconara/rubydoop
- licenses: []
+ licenses:
+ - Apache License 2.0
+ metadata: {}
  post_install_message:
  rdoc_options: []
  require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ! '>='
+   - - '>='
      - !ruby/object:Gem::Version
        version: '0'
-   none: false
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ! '>='
+   - - '>'
      - !ruby/object:Gem::Version
-       version: '0'
-   none: false
+       version: 1.3.1
  requirements: []
  rubyforge_project: rubydoop
- rubygems_version: 1.8.15
+ rubygems_version: 2.4.5
  signing_key:
- specification_version: 3
+ specification_version: 4
  summary: Write Hadoop jobs in Ruby
  test_files: []
  has_rdoc:
- ...