rubydoop 1.0.0-java → 2.0.0.pre1-java

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 0e1f088d9982e3f4cfa2083e3c9817e96a9f586f
+   data.tar.gz: b909d0ee95492428d0b2a29418fe84cb67fa8adb
+ SHA512:
+   metadata.gz: 2f49a006f7d5ed891af12f3e1da488e375bc4730da15f8488e920a433fbf45ba9d4d3a3a318e8eacac3fc726b524402ce266abc970bd9b18e37474f0068986ba
+   data.tar.gz: 4e740a1ad1a7294f98e0710ec57160b3dce03c1d3b179805d6ce18c3a5040de82e4fa06b175f83a9531b2e98631752dea6717d4513a4804d48a331ebec500c16
lib/rubydoop.jar CHANGED (Binary file)
lib/rubydoop.rb CHANGED
@@ -1,58 +1,14 @@
  # encoding: utf-8

- $LOAD_PATH << File.expand_path('..', __FILE__)
-
-
  require 'hadoop'
+ require 'rubydoop.jar'

-
- # See {Rubydoop.configure} for the job configuration DSL documentation,
- # {Package} for the packaging documentation, or the {file:README.md README}
+ # See {Rubydoop.run} for the job configuration DSL documentation,
+ # {Package} for the packaging documentation, or the {file:README.md README}
  # for a getting started guide.
  module Rubydoop
-   # @private
-   def self.create_mapper(conf)
-     create_instance(conf.get(MAPPER_KEY))
-   end
-
-   # @private
-   def self.create_reducer(conf)
-     create_instance(conf.get(REDUCER_KEY))
-   end
-
-   # @private
-   def self.create_combiner(conf)
-     create_instance(conf.get(COMBINER_KEY))
-   end
-
-   # @private
-   def self.create_partitioner(conf)
-     create_instance(conf.get(PARTITIONER_KEY))
-   end
-
-   # @private
-   def self.create_grouping_comparator(conf)
-     create_instance(conf.get(GROUPING_COMPARATOR_KEY))
-   end
-
-   # @private
-   def self.create_sort_comparator(conf)
-     create_instance(conf.get(SORT_COMPARATOR_KEY))
-   end
-
-   private
-
-   MAPPER_KEY = 'rubydoop.mapper'.freeze
-   REDUCER_KEY = 'rubydoop.reducer'.freeze
-   COMBINER_KEY = 'rubydoop.combiner'.freeze
-   PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
-   GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
-   SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze
-
-   def self.create_instance(const_path)
-     cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
-     cls.new
-   end
+   include_package 'rubydoop'
  end

  require 'rubydoop/dsl'
+ require 'rubydoop/job_runner'
lib/rubydoop/dsl.rb CHANGED
@@ -3,9 +3,9 @@
  module Rubydoop
    # Main entrypoint into the configuration DSL.
    #
-   # @example Configuring a job
+   # @example Running a job
    #
-   #     Rubydoop.configure do |*args|
+   #     Rubydoop.run do |*args|
    #       job 'word_count' do
    #         input args[0]
    #         output args[1]
@@ -18,67 +18,69 @@ module Rubydoop
    #       end
    #     end
    #
-   # Within a configure block you can specify one or more jobs, the `job`
-   # blocks are run in the context of a {JobDefinition} instance, so look
-   # at that class for documentation about the available properties. The
-   # `configure` block is run within the context of a {ConfigurationDefinition}
-   # instance. The arguments to the `configure` block is the command line
-   # arguments, minus those handled by Hadoop's `ToolRunner`.
+   # Within a run block you can specify one or more jobs, the `job` blocks
+   # are run in the context of a {JobDefinition} instance, so look at that
+   # class for documentation about the available properties. The `run` block
+   # is run within the context of a {ConfigurationDefinition} instance. The
+   # arguments to the `run` block is the command line arguments, minus those
+   # handled by Hadoop's `ToolRunner`.
    #
    # @yieldparam [Array<String>] *arguments The command line arguments
    #
-   # @note The tool runner will set the global variable `$rubydoop_context`
-   #   to an object that contains references to the necessary Hadoop
-   #   configuration. Unless this global variable is set the configuration
-   #   block is not run (this is a feature, it means that the configuration
-   #   block doesn't run in mappers and reducers).
-   #
-   def self.configure(impl=ConfigurationDefinition, &block)
-     impl.new($rubydoop_context, &block) if $rubydoop_context
+   def self.run(args=ARGV, &block)
+     return if $rubydoop_embedded
+     JobRunner.run(args, &block)
    end

-   # Lower level API for configuring jobs.
-   #
-   # @example Configuring a job
+   # @see {Rubydoop.run}
+   def self.configure(&block)
+     run(&block)
+   end
+
+   # Configuration DSL.
    #
-   #     cc = ConfigurationDefinition.new
-   #     cc.job 'word_count' do
-   #       # same DSL as shown in the documentation for Rubydoop.configure
-   #     end
+   # `Rubydoop.run` blocks are run within the context of an instance of this
+   # class. These are the methods available in those blocks.
    #
    class ConfigurationDefinition
-     def initialize(context=$rubydoop_context, &block)
+     # @private
+     def initialize(context)
        @context = context
-       instance_exec(*arguments, &block) if @context && block_given?
-     end
-
-     def arguments
-       @context.arguments
      end

      def job(name, &block)
-       return nil unless @context
-       job = JobDefinition.new(@context, @context.create_job(name))
+       job = JobDefinition.new(@context.create_job(name))
        job.instance_exec(&block)
        job
      end
+
+     def parallel(&block)
+       @context.parallel(&block)
+     end
+
+     def sequence(&block)
+       @context.sequence(&block)
+     end
+
+     def wait_for_completion(verbose)
+       @context.wait_for_completion(verbose)
+     end
    end

    # Job configuration DSL.
    #
-   # `Rubydoop.configure` blocks are run within the context of an instance of
-   # this class. These are the methods available in those blocks.
+   # `job` blocks are run within the context of an instance of this
+   # class. These are the methods available in those blocks.
    #
    class JobDefinition
      # @private
-     def initialize(context, job)
-       @context = context
+     def initialize(job)
        @job = job
      end

      # Sets the input paths of the job.
      #
-     # Calls `setInputFormatClass` on the Hadoop job and uses the static
+     # Calls `setInputFormatClass` on the Hadoop job and uses the static
      # `setInputPaths` on the input format to set the job's input path.
      #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setInputFormatClass(java.lang.Class) Hadoop's Job#setInputFormatClass
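The practical effect of the new `parallel` and `sequence` blocks is that independent jobs can share a single `Rubydoop.run` invocation and be submitted concurrently, while the enclosing sequence still waits for all of them. A minimal sketch of a setup script combining them (the job names and output sub-paths are made up, not part of Rubydoop):

    Rubydoop.run do |input_path, output_path|
      parallel do
        job 'word count' do
          input input_path
          output "#{output_path}/words"
          # mapper, reducer, etc.
        end
        job 'line count' do
          input input_path
          output "#{output_path}/lines"
          # mapper, reducer, etc.
        end
      end
    end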
@@ -89,12 +91,20 @@ module Rubydoop
      # @option options [JavaClass] :format The input format to use, defaults to `TextInputFormat`
      def input(paths, options={})
        paths = paths.join(',') if paths.is_a?(Enumerable)
-       format = options[:format] || Hadoop::Mapreduce::Lib::Input::TextInputFormat
+       format = options.fetch(:format, :text)
+       unless format.is_a?(Class)
+         class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "InputFormat"
+         format = Hadoop::Mapreduce::Lib::Input.const_get(class_name)
+       end
+       unless format <= Hadoop::Mapreduce::InputFormat
+         @job.configuration.set(Rubydoop::InputFormatProxy::RUBY_CLASS_KEY, format.name)
+         format = Rubydoop::InputFormatProxy
+       end
        format.set_input_paths(@job, paths)
        @job.set_input_format_class(format)
      end

-     # Sets the output path of the job.
+     # Sets or gets the output path of the job.
      #
      # Calls `setOutputFormatClass` on the Hadoop job and uses the static
      # `setOutputPath` on the output format to set the job's output path.
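With this change the `:format` option also accepts a symbol, which is camel-cased and looked up in `Hadoop::Mapreduce::Lib::Input`, or a plain Ruby class, which gets wrapped in `Rubydoop::InputFormatProxy`. A sketch of what that resolution implies (the paths and `MyInputFormat` are hypothetical):

    input 'data/events/*'                          # :text is the default => TextInputFormat
    input 'data/events/*', format: :sequence_file  # => Hadoop::Mapreduce::Lib::Input::SequenceFileInputFormat
    input 'data/events/*', format: MyInputFormat   # Ruby class, run through InputFormatProxy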
@@ -104,15 +114,35 @@ module Rubydoop
      # @param [String] dir The output path
      # @param [Hash] options
      # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
-     def output(dir, options={})
-       format = options[:format] || Hadoop::Mapreduce::Lib::Output::TextOutputFormat
-       format.set_output_path(@job, Hadoop::Fs::Path.new(dir))
-       @job.set_output_format_class(format)
+     def output(dir=nil, options={})
+       if dir
+         if dir.is_a?(Hash)
+           options = dir
+           if options[:intermediate]
+             dir = @job.job_name
+           else
+             raise ArgumentError, sprintf('neither dir nor intermediate: true was specified')
+           end
+         end
+         dir = sprintf('%s-%010d-%05d', dir, Time.now, rand(1e5)) if options[:intermediate]
+         @output_dir = dir
+         format = options.fetch(:format, :text)
+         unless format.is_a?(Class)
+           class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
+           format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
+         end
+         format.set_output_path(@job, Hadoop::Fs::Path.new(@output_dir))
+         @job.set_output_format_class(format)
+         if options[:lazy]
+           Hadoop::Mapreduce::Lib::Output::LazyOutputFormat.set_output_format_class(@job, format)
+         end
+       end
+       @output_dir
      end

      # Sets a job property.
      #
-     # Calls `set`/`setBoolean`/`setLong`/`setFloat` on the Hadoop Job's
+     # Calls `set`/`setBoolean`/`setLong`/`setFloat` on the Hadoop Job's
      # configuration (exact method depends on the type of the value).
      #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String,%20java.lang.String) Hadoop's Configuration#set
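Because `output` now both sets and returns the output directory, and `intermediate: true` derives a unique timestamped path from the job name, one job's output can be fed into the next job in the sequence. A sketch under those assumptions (job names are made up):

    first = job 'extract' do
      # ...
      output intermediate: true          # unique, generated directory
    end

    job 'summarize' do
      input first.output                 # read what the previous job wrote
      output 'summary', format: :text, lazy: true
    end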
@@ -144,7 +174,7 @@ module Rubydoop
      # The class only needs to implement the method `map`, which will be called
      # exactly like a Java mapper class' `map` method would be called.
      #
-     # You can optionally implement `setup` and `cleanup`, which mirrors the
+     # You can optionally implement `setup` and `cleanup`, which mirrors the
      # methods of the same name in Java mappers.
      #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Mapper.html Hadoop's Mapper
@@ -153,8 +183,8 @@ module Rubydoop
      # @param [Class] cls The (Ruby) mapper class.
      def mapper(cls=nil)
        if cls
-         @job.configuration.set(MAPPER_KEY, cls.name)
-         @job.set_mapper_class(@context.proxy_class(:mapper))
+         @job.configuration.set(Rubydoop::MapperProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_mapper_class(Rubydoop::MapperProxy)
          @mapper = cls
        end
        @mapper
@@ -170,7 +200,7 @@ module Rubydoop
      # The class only needs to implement the method `reduce`, which will be called
      # exactly like a Java reducer class' `reduce` method would be called.
      #
-     # You can optionally implement `setup` and `cleanup`, which mirrors the
+     # You can optionally implement `setup` and `cleanup`, which mirrors the
      # methods of the same name in Java reducers.
      #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Reducer.html Hadoop's Reducer
@@ -179,8 +209,8 @@ module Rubydoop
      # @param [Class] cls The (Ruby) reducer class.
      def reducer(cls=nil)
        if cls
-         @job.configuration.set(REDUCER_KEY, cls.name)
-         @job.set_reducer_class(@context.proxy_class(:reducer))
+         @job.configuration.set(Rubydoop::ReducerProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_reducer_class(Rubydoop::ReducerProxy)
          @reducer = cls
        end
        @reducer
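As the comments above describe, mapper and reducer classes are plain Ruby classes whose `map`/`reduce` methods are called just like their Java counterparts, receiving Hadoop writables and a context to write to. A minimal word-count pair as a sketch (class names are made up; it assumes the usual `Hadoop::Io` writables from lib/hadoop.rb and text input where `value` is a line):

    class WordCountMapper
      def map(key, value, context)
        value.to_s.split.each do |word|
          context.write(Hadoop::Io::Text.new(word), Hadoop::Io::IntWritable.new(1))
        end
      end
    end

    class WordCountReducer
      def reduce(key, values, context)
        sum = 0
        values.each { |count| sum += count.get }
        context.write(key, Hadoop::Io::IntWritable.new(sum))
      end
    end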
@@ -200,8 +230,8 @@ module Rubydoop
      # @param [Class] cls The (Ruby) combiner class.
      def combiner(cls=nil)
        if cls
-         @job.configuration.set(COMBINER_KEY, cls.name)
-         @job.set_combiner_class(@context.proxy_class(:combiner))
+         @job.configuration.set(Rubydoop::CombinerProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_combiner_class(Rubydoop::CombinerProxy)
          @combiner = cls
        end
        @combiner
@@ -222,8 +252,8 @@ module Rubydoop
      # @param [Class] cls The (Ruby) partitioner class.
      def partitioner(cls=nil)
        if cls
-         @job.configuration.set(PARTITIONER_KEY, cls.name)
-         @job.set_partitioner_class(@context.proxy_class(:partitioner))
+         @job.configuration.set(Rubydoop::PartitionerProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_partitioner_class(Rubydoop::PartitionerProxy)
          @partitioner = cls
        end
        @partitioner
@@ -232,7 +262,7 @@ module Rubydoop

      # Sets a custom grouping comparator.
      #
-     # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
+     # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
      # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
      # it in a way that works with Hadoop.
      #
@@ -241,8 +271,8 @@ module Rubydoop
      # @param [Class] cls The (Ruby) comparator class.
      def grouping_comparator(cls=nil)
        if cls
-         @job.configuration.set(GROUPING_COMPARATOR_KEY, cls.name)
-         @job.set_grouping_comparator_class(@context.proxy_class(:grouping_comparator))
+         @job.configuration.set(Rubydoop::GroupingComparatorProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_grouping_comparator_class(Rubydoop::GroupingComparatorProxy)
          @grouping_comparator = cls
        end
        @grouping_comparator
@@ -251,7 +281,7 @@ module Rubydoop

      # Sets a custom sort comparator.
      #
-     # The equivalent of calling `setSortComparatorClass` on a Hadoop job,
+     # The equivalent of calling `setSortComparatorClass` on a Hadoop job,
      # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
      # it in a way that works with Hadoop.
      #
@@ -260,8 +290,8 @@ module Rubydoop
      # @param [Class] cls The (Ruby) comparator class.
      def sort_comparator(cls=nil)
        if cls
-         @job.configuration.set(SORT_COMPARATOR_KEY, cls.name)
-         @job.set_sort_comparator_class(@context.proxy_class(:sort_comparator))
+         @job.configuration.set(Rubydoop::SortComparatorProxy::RUBY_CLASS_KEY, cls.name)
+         @job.set_sort_comparator_class(Rubydoop::SortComparatorProxy)
          @sort_comparator = cls
        end
        @sort_comparator
@@ -284,13 +314,13 @@ module Rubydoop
      def self.class_setter(dsl_name)
        define_method(dsl_name) do |cls|
          if cls
-           @job.send("set_#{dsl_name}_class", cls.java_class)
+           @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
            instance_variable_set(:"@#{dsl_name}", cls)
          end
          instance_variable_get(:"@#{dsl_name}")
        end
        define_method("#{dsl_name}=") do |cls|
-         @job.send("set_#{dsl_name}_class", cls.java_class)
+         @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
        end
      end

@@ -317,7 +347,7 @@ module Rubydoop
      # @!method output_key(cls)
      #
      # Sets the reducer's output key type.
-     #
+     #
      # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputKeyClass(java.lang.Class) Hadoop's Job#setOutputKeyClass
      #
      # @param [Class] cls The reducer's output key type
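The `class_setter` macro above is what generates typed setters such as `output_key`; the value you pass is converted to a `java.lang.Class` before being handed to the Hadoop job. Usage inside a `job` block might look like this sketch (the `map_output_*` and `output_value` variants and the `Hadoop::Io` constants are assumptions, not shown in this diff):

    job 'word_count' do
      # ...
      map_output_key Hadoop::Io::Text
      map_output_value Hadoop::Io::IntWritable
      output_key Hadoop::Io::Text
      output_value Hadoop::Io::IntWritable
    end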
@@ -335,23 +365,74 @@ module Rubydoop

    # @private
    class Context
-     attr_reader :jobs, :arguments
-
-     def initialize(conf, proxy_classes, arguments)
+     def initialize(conf)
        @conf = conf
-       @proxy_classes = proxy_classes
-       @arguments = arguments
-       @jobs = []
+       @job_stack = [Jobs::Sequence.new]
      end

      def create_job(name)
        hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
-       @jobs << hadoop_job
+       @job_stack.last.add(hadoop_job)
        hadoop_job
      end

-     def proxy_class(type)
-       @proxy_classes[type]
+     def wait_for_completion(verbose)
+       @job_stack.first.wait_for_completion(verbose)
+     end
+
+     def parallel
+       push(Jobs::Parallel.new)
+       if block_given?
+         yield
+         pop
+       end
+     end
+
+     def sequence
+       push(Jobs::Sequence.new)
+       if block_given?
+         yield
+         pop
+       end
+     end
+
+     def push(job_list)
+       @job_stack.last.add(job_list)
+       @job_stack.push(job_list)
+     end
+
+     def pop
+       @job_stack.pop
+     end
+
+     class Jobs
+       attr_reader :jobs
+
+       def initialize
+         @jobs = []
+       end
+
+       def add(job)
+         @jobs.push(job)
+       end
+
+       class Sequence < Jobs
+         def wait_for_completion(verbose)
+           @jobs.all? do |job|
+             job.wait_for_completion(verbose)
+           end
+         end
+       end
+
+       class Parallel < Jobs
+         def wait_for_completion(verbose)
+           @jobs.map do |job|
+             Thread.new do
+               job.wait_for_completion(verbose)
+             end
+           end.map!(&:value).all?
+         end
+       end
      end
    end
  end
lib/rubydoop/job_runner.rb ADDED
@@ -0,0 +1,50 @@
+ # encoding: utf-8
+
+
+ module Rubydoop
+   # @private
+   class JobRunner < Java::OrgApacheHadoopConf::Configured
+     include Java::OrgApacheHadoopUtil::Tool
+
+     def initialize(setup_script=$0, &block)
+       @setup_script = setup_script
+       @block = block
+     end
+
+     def run(args)
+       conf = Java::OrgApacheHadoopMapred::JobConf.new(get_conf)
+       conf.set(Java::Rubydoop::InstanceContainer::JOB_SETUP_SCRIPT_KEY, File.basename(@setup_script))
+       conf.jar = containing_jar
+       context = Context.new(conf)
+       configuration_definition = ConfigurationDefinition.new(context)
+       begin
+         configuration_definition.instance_exec(*args, &@block)
+       rescue => e
+         raise JobRunnerError, sprintf('Could not load job setup script (%s): %s', @setup_script.inspect, e.message.inspect), e.backtrace
+       end
+       configuration_definition.wait_for_completion(true) ? 0 : 1
+     end
+
+     def self.run(args, &block)
+       Java::JavaLang::System.exit(Java::OrgApacheHadoopUtil::ToolRunner.run(new(&block), args.to_java(:string)))
+     end
+
+     private
+
+     def containing_jar
+       @containing_jar ||= begin
+         relative_setup_script = @setup_script[/(?<=#{PUCK_ROOT}).+\Z/]
+         class_loader = JRuby.runtime.jruby_class_loader
+         if (url = class_loader.get_resources(relative_setup_script).find { |url| url.protocol == 'jar' })
+           path = url.path
+           path.slice!(/\Afile:/)
+           path = Java::JavaNet::URLDecoder.decode(path, 'UTF-8')
+           path.slice!(/!.*\Z/)
+           path
+         end
+       end
+     end
+   end
+
+   JobRunnerError = Class.new(StandardError)
+ end
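JobRunner is what turns a `Rubydoop.run` block into a Hadoop `Tool`: it records the setup script's file name in the job configuration (the Java-side `InstanceContainer` reads that key inside each task), points the job at the JAR containing the script, and exits with 0 or 1 depending on `wait_for_completion`. A sketch of such a setup script, with hypothetical file and class names:

    # word_count.rb -- packaged into the job JAR by Rubydoop::Package/Puck
    require 'word_count'   # hypothetical file defining the mapper and reducer

    Rubydoop.run do |input_path, output_path|
      job 'word_count' do
        input input_path
        output output_path
        mapper WordCount::Mapper
        reducer WordCount::Reducer
      end
    end

    # Typically submitted through the packaged JAR, roughly:
    #   hadoop jar build/word_count.jar word_count data/input data/output
    # (the exact invocation depends on how the JAR was built and named)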
lib/rubydoop/package.rb CHANGED
@@ -1,11 +1,7 @@
  # encoding: utf-8

  require 'bundler'
- require 'open-uri'
- require 'ant'
- require 'fileutils'
- require 'set'
-
+ require 'puck'

  module Rubydoop
    # Utility for making a job JAR that works with Hadoop.
@@ -25,15 +21,14 @@ module Rubydoop
      # @option options [String] :project_base_dir The project's base dir, defaults to the current directory (the assumption is that Package will be used from a Rake task)
      # @option options [String] :project_name The name of the JAR file (minus .jar), defaults to the directory name of the `:project_base_dir`
      # @option options [String] :build_dir The directory to put the final JAR into, defaults to `:project_base_dir + '/build'`
+     # @option options [String] :jruby_jar_path The path to a local copy of `jruby-complete.jar`, unless specified you need to have `jruby-jars` in your `Gemfile`
      # @option options [Array<String>] :gem_groups All gems from these Gemfile groups will be included, defaults to `[:default]` (the top-level group of a Gemfile)
      # @option options [Array<String>] :lib_jars Paths to extra JAR files to include in the JAR's lib directory (where they will be on the classpath when the job is run)
-     # @option options [String] :jruby_version The JRuby version to package, defaults to `JRUBY_VERSION`
-     # @option options [String] :jruby_jar_path The path to a local copy of `jruby-complete.jar`, defaults to downloading and caching a version defined by `:jruby_version`
      def initialize(options={})
        @options = default_options.merge(options)
-       @options[:project_name] = File.basename(@options[:project_base_dir]) unless @options[:project_name]
-       @options[:build_dir] = File.join(@options[:project_base_dir], 'build') unless @options[:build_dir]
-       @options[:jruby_jar_path] = File.join(@options[:build_dir], "jruby-complete-#{@options[:jruby_version]}.jar") unless @options[:jruby_jar_path]
+       @options[:project_name] ||= File.basename(@options[:project_base_dir])
+       @options[:build_dir] ||= File.join(@options[:project_base_dir], 'build')
+       @options[:jar_path] ||= "#{@options[:project_name]}.jar"
      end

      # Create the JAR package, see {Package#initialize} for configuration options.
@@ -42,9 +37,15 @@ module Rubydoop
      # (`jruby-complete.jar`) and locally cached, but if you already have a
      # copy in a local Ivy or Maven repository that will be used instead.
      def create!
-       create_directories!
-       fetch_jruby!
-       build_jar!
+       Puck::Jar.new(
+         app_dir: @options[:project_base_dir],
+         app_name: @options[:project_name],
+         build_dir: @options[:build_dir],
+         jar_name: @options[:jar_path],
+         gem_groups: @options[:gem_groups],
+         extra_files: lib_jars,
+         jruby_complete: @options[:jruby_jar_path]
+       ).create
      end

      # A shortcut for `Package.new(options).create!`.
@@ -52,11 +53,25 @@ module Rubydoop
        new(options).create!
      end

+     def respond_to?(name)
+       @options.key?(name) or super
+     end
+
+     def method_missing(name, *args)
+       @options[name] or super
+     end
+
+     def lib_jars
+       extra_files = { File.join(rubydoop_base_dir, 'lib/rubydoop.jar') => 'lib/rubydoop.jar' }
+       @options[:lib_jars].each_with_object(extra_files) do |jar, extra_files|
+         extra_files[jar] = File.join('lib', File.basename(jar))
+       end
+     end
+
      private

      def default_options
        defaults = {
-         :main_class => 'rubydoop.RubydoopJobRunner',
          :rubydoop_base_dir => File.expand_path('../../..', __FILE__),
          :project_base_dir => Dir.getwd,
          :gem_groups => [:default],
@@ -64,60 +79,5 @@ module Rubydoop
          :jruby_version => JRUBY_VERSION
        }
      end
-
-     def create_directories!
-       FileUtils.mkdir_p(@options[:build_dir])
-     end
-
-     def fetch_jruby!
-       return if File.exists?(@options[:jruby_jar_path])
-
-       local_maven_path = File.expand_path("~/.m2/repository/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar")
-       local_ivy_path = File.expand_path("~/.ivy2/cache/org.jruby/jruby-complete/jars/jruby-complete-#{@options[:jruby_version]}.jar")
-       remote_maven_url = "http://central.maven.org/maven2/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar"
-
-       if File.exists?(local_maven_path)
-         $stderr.puts("Using #{File.basename(local_maven_path)} from local Maven cache")
-         @options[:jruby_jar_path] = local_maven_path
-       elsif File.exists?(local_ivy_path)
-         $stderr.puts("Using #{File.basename(local_maven_path)} from local Ivy2 cache")
-         @options[:jruby_jar_path] = local_ivy_path
-       else
-         $stderr.puts("Downloading #{remote_maven_url} to #{@options[:jruby_jar_path]}")
-         jruby_complete_bytes = open(remote_maven_url).read
-         File.open(@options[:jruby_jar_path], 'wb') do |io|
-           io.write(jruby_complete_bytes)
-         end
-       end
-     end
-
-     def build_jar!
-       # the ant block is instance_exec'ed so instance variables and methods are not in scope
-       options = @options
-       bundled_gems = load_path
-       lib_jars = [options[:jruby_jar_path], *options[:lib_jars]]
-       ant do
-         jar :destfile => "#{options[:build_dir]}/#{options[:project_name]}.jar" do
-           manifest { attribute :name => 'Main-Class', :value => options[:main_class] }
-           zipfileset :src => "#{options[:rubydoop_base_dir]}/lib/rubydoop.jar"
-           fileset :dir => "#{options[:rubydoop_base_dir]}/lib", :includes => '**/*.rb', :excludes => '*.jar'
-           fileset :dir => "#{options[:project_base_dir]}/lib"
-           bundled_gems.each { |path| fileset :dir => path }
-           lib_jars.each { |extra_jar| zipfileset :dir => File.dirname(extra_jar), :includes => File.basename(extra_jar), :prefix => 'lib' }
-         end
-       end
-     end
-
-     def load_path
-       Bundler.definition.specs_for(@options[:gem_groups]).flat_map do |spec|
-         if spec.full_name !~ /^(?:bundler|rubydoop)-\d+/
-           spec.require_paths.map do |rp|
-             "#{spec.full_gem_path}/#{rp}"
-           end
-         else
-           []
-         end
-       end
-     end
    end
  end
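With the Ant-based packaging replaced by Puck, a Rakefile only needs to call `Rubydoop::Package.create!` (or instantiate `Package` itself); the options documented above map more or less directly onto `Puck::Jar`. A sketch of a Rake task, with made-up project values:

    # Rakefile
    require 'rubydoop/package'

    task :package do
      Rubydoop::Package.create!(
        project_name: 'word_count',                  # defaults to the directory name
        gem_groups: [:default],
        lib_jars: ['lib/ext/custom-writables.jar']   # hypothetical extra JAR for lib/
      )
    end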
lib/rubydoop/version.rb CHANGED
@@ -1,4 +1,4 @@
  module Rubydoop
    # @private
-   VERSION = '1.0.0'
- end
+   VERSION = '2.0.0.pre1'
+ end
metadata CHANGED
@@ -1,16 +1,29 @@
  --- !ruby/object:Gem::Specification
  name: rubydoop
  version: !ruby/object:Gem::Version
-   prerelease:
-   version: 1.0.0
+   version: 2.0.0.pre1
  platform: java
  authors:
  - Theo Hultberg
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2012-10-01 00:00:00.000000000Z
- dependencies: []
+ date: 2016-01-28 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.2'
+   name: puck
+   prerelease: false
+   type: :runtime
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.2'
  description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
  email:
  - theo@iconara.net
@@ -19,35 +32,35 @@ extensions: []
  extra_rdoc_files: []
  files:
  - lib/hadoop.rb
+ - lib/rubydoop.jar
  - lib/rubydoop.rb
  - lib/rubydoop/dsl.rb
+ - lib/rubydoop/job_runner.rb
  - lib/rubydoop/package.rb
  - lib/rubydoop/version.rb
- - lib/rubydoop.jar
  homepage: http://github.com/iconara/rubydoop
- licenses: []
+ licenses:
+ - Apache License 2.0
+ metadata: {}
  post_install_message:
  rdoc_options: []
  require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ! '>='
+   - - '>='
      - !ruby/object:Gem::Version
        version: '0'
-   none: false
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ! '>='
+   - - '>'
      - !ruby/object:Gem::Version
-       version: '0'
-   none: false
+       version: 1.3.1
  requirements: []
  rubyforge_project: rubydoop
- rubygems_version: 1.8.15
+ rubygems_version: 2.4.5
  signing_key:
- specification_version: 3
+ specification_version: 4
  summary: Write Hadoop jobs in Ruby
  test_files: []
  has_rdoc:
- ...