rubydoop 1.1.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f4e36e0505e17eeb71201cd7803446ba6c56ad7a
-  data.tar.gz: 510fe7c8d2ea01d1218ad65fdd7519e5e7dd6d62
+  metadata.gz: 82bf7b52baa55faaa4787d61890c72ad3eb7a9e1
+  data.tar.gz: f48b41c4268d3bc970ef942b845bcab414a6714f
 SHA512:
-  metadata.gz: 77376833f4e51901ed1a3995fcf9f6d1a3e8f516c3c851331b9c7526c412dbb544cb9525b6bc6a8176374c02d5ee1a122fdeae5fc3ad5fab1b9bd8ec9dd73d52
-  data.tar.gz: 594b079d9246b12f599f570f445e8734c1201cc35901cb76d167728a62977e51e86a994d1493b10954451f288dde45948cbca8586a17c76ddcd775ae73eb4236
+  metadata.gz: 30b7c232ed09dc1425d1798f5828da8f9b109769f5fcc3dd9a1c97a71e35aad3605f49c3a84b3a52b879119df37357e9ee8bb81f6d85ba3f051f82c71b4b4624
+  data.tar.gz: 218dc633c5038de6b8964bcfc57c2d8d582032d2a3216e715ac755807aa1585946ceac944417d738376b24b757a441d99f7d9e695b66c8cc606c8f4a46314a0a
data/lib/rubydoop.jar CHANGED
Binary file
data/lib/rubydoop.rb CHANGED
@@ -10,49 +10,7 @@ require 'hadoop'
 # {Package} for the packaging documentation, or the {file:README.md README}
 # for a getting started guide.
 module Rubydoop
-  # @private
-  def self.create_mapper(conf)
-    create_instance(conf.get(MAPPER_KEY))
-  end
-
-  # @private
-  def self.create_reducer(conf)
-    create_instance(conf.get(REDUCER_KEY))
-  end
-
-  # @private
-  def self.create_combiner(conf)
-    create_instance(conf.get(COMBINER_KEY))
-  end
-
-  # @private
-  def self.create_partitioner(conf)
-    create_instance(conf.get(PARTITIONER_KEY))
-  end
-
-  # @private
-  def self.create_grouping_comparator(conf)
-    create_instance(conf.get(GROUPING_COMPARATOR_KEY))
-  end
-
-  # @private
-  def self.create_sort_comparator(conf)
-    create_instance(conf.get(SORT_COMPARATOR_KEY))
-  end
-
-  private
-
-  MAPPER_KEY = 'rubydoop.mapper'.freeze
-  REDUCER_KEY = 'rubydoop.reducer'.freeze
-  COMBINER_KEY = 'rubydoop.combiner'.freeze
-  PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
-  GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
-  SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze
-
-  def self.create_instance(const_path)
-    cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
-    cls.new
-  end
+  include_package 'rubydoop'
 end
 
 require 'rubydoop/dsl'
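All of the Ruby-side factory plumbing is replaced by a single `include_package`, JRuby's mechanism for lazily resolving the classes of a Java package as Ruby constants: the proxy classes compiled into the updated rubydoop.jar above (`MapperProxy`, `ReducerProxy`, and so on, referenced by the new dsl.rb below) now appear directly under the `Rubydoop` module. A minimal sketch of the effect, assuming a JRuby runtime with rubydoop.jar on the classpath:

    # Sketch: constants missing from the Ruby module are looked up in
    # the Java package 'rubydoop' from the jar.
    require 'java'

    module Rubydoop
      include_package 'rubydoop'
    end

    Rubydoop::MapperProxy  # resolves to the Java class rubydoop.MapperProxy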
data/lib/rubydoop/dsl.rb CHANGED
@@ -62,6 +62,14 @@ module Rubydoop
       job.instance_exec(&block)
       job
     end
+
+    def parallel(&block)
+      @context.parallel(&block)
+    end
+
+    def sequence(&block)
+      @context.sequence(&block)
+    end
   end
 
   # Job configuration DSL.
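These two methods are the user-facing half of this release's job-composition feature; they delegate to `Context#parallel` and `Context#sequence`, reworked at the bottom of this file. A hypothetical configuration using them (assuming the gem's `Rubydoop.configure` entry point; job names and bodies are illustrative):

    # Jobs inside `parallel` run concurrently; the enclosing
    # configuration is a sequence, so the final job starts only after
    # both parallel jobs have finished.
    Rubydoop.configure do |input_path, output_path|
      parallel do
        job 'count words' do
          # ...
        end
        job 'count bigrams' do
          # ...
        end
      end
      job 'merge counts' do
        # ...
      end
    end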
@@ -94,11 +102,15 @@ module Rubydoop
         class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "InputFormat"
         format = Hadoop::Mapreduce::Lib::Input.const_get(class_name)
       end
+      unless format <= Hadoop::Mapreduce::InputFormat
+        @job.configuration.set(Rubydoop::InputFormatProxy::RUBY_CLASS_KEY, format.name)
+        format = Rubydoop::InputFormatProxy
+      end
       format.set_input_paths(@job, paths)
       @job.set_input_format_class(format)
     end
 
-    # Sets the output path of the job.
+    # Sets or gets the output path of the job.
     #
     # Calls `setOutputFormatClass` on the Hadoop job and uses the static
     # `setOutputPath` on the output format to set the job's output path.
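The new guard lets `input` accept format classes that are not Hadoop `InputFormat` subclasses, in particular plain Ruby classes: the class name is recorded in the job configuration under the proxy's `RUBY_CLASS_KEY` and the Java-side `Rubydoop::InputFormatProxy` is installed in its place, mirroring the mapper and reducer proxies below. A sketch of the call site (`JsonEventInputFormat` is hypothetical; the interface it must implement is defined by the proxy in rubydoop.jar):

    # Hypothetical Ruby input format passed straight to the DSL; the
    # guard above swaps in Rubydoop::InputFormatProxy and stores
    # 'JsonEventInputFormat' in the configuration for the proxy to find.
    job 'parse events' do
      input 'events/*.json', format: JsonEventInputFormat
      # ...
    end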
@@ -108,14 +120,27 @@ module Rubydoop
     # @param [String] dir The output path
     # @param [Hash] options
     # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
-    def output(dir, options={})
-      format = options.fetch(:format, :text)
-      unless format.is_a?(Class)
-        class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
-        format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
+    def output(dir=nil, options={})
+      if dir
+        if dir.is_a?(Hash)
+          options = dir
+          if options[:intermediate]
+            dir = @job.job_name
+          else
+            raise ArgumentError, sprintf('neither dir nor intermediate: true was specified')
+          end
+        end
+        dir = sprintf('%s-%010d-%05d', dir, Time.now, rand(1e5)) if options[:intermediate]
+        @output_dir = dir
+        format = options.fetch(:format, :text)
+        unless format.is_a?(Class)
+          class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
+          format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
+        end
+        format.set_output_path(@job, Hadoop::Fs::Path.new(@output_dir))
+        @job.set_output_format_class(format)
       end
-      format.set_output_path(@job, Hadoop::Fs::Path.new(dir))
-      @job.set_output_format_class(format)
+      @output_dir
     end
 
     # Sets a job property.
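`output` is now a combined setter and getter: with no arguments it returns `@output_dir`, and `output intermediate: true` derives a scratch directory name from the job name plus a timestamp and a random suffix. (As released, `sprintf('%010d', Time.now)` passes a `Time` where `%d` expects an integer, which raises a `TypeError` at runtime; `Time.now.to_i` was presumably intended.) Together with `job` returning the job definition, this makes chaining jobs straightforward, as in this sketch (names illustrative):

    # The second job reads the first job's auto-named intermediate
    # directory via the new getter behaviour of `output`.
    Rubydoop.configure do
      first = job 'tokenize' do
        input 'raw-logs'
        output intermediate: true
        # ...
      end
      job 'aggregate' do
        input first.output
        output 'word-counts'
        # ...
      end
    end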
@@ -161,8 +186,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) mapper class.
     def mapper(cls=nil)
       if cls
-        @job.configuration.set(MAPPER_KEY, cls.name)
-        @job.set_mapper_class(@context.proxy_class(:mapper))
+        @job.configuration.set(Rubydoop::MapperProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_mapper_class(Rubydoop::MapperProxy)
         @mapper = cls
       end
       @mapper
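The same substitution is applied to `reducer`, `combiner`, `partitioner`, `grouping_comparator`, and `sort_comparator` in the hunks that follow: rather than fetching a per-type proxy class from the Context, the DSL points Hadoop at the fixed Java proxy from rubydoop.jar and stores the Ruby class name under that proxy's own configuration key. In plain Hadoop calls, `mapper WordCountMapper` now amounts to roughly this (the mapper class name is illustrative):

    # Rough equivalent of `mapper WordCountMapper` after this change;
    # at task time the Java proxy presumably reads the key back and
    # instantiates the named Ruby class in the embedded JRuby runtime,
    # as the removed Rubydoop.create_* helpers used to do.
    job.configuration.set(Rubydoop::MapperProxy::RUBY_CLASS_KEY, 'WordCountMapper')
    job.set_mapper_class(Rubydoop::MapperProxy)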
@@ -187,8 +212,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) reducer class.
     def reducer(cls=nil)
       if cls
-        @job.configuration.set(REDUCER_KEY, cls.name)
-        @job.set_reducer_class(@context.proxy_class(:reducer))
+        @job.configuration.set(Rubydoop::ReducerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_reducer_class(Rubydoop::ReducerProxy)
         @reducer = cls
       end
       @reducer
@@ -208,8 +233,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) combiner class.
     def combiner(cls=nil)
       if cls
-        @job.configuration.set(COMBINER_KEY, cls.name)
-        @job.set_combiner_class(@context.proxy_class(:combiner))
+        @job.configuration.set(Rubydoop::CombinerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_combiner_class(Rubydoop::CombinerProxy)
         @combiner = cls
       end
       @combiner
@@ -230,8 +255,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) partitioner class.
     def partitioner(cls=nil)
       if cls
-        @job.configuration.set(PARTITIONER_KEY, cls.name)
-        @job.set_partitioner_class(@context.proxy_class(:partitioner))
+        @job.configuration.set(Rubydoop::PartitionerProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_partitioner_class(Rubydoop::PartitionerProxy)
         @partitioner = cls
       end
       @partitioner
@@ -249,8 +274,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) comparator class.
     def grouping_comparator(cls=nil)
       if cls
-        @job.configuration.set(GROUPING_COMPARATOR_KEY, cls.name)
-        @job.set_grouping_comparator_class(@context.proxy_class(:grouping_comparator))
+        @job.configuration.set(Rubydoop::GroupingComparatorProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_grouping_comparator_class(Rubydoop::GroupingComparatorProxy)
         @grouping_comparator = cls
       end
       @grouping_comparator
@@ -268,8 +293,8 @@ module Rubydoop
     # @param [Class] cls The (Ruby) comparator class.
     def sort_comparator(cls=nil)
       if cls
-        @job.configuration.set(SORT_COMPARATOR_KEY, cls.name)
-        @job.set_sort_comparator_class(@context.proxy_class(:sort_comparator))
+        @job.configuration.set(Rubydoop::SortComparatorProxy::RUBY_CLASS_KEY, cls.name)
+        @job.set_sort_comparator_class(Rubydoop::SortComparatorProxy)
         @sort_comparator = cls
       end
       @sort_comparator
@@ -292,13 +317,13 @@ module Rubydoop
     def self.class_setter(dsl_name)
       define_method(dsl_name) do |cls|
         if cls
-          @job.send("set_#{dsl_name}_class", cls.java_class)
+          @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
           instance_variable_set(:"@#{dsl_name}", cls)
         end
         instance_variable_get(:"@#{dsl_name}")
       end
       define_method("#{dsl_name}=") do |cls|
-        @job.send("set_#{dsl_name}_class", cls.java_class)
+        @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
       end
     end
 
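In the JRuby versions contemporary with this release, `java_class` on a proxy class returns JRuby's `JavaClass` wrapper rather than the underlying `java.lang.Class`, while `to_java(Java::JavaLang::Class)` hands back the raw `java.lang.Class` that the Hadoop `set*Class` setters expect. A small JRuby illustration (behaviour may differ across JRuby versions):

    require 'java'

    cls = java.util.HashMap
    cls.java_class                        # wrapper object (JavaClass)
    cls.to_java(Java::JavaLang::Class)    # the raw java.lang.Class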
@@ -343,23 +368,77 @@ module Rubydoop
 
   # @private
   class Context
-    attr_reader :jobs, :arguments
+    attr_reader :arguments
 
-    def initialize(conf, proxy_classes, arguments)
+    def initialize(conf, arguments)
       @conf = conf
-      @proxy_classes = proxy_classes
       @arguments = arguments.to_a
-      @jobs = []
+      @job_stack = [Jobs::Sequence.new]
     end
 
     def create_job(name)
       hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
-      @jobs << hadoop_job
+      @job_stack.last.add(hadoop_job)
       hadoop_job
     end
 
-    def proxy_class(type)
-      @proxy_classes[type.to_s]
+    def wait_for_completion(verbose)
+      @job_stack.first.wait_for_completion(verbose)
+    end
+
+    def parallel
+      push(Jobs::Parallel.new)
+      if block_given?
+        yield
+        pop
+      end
+    end
+
+    def sequence
+      push(Jobs::Sequence.new)
+      if block_given?
+        yield
+        pop
+      end
+    end
+
+    def push(job_list)
+      @job_stack.last.add(job_list)
+      @job_stack.push(job_list)
+    end
+
+    def pop
+      @job_stack.pop
+    end
+
+    class Jobs
+      attr_reader :jobs
+
+      def initialize
+        @jobs = []
+      end
+
+      def add(job)
+        @jobs.push(job)
+      end
+
+      class Sequence < Jobs
+        def wait_for_completion(verbose)
+          @jobs.all? do |job|
+            job.wait_for_completion(verbose)
+          end
+        end
+      end
+
+      class Parallel < Jobs
+        def wait_for_completion(verbose)
+          @jobs.map do |job|
+            Thread.new do
+              job.wait_for_completion(verbose)
+            end
+          end.map!(&:value).all?
+        end
+      end
     end
   end
 end
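The Context now keeps a stack of job lists instead of a flat array. The bottom of the stack is an implicit `Jobs::Sequence`; `parallel` and `sequence` push a nested list that becomes the target for subsequent `create_job` calls, popping it when the block returns. `Sequence#wait_for_completion` runs its children in order and stops at the first failure (`all?` short-circuits), while `Parallel#wait_for_completion` starts one thread per child and collects every thread's result before answering. Nesting composes, as in this sketch (job names illustrative):

    parallel do            # pushes Jobs::Parallel onto the stack
      sequence do          # pushes a nested Jobs::Sequence
        job 'a' do
          # ... runs first within the inner sequence
        end
        job 'b' do
          # ... runs after 'a' succeeds
        end
      end
      job 'c' do
        # ... runs alongside the 'a' -> 'b' sequence
      end
    end
    job 'd' do
      # ... top-level sequence: starts after everything above completes
    end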
data/lib/rubydoop/version.rb CHANGED
@@ -1,4 +1,4 @@
 module Rubydoop
   # @private
-  VERSION = '1.1.3'
+  VERSION = '1.2.0'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rubydoop
 version: !ruby/object:Gem::Version
-  version: 1.1.3
+  version: 1.2.0
 platform: ruby
 authors:
 - Theo Hultberg
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-02-25 00:00:00.000000000 Z
+date: 2015-06-12 00:00:00.000000000 Z
 dependencies: []
 description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
 email:
@@ -42,7 +42,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project: rubydoop
-rubygems_version: 2.2.2
+rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
 summary: Write Hadoop jobs in Ruby