rubydoop 1.1.3 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f4e36e0505e17eeb71201cd7803446ba6c56ad7a
4
- data.tar.gz: 510fe7c8d2ea01d1218ad65fdd7519e5e7dd6d62
3
+ metadata.gz: 82bf7b52baa55faaa4787d61890c72ad3eb7a9e1
4
+ data.tar.gz: f48b41c4268d3bc970ef942b845bcab414a6714f
5
5
  SHA512:
6
- metadata.gz: 77376833f4e51901ed1a3995fcf9f6d1a3e8f516c3c851331b9c7526c412dbb544cb9525b6bc6a8176374c02d5ee1a122fdeae5fc3ad5fab1b9bd8ec9dd73d52
7
- data.tar.gz: 594b079d9246b12f599f570f445e8734c1201cc35901cb76d167728a62977e51e86a994d1493b10954451f288dde45948cbca8586a17c76ddcd775ae73eb4236
6
+ metadata.gz: 30b7c232ed09dc1425d1798f5828da8f9b109769f5fcc3dd9a1c97a71e35aad3605f49c3a84b3a52b879119df37357e9ee8bb81f6d85ba3f051f82c71b4b4624
7
+ data.tar.gz: 218dc633c5038de6b8964bcfc57c2d8d582032d2a3216e715ac755807aa1585946ceac944417d738376b24b757a441d99f7d9e695b66c8cc606c8f4a46314a0a
data/lib/rubydoop.jar CHANGED
Binary file
data/lib/rubydoop.rb CHANGED
@@ -10,49 +10,7 @@ require 'hadoop'
10
10
  # {Package} for the packaging documentation, or the {file:README.md README}
11
11
  # for a getting started guide.
12
12
  module Rubydoop
13
- # @private
14
- def self.create_mapper(conf)
15
- create_instance(conf.get(MAPPER_KEY))
16
- end
17
-
18
- # @private
19
- def self.create_reducer(conf)
20
- create_instance(conf.get(REDUCER_KEY))
21
- end
22
-
23
- # @private
24
- def self.create_combiner(conf)
25
- create_instance(conf.get(COMBINER_KEY))
26
- end
27
-
28
- # @private
29
- def self.create_partitioner(conf)
30
- create_instance(conf.get(PARTITIONER_KEY))
31
- end
32
-
33
- # @private
34
- def self.create_grouping_comparator(conf)
35
- create_instance(conf.get(GROUPING_COMPARATOR_KEY))
36
- end
37
-
38
- # @private
39
- def self.create_sort_comparator(conf)
40
- create_instance(conf.get(SORT_COMPARATOR_KEY))
41
- end
42
-
43
- private
44
-
45
- MAPPER_KEY = 'rubydoop.mapper'.freeze
46
- REDUCER_KEY = 'rubydoop.reducer'.freeze
47
- COMBINER_KEY = 'rubydoop.combiner'.freeze
48
- PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
49
- GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
50
- SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze
51
-
52
- def self.create_instance(const_path)
53
- cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
54
- cls.new
55
- end
13
+ include_package 'rubydoop'
56
14
  end
57
15
 
58
16
  require 'rubydoop/dsl'
data/lib/rubydoop/dsl.rb CHANGED
@@ -62,6 +62,14 @@ module Rubydoop
62
62
  job.instance_exec(&block)
63
63
  job
64
64
  end
65
+
66
+ def parallel(&block)
67
+ @context.parallel(&block)
68
+ end
69
+
70
+ def sequence(&block)
71
+ @context.sequence(&block)
72
+ end
65
73
  end
66
74
 
67
75
  # Job configuration DSL.
@@ -94,11 +102,15 @@ module Rubydoop
94
102
  class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "InputFormat"
95
103
  format = Hadoop::Mapreduce::Lib::Input.const_get(class_name)
96
104
  end
105
+ unless format <= Hadoop::Mapreduce::InputFormat
106
+ @job.configuration.set(Rubydoop::InputFormatProxy::RUBY_CLASS_KEY, format.name)
107
+ format = Rubydoop::InputFormatProxy
108
+ end
97
109
  format.set_input_paths(@job, paths)
98
110
  @job.set_input_format_class(format)
99
111
  end
100
112
 
101
- # Sets the output path of the job.
113
+ # Sets or gets the output path of the job.
102
114
  #
103
115
  # Calls `setOutputFormatClass` on the Hadoop job and uses the static
104
116
  # `setOutputPath` on the output format to set the job's output path.
@@ -108,14 +120,27 @@ module Rubydoop
108
120
  # @param [String] dir The output path
109
121
  # @param [Hash] options
110
122
  # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
111
- def output(dir, options={})
112
- format = options.fetch(:format, :text)
113
- unless format.is_a?(Class)
114
- class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
115
- format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
123
+ def output(dir=nil, options={})
124
+ if dir
125
+ if dir.is_a?(Hash)
126
+ options = dir
127
+ if options[:intermediate]
128
+ dir = @job.job_name
129
+ else
130
+ raise ArgumentError, sprintf('neither dir nor intermediate: true was specified')
131
+ end
132
+ end
133
+ dir = sprintf('%s-%010d-%05d', dir, Time.now, rand(1e5)) if options[:intermediate]
134
+ @output_dir = dir
135
+ format = options.fetch(:format, :text)
136
+ unless format.is_a?(Class)
137
+ class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
138
+ format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
139
+ end
140
+ format.set_output_path(@job, Hadoop::Fs::Path.new(@output_dir))
141
+ @job.set_output_format_class(format)
116
142
  end
117
- format.set_output_path(@job, Hadoop::Fs::Path.new(dir))
118
- @job.set_output_format_class(format)
143
+ @output_dir
119
144
  end
120
145
 
121
146
  # Sets a job property.
@@ -161,8 +186,8 @@ module Rubydoop
161
186
  # @param [Class] cls The (Ruby) mapper class.
162
187
  def mapper(cls=nil)
163
188
  if cls
164
- @job.configuration.set(MAPPER_KEY, cls.name)
165
- @job.set_mapper_class(@context.proxy_class(:mapper))
189
+ @job.configuration.set(Rubydoop::MapperProxy::RUBY_CLASS_KEY, cls.name)
190
+ @job.set_mapper_class(Rubydoop::MapperProxy)
166
191
  @mapper = cls
167
192
  end
168
193
  @mapper
@@ -187,8 +212,8 @@ module Rubydoop
187
212
  # @param [Class] cls The (Ruby) reducer class.
188
213
  def reducer(cls=nil)
189
214
  if cls
190
- @job.configuration.set(REDUCER_KEY, cls.name)
191
- @job.set_reducer_class(@context.proxy_class(:reducer))
215
+ @job.configuration.set(Rubydoop::ReducerProxy::RUBY_CLASS_KEY, cls.name)
216
+ @job.set_reducer_class(Rubydoop::ReducerProxy)
192
217
  @reducer = cls
193
218
  end
194
219
  @reducer
@@ -208,8 +233,8 @@ module Rubydoop
208
233
  # @param [Class] cls The (Ruby) combiner class.
209
234
  def combiner(cls=nil)
210
235
  if cls
211
- @job.configuration.set(COMBINER_KEY, cls.name)
212
- @job.set_combiner_class(@context.proxy_class(:combiner))
236
+ @job.configuration.set(Rubydoop::CombinerProxy::RUBY_CLASS_KEY, cls.name)
237
+ @job.set_combiner_class(Rubydoop::CombinerProxy)
213
238
  @combiner = cls
214
239
  end
215
240
  @combiner
@@ -230,8 +255,8 @@ module Rubydoop
230
255
  # @param [Class] cls The (Ruby) partitioner class.
231
256
  def partitioner(cls=nil)
232
257
  if cls
233
- @job.configuration.set(PARTITIONER_KEY, cls.name)
234
- @job.set_partitioner_class(@context.proxy_class(:partitioner))
258
+ @job.configuration.set(Rubydoop::PartitionerProxy::RUBY_CLASS_KEY, cls.name)
259
+ @job.set_partitioner_class(Rubydoop::PartitionerProxy)
235
260
  @partitioner = cls
236
261
  end
237
262
  @partitioner
@@ -249,8 +274,8 @@ module Rubydoop
249
274
  # @param [Class] cls The (Ruby) comparator class.
250
275
  def grouping_comparator(cls=nil)
251
276
  if cls
252
- @job.configuration.set(GROUPING_COMPARATOR_KEY, cls.name)
253
- @job.set_grouping_comparator_class(@context.proxy_class(:grouping_comparator))
277
+ @job.configuration.set(Rubydoop::GroupingComparatorProxy::RUBY_CLASS_KEY, cls.name)
278
+ @job.set_grouping_comparator_class(Rubydoop::GroupingComparatorProxy)
254
279
  @grouping_comparator = cls
255
280
  end
256
281
  @grouping_comparator
@@ -268,8 +293,8 @@ module Rubydoop
268
293
  # @param [Class] cls The (Ruby) comparator class.
269
294
  def sort_comparator(cls=nil)
270
295
  if cls
271
- @job.configuration.set(SORT_COMPARATOR_KEY, cls.name)
272
- @job.set_sort_comparator_class(@context.proxy_class(:sort_comparator))
296
+ @job.configuration.set(Rubydoop::SortComparatorProxy::RUBY_CLASS_KEY, cls.name)
297
+ @job.set_sort_comparator_class(Rubydoop::SortComparatorProxy)
273
298
  @sort_comparator = cls
274
299
  end
275
300
  @sort_comparator
@@ -292,13 +317,13 @@ module Rubydoop
292
317
  def self.class_setter(dsl_name)
293
318
  define_method(dsl_name) do |cls|
294
319
  if cls
295
- @job.send("set_#{dsl_name}_class", cls.java_class)
320
+ @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
296
321
  instance_variable_set(:"@#{dsl_name}", cls)
297
322
  end
298
323
  instance_variable_get(:"@#{dsl_name}")
299
324
  end
300
325
  define_method("#{dsl_name}=") do |cls|
301
- @job.send("set_#{dsl_name}_class", cls.java_class)
326
+ @job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
302
327
  end
303
328
  end
304
329
 
@@ -343,23 +368,77 @@ module Rubydoop
343
368
 
344
369
  # @private
345
370
  class Context
346
- attr_reader :jobs, :arguments
371
+ attr_reader :arguments
347
372
 
348
- def initialize(conf, proxy_classes, arguments)
373
+ def initialize(conf, arguments)
349
374
  @conf = conf
350
- @proxy_classes = proxy_classes
351
375
  @arguments = arguments.to_a
352
- @jobs = []
376
+ @job_stack = [Jobs::Sequence.new]
353
377
  end
354
378
 
355
379
  def create_job(name)
356
380
  hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
357
- @jobs << hadoop_job
381
+ @job_stack.last.add(hadoop_job)
358
382
  hadoop_job
359
383
  end
360
384
 
361
- def proxy_class(type)
362
- @proxy_classes[type.to_s]
385
+ def wait_for_completion(verbose)
386
+ @job_stack.first.wait_for_completion(verbose)
387
+ end
388
+
389
+ def parallel
390
+ push(Jobs::Parallel.new)
391
+ if block_given?
392
+ yield
393
+ pop
394
+ end
395
+ end
396
+
397
+ def sequence
398
+ push(Jobs::Sequence.new)
399
+ if block_given?
400
+ yield
401
+ pop
402
+ end
403
+ end
404
+
405
+ def push(job_list)
406
+ @job_stack.last.add(job_list)
407
+ @job_stack.push(job_list)
408
+ end
409
+
410
+ def pop
411
+ @job_stack.pop
412
+ end
413
+
414
+ class Jobs
415
+ attr_reader :jobs
416
+
417
+ def initialize
418
+ @jobs = []
419
+ end
420
+
421
+ def add(job)
422
+ @jobs.push(job)
423
+ end
424
+
425
+ class Sequence < Jobs
426
+ def wait_for_completion(verbose)
427
+ @jobs.all? do |job|
428
+ job.wait_for_completion(verbose)
429
+ end
430
+ end
431
+ end
432
+
433
+ class Parallel < Jobs
434
+ def wait_for_completion(verbose)
435
+ @jobs.map do |job|
436
+ Thread.new do
437
+ job.wait_for_completion(verbose)
438
+ end
439
+ end.map!(&:value).all?
440
+ end
441
+ end
363
442
  end
364
443
  end
365
444
  end
@@ -1,4 +1,4 @@
1
1
  module Rubydoop
2
2
  # @private
3
- VERSION = '1.1.3'
3
+ VERSION = '1.2.0'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubydoop
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Theo Hultberg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-25 00:00:00.000000000 Z
11
+ date: 2015-06-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
14
14
  email:
@@ -42,7 +42,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
42
  version: '0'
43
43
  requirements: []
44
44
  rubyforge_project: rubydoop
45
- rubygems_version: 2.2.2
45
+ rubygems_version: 2.4.6
46
46
  signing_key:
47
47
  specification_version: 4
48
48
  summary: Write Hadoop jobs in Ruby