rubydoop 1.1.3 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rubydoop.jar +0 -0
- data/lib/rubydoop.rb +1 -43
- data/lib/rubydoop/dsl.rb +108 -29
- data/lib/rubydoop/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82bf7b52baa55faaa4787d61890c72ad3eb7a9e1
|
4
|
+
data.tar.gz: f48b41c4268d3bc970ef942b845bcab414a6714f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 30b7c232ed09dc1425d1798f5828da8f9b109769f5fcc3dd9a1c97a71e35aad3605f49c3a84b3a52b879119df37357e9ee8bb81f6d85ba3f051f82c71b4b4624
|
7
|
+
data.tar.gz: 218dc633c5038de6b8964bcfc57c2d8d582032d2a3216e715ac755807aa1585946ceac944417d738376b24b757a441d99f7d9e695b66c8cc606c8f4a46314a0a
|
data/lib/rubydoop.jar
CHANGED
Binary file
|
data/lib/rubydoop.rb
CHANGED
@@ -10,49 +10,7 @@ require 'hadoop'
|
|
10
10
|
# {Package} for the packaging documentation, or the {file:README.md README}
|
11
11
|
# for a getting started guide.
|
12
12
|
module Rubydoop
|
13
|
-
|
14
|
-
def self.create_mapper(conf)
|
15
|
-
create_instance(conf.get(MAPPER_KEY))
|
16
|
-
end
|
17
|
-
|
18
|
-
# @private
|
19
|
-
def self.create_reducer(conf)
|
20
|
-
create_instance(conf.get(REDUCER_KEY))
|
21
|
-
end
|
22
|
-
|
23
|
-
# @private
|
24
|
-
def self.create_combiner(conf)
|
25
|
-
create_instance(conf.get(COMBINER_KEY))
|
26
|
-
end
|
27
|
-
|
28
|
-
# @private
|
29
|
-
def self.create_partitioner(conf)
|
30
|
-
create_instance(conf.get(PARTITIONER_KEY))
|
31
|
-
end
|
32
|
-
|
33
|
-
# @private
|
34
|
-
def self.create_grouping_comparator(conf)
|
35
|
-
create_instance(conf.get(GROUPING_COMPARATOR_KEY))
|
36
|
-
end
|
37
|
-
|
38
|
-
# @private
|
39
|
-
def self.create_sort_comparator(conf)
|
40
|
-
create_instance(conf.get(SORT_COMPARATOR_KEY))
|
41
|
-
end
|
42
|
-
|
43
|
-
private
|
44
|
-
|
45
|
-
MAPPER_KEY = 'rubydoop.mapper'.freeze
|
46
|
-
REDUCER_KEY = 'rubydoop.reducer'.freeze
|
47
|
-
COMBINER_KEY = 'rubydoop.combiner'.freeze
|
48
|
-
PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
|
49
|
-
GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
|
50
|
-
SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze
|
51
|
-
|
52
|
-
def self.create_instance(const_path)
|
53
|
-
cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
|
54
|
-
cls.new
|
55
|
-
end
|
13
|
+
include_package 'rubydoop'
|
56
14
|
end
|
57
15
|
|
58
16
|
require 'rubydoop/dsl'
|
data/lib/rubydoop/dsl.rb
CHANGED
@@ -62,6 +62,14 @@ module Rubydoop
|
|
62
62
|
job.instance_exec(&block)
|
63
63
|
job
|
64
64
|
end
|
65
|
+
|
66
|
+
def parallel(&block)
|
67
|
+
@context.parallel(&block)
|
68
|
+
end
|
69
|
+
|
70
|
+
def sequence(&block)
|
71
|
+
@context.sequence(&block)
|
72
|
+
end
|
65
73
|
end
|
66
74
|
|
67
75
|
# Job configuration DSL.
|
@@ -94,11 +102,15 @@ module Rubydoop
|
|
94
102
|
class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "InputFormat"
|
95
103
|
format = Hadoop::Mapreduce::Lib::Input.const_get(class_name)
|
96
104
|
end
|
105
|
+
unless format <= Hadoop::Mapreduce::InputFormat
|
106
|
+
@job.configuration.set(Rubydoop::InputFormatProxy::RUBY_CLASS_KEY, format.name)
|
107
|
+
format = Rubydoop::InputFormatProxy
|
108
|
+
end
|
97
109
|
format.set_input_paths(@job, paths)
|
98
110
|
@job.set_input_format_class(format)
|
99
111
|
end
|
100
112
|
|
101
|
-
# Sets the output path of the job.
|
113
|
+
# Sets or gets the output path of the job.
|
102
114
|
#
|
103
115
|
# Calls `setOutputFormatClass` on the Hadoop job and uses the static
|
104
116
|
# `setOutputPath` on the output format to set the job's output path.
|
@@ -108,14 +120,27 @@ module Rubydoop
|
|
108
120
|
# @param [String] dir The output path
|
109
121
|
# @param [Hash] options
|
110
122
|
# @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
|
111
|
-
def output(dir, options={})
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
123
|
+
def output(dir=nil, options={})
|
124
|
+
if dir
|
125
|
+
if dir.is_a?(Hash)
|
126
|
+
options = dir
|
127
|
+
if options[:intermediate]
|
128
|
+
dir = @job.job_name
|
129
|
+
else
|
130
|
+
raise ArgumentError, sprintf('neither dir nor intermediate: true was specified')
|
131
|
+
end
|
132
|
+
end
|
133
|
+
dir = sprintf('%s-%010d-%05d', dir, Time.now, rand(1e5)) if options[:intermediate]
|
134
|
+
@output_dir = dir
|
135
|
+
format = options.fetch(:format, :text)
|
136
|
+
unless format.is_a?(Class)
|
137
|
+
class_name = format.to_s.gsub(/^.|_./) {|x| x[-1,1].upcase } + "OutputFormat"
|
138
|
+
format = Hadoop::Mapreduce::Lib::Output.const_get(class_name)
|
139
|
+
end
|
140
|
+
format.set_output_path(@job, Hadoop::Fs::Path.new(@output_dir))
|
141
|
+
@job.set_output_format_class(format)
|
116
142
|
end
|
117
|
-
|
118
|
-
@job.set_output_format_class(format)
|
143
|
+
@output_dir
|
119
144
|
end
|
120
145
|
|
121
146
|
# Sets a job property.
|
@@ -161,8 +186,8 @@ module Rubydoop
|
|
161
186
|
# @param [Class] cls The (Ruby) mapper class.
|
162
187
|
def mapper(cls=nil)
|
163
188
|
if cls
|
164
|
-
@job.configuration.set(
|
165
|
-
@job.set_mapper_class(
|
189
|
+
@job.configuration.set(Rubydoop::MapperProxy::RUBY_CLASS_KEY, cls.name)
|
190
|
+
@job.set_mapper_class(Rubydoop::MapperProxy)
|
166
191
|
@mapper = cls
|
167
192
|
end
|
168
193
|
@mapper
|
@@ -187,8 +212,8 @@ module Rubydoop
|
|
187
212
|
# @param [Class] cls The (Ruby) reducer class.
|
188
213
|
def reducer(cls=nil)
|
189
214
|
if cls
|
190
|
-
@job.configuration.set(
|
191
|
-
@job.set_reducer_class(
|
215
|
+
@job.configuration.set(Rubydoop::ReducerProxy::RUBY_CLASS_KEY, cls.name)
|
216
|
+
@job.set_reducer_class(Rubydoop::ReducerProxy)
|
192
217
|
@reducer = cls
|
193
218
|
end
|
194
219
|
@reducer
|
@@ -208,8 +233,8 @@ module Rubydoop
|
|
208
233
|
# @param [Class] cls The (Ruby) combiner class.
|
209
234
|
def combiner(cls=nil)
|
210
235
|
if cls
|
211
|
-
@job.configuration.set(
|
212
|
-
@job.set_combiner_class(
|
236
|
+
@job.configuration.set(Rubydoop::CombinerProxy::RUBY_CLASS_KEY, cls.name)
|
237
|
+
@job.set_combiner_class(Rubydoop::CombinerProxy)
|
213
238
|
@combiner = cls
|
214
239
|
end
|
215
240
|
@combiner
|
@@ -230,8 +255,8 @@ module Rubydoop
|
|
230
255
|
# @param [Class] cls The (Ruby) partitioner class.
|
231
256
|
def partitioner(cls=nil)
|
232
257
|
if cls
|
233
|
-
@job.configuration.set(
|
234
|
-
@job.set_partitioner_class(
|
258
|
+
@job.configuration.set(Rubydoop::PartitionerProxy::RUBY_CLASS_KEY, cls.name)
|
259
|
+
@job.set_partitioner_class(Rubydoop::PartitionerProxy)
|
235
260
|
@partitioner = cls
|
236
261
|
end
|
237
262
|
@partitioner
|
@@ -249,8 +274,8 @@ module Rubydoop
|
|
249
274
|
# @param [Class] cls The (Ruby) comparator class.
|
250
275
|
def grouping_comparator(cls=nil)
|
251
276
|
if cls
|
252
|
-
@job.configuration.set(
|
253
|
-
@job.set_grouping_comparator_class(
|
277
|
+
@job.configuration.set(Rubydoop::GroupingComparatorProxy::RUBY_CLASS_KEY, cls.name)
|
278
|
+
@job.set_grouping_comparator_class(Rubydoop::GroupingComparatorProxy)
|
254
279
|
@grouping_comparator = cls
|
255
280
|
end
|
256
281
|
@grouping_comparator
|
@@ -268,8 +293,8 @@ module Rubydoop
|
|
268
293
|
# @param [Class] cls The (Ruby) comparator class.
|
269
294
|
def sort_comparator(cls=nil)
|
270
295
|
if cls
|
271
|
-
@job.configuration.set(
|
272
|
-
@job.set_sort_comparator_class(
|
296
|
+
@job.configuration.set(Rubydoop::SortComparatorProxy::RUBY_CLASS_KEY, cls.name)
|
297
|
+
@job.set_sort_comparator_class(Rubydoop::SortComparatorProxy)
|
273
298
|
@sort_comparator = cls
|
274
299
|
end
|
275
300
|
@sort_comparator
|
@@ -292,13 +317,13 @@ module Rubydoop
|
|
292
317
|
def self.class_setter(dsl_name)
|
293
318
|
define_method(dsl_name) do |cls|
|
294
319
|
if cls
|
295
|
-
@job.send("set_#{dsl_name}_class", cls.
|
320
|
+
@job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
|
296
321
|
instance_variable_set(:"@#{dsl_name}", cls)
|
297
322
|
end
|
298
323
|
instance_variable_get(:"@#{dsl_name}")
|
299
324
|
end
|
300
325
|
define_method("#{dsl_name}=") do |cls|
|
301
|
-
@job.send("set_#{dsl_name}_class", cls.
|
326
|
+
@job.send("set_#{dsl_name}_class", cls.to_java(Java::JavaLang::Class))
|
302
327
|
end
|
303
328
|
end
|
304
329
|
|
@@ -343,23 +368,77 @@ module Rubydoop
|
|
343
368
|
|
344
369
|
# @private
|
345
370
|
class Context
|
346
|
-
attr_reader :
|
371
|
+
attr_reader :arguments
|
347
372
|
|
348
|
-
def initialize(conf,
|
373
|
+
def initialize(conf, arguments)
|
349
374
|
@conf = conf
|
350
|
-
@proxy_classes = proxy_classes
|
351
375
|
@arguments = arguments.to_a
|
352
|
-
@
|
376
|
+
@job_stack = [Jobs::Sequence.new]
|
353
377
|
end
|
354
378
|
|
355
379
|
def create_job(name)
|
356
380
|
hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
|
357
|
-
@
|
381
|
+
@job_stack.last.add(hadoop_job)
|
358
382
|
hadoop_job
|
359
383
|
end
|
360
384
|
|
361
|
-
def
|
362
|
-
@
|
385
|
+
def wait_for_completion(verbose)
|
386
|
+
@job_stack.first.wait_for_completion(verbose)
|
387
|
+
end
|
388
|
+
|
389
|
+
def parallel
|
390
|
+
push(Jobs::Parallel.new)
|
391
|
+
if block_given?
|
392
|
+
yield
|
393
|
+
pop
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
def sequence
|
398
|
+
push(Jobs::Sequence.new)
|
399
|
+
if block_given?
|
400
|
+
yield
|
401
|
+
pop
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
def push(job_list)
|
406
|
+
@job_stack.last.add(job_list)
|
407
|
+
@job_stack.push(job_list)
|
408
|
+
end
|
409
|
+
|
410
|
+
def pop
|
411
|
+
@job_stack.pop
|
412
|
+
end
|
413
|
+
|
414
|
+
class Jobs
|
415
|
+
attr_reader :jobs
|
416
|
+
|
417
|
+
def initialize
|
418
|
+
@jobs = []
|
419
|
+
end
|
420
|
+
|
421
|
+
def add(job)
|
422
|
+
@jobs.push(job)
|
423
|
+
end
|
424
|
+
|
425
|
+
class Sequence < Jobs
|
426
|
+
def wait_for_completion(verbose)
|
427
|
+
@jobs.all? do |job|
|
428
|
+
job.wait_for_completion(verbose)
|
429
|
+
end
|
430
|
+
end
|
431
|
+
end
|
432
|
+
|
433
|
+
class Parallel < Jobs
|
434
|
+
def wait_for_completion(verbose)
|
435
|
+
@jobs.map do |job|
|
436
|
+
Thread.new do
|
437
|
+
job.wait_for_completion(verbose)
|
438
|
+
end
|
439
|
+
end.map!(&:value).all?
|
440
|
+
end
|
441
|
+
end
|
363
442
|
end
|
364
443
|
end
|
365
444
|
end
|
data/lib/rubydoop/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubydoop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Theo Hultberg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
|
14
14
|
email:
|
@@ -42,7 +42,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
42
42
|
version: '0'
|
43
43
|
requirements: []
|
44
44
|
rubyforge_project: rubydoop
|
45
|
-
rubygems_version: 2.
|
45
|
+
rubygems_version: 2.4.6
|
46
46
|
signing_key:
|
47
47
|
specification_version: 4
|
48
48
|
summary: Write Hadoop jobs in Ruby
|