rubydoop 1.0.0-java
- data/lib/hadoop.rb +31 -0
- data/lib/rubydoop.jar +0 -0
- data/lib/rubydoop.rb +58 -0
- data/lib/rubydoop/dsl.rb +357 -0
- data/lib/rubydoop/package.rb +123 -0
- data/lib/rubydoop/version.rb +4 -0
- metadata +53 -0
data/lib/hadoop.rb
ADDED
@@ -0,0 +1,31 @@
# encoding: utf-8

require 'java'


# @private
module Hadoop
  module Io
    include_package 'org.apache.hadoop.io'
  end

  module Mapreduce
    include_package 'org.apache.hadoop.mapreduce'

    module Lib
      include_package 'org.apache.hadoop.mapreduce.lib'

      module Input
        include_package 'org.apache.hadoop.mapreduce.lib.input'
      end

      module Output
        include_package 'org.apache.hadoop.mapreduce.lib.output'
      end
    end
  end

  module Fs
    include_package 'org.apache.hadoop.fs'
  end
end
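These wrapper modules are thin JRuby `include_package` shims: once the Hadoop JARs are on the classpath, the Java classes become reachable under Ruby-style namespaces. A minimal sketch of what that buys you (assuming a JRuby session with Hadoop available; not part of the gem itself):

require 'hadoop'

key = Hadoop::Io::Text.new('word')       # wraps org.apache.hadoop.io.Text
count = Hadoop::Io::IntWritable.new(1)   # wraps org.apache.hadoop.io.IntWritable
path = Hadoop::Fs::Path.new('/tmp/out')  # wraps org.apache.hadoop.fs.Path
puts key.to_s                            # JRuby maps Java's toString to to_s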
data/lib/rubydoop.jar
ADDED
Binary file
data/lib/rubydoop.rb
ADDED
@@ -0,0 +1,58 @@
# encoding: utf-8

$LOAD_PATH << File.expand_path('..', __FILE__)


require 'hadoop'


# See {Rubydoop.configure} for the job configuration DSL documentation,
# {Package} for the packaging documentation, or the {file:README.md README}
# for a getting started guide.
module Rubydoop
  # @private
  def self.create_mapper(conf)
    create_instance(conf.get(MAPPER_KEY))
  end

  # @private
  def self.create_reducer(conf)
    create_instance(conf.get(REDUCER_KEY))
  end

  # @private
  def self.create_combiner(conf)
    create_instance(conf.get(COMBINER_KEY))
  end

  # @private
  def self.create_partitioner(conf)
    create_instance(conf.get(PARTITIONER_KEY))
  end

  # @private
  def self.create_grouping_comparator(conf)
    create_instance(conf.get(GROUPING_COMPARATOR_KEY))
  end

  # @private
  def self.create_sort_comparator(conf)
    create_instance(conf.get(SORT_COMPARATOR_KEY))
  end

  private

  MAPPER_KEY = 'rubydoop.mapper'.freeze
  REDUCER_KEY = 'rubydoop.reducer'.freeze
  COMBINER_KEY = 'rubydoop.combiner'.freeze
  PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
  GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
  SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze

  def self.create_instance(const_path)
    cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
    cls.new
  end
end

require 'rubydoop/dsl'
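The `create_*` factories above resolve a class name stored in the job configuration back to a Ruby class and instantiate it. A self-contained sketch of that resolution in isolation (the hash stands in for Hadoop's `Configuration`; the WordCount names are hypothetical):

module WordCount
  class Mapper; end
end

conf = { 'rubydoop.mapper' => 'WordCount::Mapper' }  # stand-in for a Hadoop Configuration
cls = conf['rubydoop.mapper'].split('::').reduce(Object) { |host, name| host.const_get(name) }
mapper = cls.new  # => #<WordCount::Mapper>, as Rubydoop.create_mapper would return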
data/lib/rubydoop/dsl.rb
ADDED
@@ -0,0 +1,357 @@
# encoding: utf-8

module Rubydoop
  # Main entrypoint into the configuration DSL.
  #
  # @example Configuring a job
  #
  #     Rubydoop.configure do |*args|
  #       job 'word_count' do
  #         input args[0]
  #         output args[1]
  #
  #         mapper WordCount::Mapper
  #         reducer WordCount::Reducer
  #
  #         output_key Hadoop::Io::Text
  #         output_value Hadoop::Io::IntWritable
  #       end
  #     end
  #
  # Within a configure block you can specify one or more jobs. The `job`
  # blocks are run in the context of a {JobDefinition} instance, so look
  # at that class for documentation about the available properties. The
  # `configure` block is run within the context of a {ConfigurationDefinition}
  # instance. The arguments to the `configure` block are the command line
  # arguments, minus those handled by Hadoop's `ToolRunner`.
  #
  # @yieldparam [Array<String>] *arguments The command line arguments
  #
  # @note The tool runner will set the global variable `$rubydoop_context`
  #   to an object that contains references to the necessary Hadoop
  #   configuration. Unless this global variable is set the configuration
  #   block is not run (this is a feature, it means that the configuration
  #   block doesn't run in mappers and reducers).
  #
  def self.configure(impl=ConfigurationDefinition, &block)
    impl.new($rubydoop_context, &block) if $rubydoop_context
  end

  # Lower level API for configuring jobs.
  #
  # @example Configuring a job
  #
  #     cc = ConfigurationDefinition.new
  #     cc.job 'word_count' do
  #       # same DSL as shown in the documentation for Rubydoop.configure
  #     end
  #
  class ConfigurationDefinition
    def initialize(context=$rubydoop_context, &block)
      @context = context
      instance_exec(*arguments, &block) if @context && block_given?
    end

    def arguments
      @context.arguments
    end

    def job(name, &block)
      return nil unless @context
      job = JobDefinition.new(@context, @context.create_job(name))
      job.instance_exec(&block)
      job
    end
  end

  # Job configuration DSL.
  #
  # `Rubydoop.configure` blocks are run within the context of an instance of
  # this class. These are the methods available in those blocks.
  #
  class JobDefinition
    # @private
    def initialize(context, job)
      @context = context
      @job = job
    end

    # Sets the input paths of the job.
    #
    # Calls `setInputFormatClass` on the Hadoop job and uses the static
    # `setInputPaths` on the input format to set the job's input path.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setInputFormatClass(java.lang.Class) Hadoop's Job#setInputFormatClass
    #
    # @param [String, Enumerable] paths The input paths, either a comma-separated
    #   string or an `Enumerable` of strings (which will be joined with a comma).
    # @param [Hash] options
    # @option options [JavaClass] :format The input format to use, defaults to `TextInputFormat`
    def input(paths, options={})
      paths = paths.join(',') if paths.is_a?(Enumerable)
      format = options[:format] || Hadoop::Mapreduce::Lib::Input::TextInputFormat
      format.set_input_paths(@job, paths)
      @job.set_input_format_class(format)
    end

    # Sets the output path of the job.
    #
    # Calls `setOutputFormatClass` on the Hadoop job and uses the static
    # `setOutputPath` on the output format to set the job's output path.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputFormatClass(java.lang.Class) Hadoop's Job#setOutputFormatClass
    #
    # @param [String] dir The output path
    # @param [Hash] options
    # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
    def output(dir, options={})
      format = options[:format] || Hadoop::Mapreduce::Lib::Output::TextOutputFormat
      format.set_output_path(@job, Hadoop::Fs::Path.new(dir))
      @job.set_output_format_class(format)
    end

    # Sets a job property.
    #
    # Calls `set`/`setBoolean`/`setLong`/`setFloat` on the Hadoop Job's
    # configuration (exact method depends on the type of the value).
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String,%20java.lang.String) Hadoop's Configuration#set
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#setBoolean(java.lang.String,%20boolean) Hadoop's Configuration#setBoolean
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#setLong(java.lang.String,%20long) Hadoop's Configuration#setLong
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#setFloat(java.lang.String,%20float) Hadoop's Configuration#setFloat
    #
    # @param [String] property The property name
    # @param [String, Numeric, Boolean] value The property value
    def set(property, value)
      case value
      when Integer
        @job.configuration.set_long(property, value)
      when Float
        @job.configuration.set_float(property, value)
      when true, false
        @job.configuration.set_boolean(property, value)
      else
        @job.configuration.set(property, value)
      end
    end

    # Sets the mapper class.
    #
    # The equivalent of calling `setMapperClass` on a Hadoop job, but instead
    # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
    # that works with Hadoop.
    #
    # The class only needs to implement the method `map`, which will be called
    # exactly like a Java mapper class' `map` method would be called.
    #
    # You can optionally implement `setup` and `cleanup`, which mirror the
    # methods of the same name in Java mappers.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Mapper.html Hadoop's Mapper
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapperClass(java.lang.Class) Hadoop's Job#setMapperClass
    #
    # @param [Class] cls The (Ruby) mapper class.
    def mapper(cls=nil)
      if cls
        @job.configuration.set(MAPPER_KEY, cls.name)
        @job.set_mapper_class(@context.proxy_class(:mapper))
        @mapper = cls
      end
      @mapper
    end
    alias_method :mapper=, :mapper

    # Sets the reducer class.
    #
    # The equivalent of calling `setReducerClass` on a Hadoop job, but instead
    # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
    # that works with Hadoop.
    #
    # The class only needs to implement the method `reduce`, which will be called
    # exactly like a Java reducer class' `reduce` method would be called.
    #
    # You can optionally implement `setup` and `cleanup`, which mirror the
    # methods of the same name in Java reducers.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Reducer.html Hadoop's Reducer
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setReducerClass(java.lang.Class) Hadoop's Job#setReducerClass
    #
    # @param [Class] cls The (Ruby) reducer class.
    def reducer(cls=nil)
      if cls
        @job.configuration.set(REDUCER_KEY, cls.name)
        @job.set_reducer_class(@context.proxy_class(:reducer))
        @reducer = cls
      end
      @reducer
    end
    alias_method :reducer=, :reducer

    # Sets the combiner class.
    #
    # The equivalent of calling `setCombinerClass` on a Hadoop job, but instead
    # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
    # that works with Hadoop.
    #
    # A combiner should implement `reduce`, just like reducers.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setCombinerClass(java.lang.Class) Hadoop's Job#setCombinerClass
    #
    # @param [Class] cls The (Ruby) combiner class.
    def combiner(cls=nil)
      if cls
        @job.configuration.set(COMBINER_KEY, cls.name)
        @job.set_combiner_class(@context.proxy_class(:combiner))
        @combiner = cls
      end
      @combiner
    end
    alias_method :combiner=, :combiner

    # Sets a custom partitioner.
    #
    # The equivalent of calling `setPartitionerClass` on a Hadoop job, but instead
    # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
    # that works with Hadoop.
    #
    # The class must implement `partition`, which will be called exactly like
    # a Java partitioner would.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setPartitionerClass(java.lang.Class) Hadoop's Job#setPartitionerClass
    #
    # @param [Class] cls The (Ruby) partitioner class.
    def partitioner(cls=nil)
      if cls
        @job.configuration.set(PARTITIONER_KEY, cls.name)
        @job.set_partitioner_class(@context.proxy_class(:partitioner))
        @partitioner = cls
      end
      @partitioner
    end
    alias_method :partitioner=, :partitioner

    # Sets a custom grouping comparator.
    #
    # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
    # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
    # it in a way that works with Hadoop.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setGroupingComparatorClass(java.lang.Class) Hadoop's Job#setGroupingComparatorClass
    #
    # @param [Class] cls The (Ruby) comparator class.
    def grouping_comparator(cls=nil)
      if cls
        @job.configuration.set(GROUPING_COMPARATOR_KEY, cls.name)
        @job.set_grouping_comparator_class(@context.proxy_class(:grouping_comparator))
        @grouping_comparator = cls
      end
      @grouping_comparator
    end
    alias_method :grouping_comparator=, :grouping_comparator

    # Sets a custom sort comparator.
    #
    # The equivalent of calling `setSortComparatorClass` on a Hadoop job,
    # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
    # it in a way that works with Hadoop.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setSortComparatorClass(java.lang.Class) Hadoop's Job#setSortComparatorClass
    #
    # @param [Class] cls The (Ruby) comparator class.
    def sort_comparator(cls=nil)
      if cls
        @job.configuration.set(SORT_COMPARATOR_KEY, cls.name)
        @job.set_sort_comparator_class(@context.proxy_class(:sort_comparator))
        @sort_comparator = cls
      end
      @sort_comparator
    end
    alias_method :sort_comparator=, :sort_comparator

    # If you need to manipulate the Hadoop job in some way that isn't covered
    # by this DSL, this is the method for you. It yields the `Job`, letting
    # you do whatever you want with it.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html Hadoop's Job
    #
    # @yieldparam [Hadoop::Mapreduce::Job] job The raw Hadoop Job instance
    def raw(&block)
      yield @job
    end

    private

    def self.class_setter(dsl_name)
      define_method(dsl_name) do |cls|
        if cls
          @job.send("set_#{dsl_name}_class", cls.java_class)
          instance_variable_set(:"@#{dsl_name}", cls)
        end
        instance_variable_get(:"@#{dsl_name}")
      end
      define_method("#{dsl_name}=") do |cls|
        @job.send("set_#{dsl_name}_class", cls.java_class)
      end
    end

    public

    # @!method map_output_key(cls)
    #
    # Sets the mapper's output key type.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapOutputKeyClass(java.lang.Class) Hadoop's Job#setMapOutputKeyClass
    #
    # @param [Class] cls The mapper's output key type
    class_setter :map_output_key

    # @!method map_output_value(cls)
    #
    # Sets the mapper's output value type.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapOutputValueClass(java.lang.Class) Hadoop's Job#setMapOutputValueClass
    #
    # @param [Class] cls The mapper's output value type
    class_setter :map_output_value

    # @!method output_key(cls)
    #
    # Sets the reducer's output key type.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputKeyClass(java.lang.Class) Hadoop's Job#setOutputKeyClass
    #
    # @param [Class] cls The reducer's output key type
    class_setter :output_key

    # @!method output_value(cls)
    #
    # Sets the reducer's output value type.
    #
    # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputValueClass(java.lang.Class) Job#setOutputValueClass
    #
    # @param [Class] cls The reducer's output value type
    class_setter :output_value
  end

  # @private
  class Context
    attr_reader :jobs, :arguments

    def initialize(conf, proxy_classes, arguments)
      @conf = conf
      @proxy_classes = proxy_classes
      @arguments = arguments
      @jobs = []
    end

    def create_job(name)
      hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
      @jobs << hadoop_job
      hadoop_job
    end

    def proxy_class(type)
      @proxy_classes[type]
    end
  end
end
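Putting the DSL together with the mapper/reducer contract the docstrings describe, here is a hedged end-to-end sketch. The WordCount classes and paths are illustrative; the context argument is Hadoop's Mapper.Context/Reducer.Context, with method names following JRuby's snake_case conversion of the Java API:

module WordCount
  class Mapper
    # Called once per input record, like a Java Mapper's map method
    def map(key, value, context)
      value.to_s.split.each do |word|
        context.write(Hadoop::Io::Text.new(word), Hadoop::Io::IntWritable.new(1))
      end
    end
  end

  class Reducer
    # Called once per key with all of that key's values, like a Java Reducer
    def reduce(key, values, context)
      sum = 0
      values.each { |v| sum += v.get }
      context.write(key, Hadoop::Io::IntWritable.new(sum))
    end
  end
end

Rubydoop.configure do |input_path, output_path|
  job 'word_count' do
    input input_path
    output output_path

    mapper WordCount::Mapper
    reducer WordCount::Reducer

    map_output_key Hadoop::Io::Text
    map_output_value Hadoop::Io::IntWritable
    output_key Hadoop::Io::Text
    output_value Hadoop::Io::IntWritable
  end
end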
data/lib/rubydoop/package.rb
ADDED
@@ -0,0 +1,123 @@
# encoding: utf-8

require 'bundler'
require 'open-uri'
require 'ant'
require 'fileutils'
require 'set'


module Rubydoop
  # Utility for making a job JAR that works with Hadoop.
  #
  # @example Easy to use from Rake
  #
  #     task :package do
  #       Rubydoop::Package.create!
  #     end
  class Package
    # A package has sane defaults that work in most situations, but almost
    # everything can be changed.
    #
    # If you have extra JAR files that you need to make available for your job
    # you can specify them with the `:lib_jars` option.
    #
    # @param [Hash] options
    # @option options [String] :project_base_dir The project's base dir, defaults to the current directory (the assumption is that Package will be used from a Rake task)
    # @option options [String] :project_name The name of the JAR file (minus .jar), defaults to the directory name of the `:project_base_dir`
    # @option options [String] :build_dir The directory to put the final JAR into, defaults to `:project_base_dir + '/build'`
    # @option options [Array<String>] :gem_groups All gems from these Gemfile groups will be included, defaults to `[:default]` (the top-level group of a Gemfile)
    # @option options [Array<String>] :lib_jars Paths to extra JAR files to include in the JAR's lib directory (where they will be on the classpath when the job is run)
    # @option options [String] :jruby_version The JRuby version to package, defaults to `JRUBY_VERSION`
    # @option options [String] :jruby_jar_path The path to a local copy of `jruby-complete.jar`, defaults to downloading and caching a version defined by `:jruby_version`
    def initialize(options={})
      @options = default_options.merge(options)
      @options[:project_name] = File.basename(@options[:project_base_dir]) unless @options[:project_name]
      @options[:build_dir] = File.join(@options[:project_base_dir], 'build') unless @options[:build_dir]
      @options[:jruby_jar_path] = File.join(@options[:build_dir], "jruby-complete-#{@options[:jruby_version]}.jar") unless @options[:jruby_jar_path]
    end

    # Create the JAR package, see {Package#initialize} for configuration options.
    #
    # On the first run a complete JRuby runtime JAR will be downloaded
    # (`jruby-complete.jar`) and locally cached, but if you already have a
    # copy in a local Ivy or Maven repository that will be used instead.
    def create!
      create_directories!
      fetch_jruby!
      build_jar!
    end

    # A shortcut for `Package.new(options).create!`.
    def self.create!(options={})
      new(options).create!
    end

    private

    def default_options
      {
        :main_class => 'rubydoop.RubydoopJobRunner',
        :rubydoop_base_dir => File.expand_path('../../..', __FILE__),
        :project_base_dir => Dir.getwd,
        :gem_groups => [:default],
        :lib_jars => [],
        :jruby_version => JRUBY_VERSION
      }
    end

    def create_directories!
      FileUtils.mkdir_p(@options[:build_dir])
    end

    def fetch_jruby!
      return if File.exists?(@options[:jruby_jar_path])

      local_maven_path = File.expand_path("~/.m2/repository/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar")
      local_ivy_path = File.expand_path("~/.ivy2/cache/org.jruby/jruby-complete/jars/jruby-complete-#{@options[:jruby_version]}.jar")
      remote_maven_url = "http://central.maven.org/maven2/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar"

      if File.exists?(local_maven_path)
        $stderr.puts("Using #{File.basename(local_maven_path)} from local Maven cache")
        @options[:jruby_jar_path] = local_maven_path
      elsif File.exists?(local_ivy_path)
        $stderr.puts("Using #{File.basename(local_ivy_path)} from local Ivy2 cache")
        @options[:jruby_jar_path] = local_ivy_path
      else
        $stderr.puts("Downloading #{remote_maven_url} to #{@options[:jruby_jar_path]}")
        jruby_complete_bytes = open(remote_maven_url).read
        File.open(@options[:jruby_jar_path], 'wb') do |io|
          io.write(jruby_complete_bytes)
        end
      end
    end

    def build_jar!
      # the ant block is instance_exec'ed so instance variables and methods are not in scope
      options = @options
      bundled_gems = load_path
      lib_jars = [options[:jruby_jar_path], *options[:lib_jars]]
      ant do
        jar :destfile => "#{options[:build_dir]}/#{options[:project_name]}.jar" do
          manifest { attribute :name => 'Main-Class', :value => options[:main_class] }
          zipfileset :src => "#{options[:rubydoop_base_dir]}/lib/rubydoop.jar"
          fileset :dir => "#{options[:rubydoop_base_dir]}/lib", :includes => '**/*.rb', :excludes => '*.jar'
          fileset :dir => "#{options[:project_base_dir]}/lib"
          bundled_gems.each { |path| fileset :dir => path }
          lib_jars.each { |extra_jar| zipfileset :dir => File.dirname(extra_jar), :includes => File.basename(extra_jar), :prefix => 'lib' }
        end
      end
    end

    def load_path
      Bundler.definition.specs_for(@options[:gem_groups]).flat_map do |spec|
        if spec.full_name !~ /^(?:bundler|rubydoop)-\d+/
          spec.require_paths.map do |rp|
            "#{spec.full_gem_path}/#{rp}"
          end
        else
          []
        end
      end
    end
  end
end
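In practice the packager is usually driven from a Rakefile; a sketch using the documented options (the option values here are illustrative, not defaults):

require 'rubydoop/package'

task :package do
  Rubydoop::Package.create!(
    :project_name => 'word_count',      # name of the resulting JAR (minus .jar)
    :lib_jars => Dir['lib/ext/*.jar'],  # extra JARs placed on the job classpath
    :gem_groups => [:default]           # Gemfile groups to bundle into the JAR
  )
end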
metadata
ADDED
@@ -0,0 +1,53 @@
--- !ruby/object:Gem::Specification
name: rubydoop
version: !ruby/object:Gem::Version
  prerelease:
  version: 1.0.0
platform: java
authors:
- Theo Hultberg
autorequire:
bindir: bin
cert_chain: []
date: 2012-10-01 00:00:00.000000000Z
dependencies: []
description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
email:
- theo@iconara.net
executables: []
extensions: []
extra_rdoc_files: []
files:
- lib/hadoop.rb
- lib/rubydoop.rb
- lib/rubydoop/dsl.rb
- lib/rubydoop/package.rb
- lib/rubydoop/version.rb
- lib/rubydoop.jar
homepage: http://github.com/iconara/rubydoop
licenses: []
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
  none: false
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
  none: false
requirements: []
rubyforge_project: rubydoop
rubygems_version: 1.8.15
signing_key:
specification_version: 3
summary: Write Hadoop jobs in Ruby
test_files: []
has_rdoc:
...
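For reference, a gemspec along these lines would produce metadata like the above. This is a reconstruction from the metadata fields, not the project's actual rubydoop.gemspec:

Gem::Specification.new do |s|
  s.name        = 'rubydoop'
  s.version     = '1.0.0'
  s.platform    = 'java'
  s.authors     = ['Theo Hultberg']
  s.email       = ['theo@iconara.net']
  s.homepage    = 'http://github.com/iconara/rubydoop'
  s.summary     = 'Write Hadoop jobs in Ruby'
  s.description = 'Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs'
  s.rubyforge_project = 'rubydoop'
  s.files       = Dir['lib/**/*.rb'] + ['lib/rubydoop.jar']
  s.require_paths = ['lib']
end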