rubydoop 0.0.5 → 1.0.2

data/lib/hadoop.rb ADDED
@@ -0,0 +1,31 @@
+ # encoding: utf-8
+
+ require 'java'
+
+
+ # @private
+ module Hadoop
+   module Io
+     include_package 'org.apache.hadoop.io'
+   end
+
+   module Mapreduce
+     include_package 'org.apache.hadoop.mapreduce'
+
+     module Lib
+       include_package 'org.apache.hadoop.mapreduce.lib'
+
+       module Input
+         include_package 'org.apache.hadoop.mapreduce.lib.input'
+       end
+
+       module Output
+         include_package 'org.apache.hadoop.mapreduce.lib.output'
+       end
+     end
+   end
+
+   module Fs
+     include_package 'org.apache.hadoop.fs'
+   end
+ end
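
Each `include_package` call makes the Java classes in the named Hadoop package resolvable as constants under the corresponding Ruby module. A minimal sketch of how job code refers to them, assuming a JRuby runtime with the Hadoop JARs on the classpath:

    require 'hadoop'

    key   = Hadoop::Io::Text.new('word')      # org.apache.hadoop.io.Text
    value = Hadoop::Io::IntWritable.new(1)    # org.apache.hadoop.io.IntWritable
    path  = Hadoop::Fs::Path.new('/tmp/out')  # org.apache.hadoop.fs.Path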
data/lib/rubydoop.jar ADDED
Binary file
data/lib/rubydoop.rb CHANGED
@@ -1,65 +1,58 @@
- def map(&proc)
-   @map = proc
- end
+ # encoding: utf-8
 
- def reduce(&proc)
-   @reduce = proc
- end
+ $LOAD_PATH << File.expand_path('..', __FILE__)
 
- def emit(key, value)
-   puts [key, value].join("\t")
- end
 
- at_exit do
-   HADOOP_HOME ||= (ENV['HADOOP_HOME'] || '/usr/local/hadoop') unless defined?(HADOOP_HOME)
-   case ARGV.first
-   when 'start'
-     cmd = <<-EOC
-       hadoop fs -rmr output
-       hadoop jar #{HADOOP_HOME}/contrib/streaming/hadoop-*-streaming.jar\\
-         -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat\\
-         -output output -input input\\
-         -file #{File.expand_path __FILE__} \\
-         -file #{File.expand_path $0} \\
-         -mapper "#{File.basename $0} map" \\
-         -reducer "#{File.basename $0} reduce"
-     EOC
-     puts cmd
-     exec cmd
-   when 'map'
-     while line = STDIN.gets
-       if line =~ /^([^\t]+)\t(.+)$/
-         @map.call $1, $2
-       else
-         @map.call line, nil
-       end
-     end
-   when 'reduce'
-     key, values = nil, []
-     while line = STDIN.gets
-       if line =~ /^([^\t]+)\t(.+)$/
-         thiskey, thisvalue = $1, $2
-         if key != thiskey && key
-           @reduce.call key, values
-           key, values = nil, []
-         end
-         key = thiskey
-         values << thisvalue
-       end
-     end
-     if values.any? && key
-       @reduce.call key, values
-     end
-   when 'simulate'
-     raise unless File.exists?(ARGV.last)
-     exec "cat #{ARGV.last} | #{$0} map | sort | #{$0} reduce"
-   else
-     STDERR.puts <<-EOM
-       Please run "#{$0} COMMAND", where COMMAND is one of the following:
-       \tstart
-       \tmap
-       \treduce
-     EOM
-     exit -1
+ require 'hadoop'
+
+
+ # See {Rubydoop.configure} for the job configuration DSL documentation,
+ # {Package} for the packaging documentation, or the {file:README.md README}
+ # for a getting started guide.
+ module Rubydoop
+   # @private
+   def self.create_mapper(conf)
+     create_instance(conf.get(MAPPER_KEY))
+   end
+
+   # @private
+   def self.create_reducer(conf)
+     create_instance(conf.get(REDUCER_KEY))
+   end
+
+   # @private
+   def self.create_combiner(conf)
+     create_instance(conf.get(COMBINER_KEY))
+   end
+
+   # @private
+   def self.create_partitioner(conf)
+     create_instance(conf.get(PARTITIONER_KEY))
+   end
+
+   # @private
+   def self.create_grouping_comparator(conf)
+     create_instance(conf.get(GROUPING_COMPARATOR_KEY))
+   end
+
+   # @private
+   def self.create_sort_comparator(conf)
+     create_instance(conf.get(SORT_COMPARATOR_KEY))
+   end
+
+   private
+
+   MAPPER_KEY = 'rubydoop.mapper'.freeze
+   REDUCER_KEY = 'rubydoop.reducer'.freeze
+   COMBINER_KEY = 'rubydoop.combiner'.freeze
+   PARTITIONER_KEY = 'rubydoop.partitioner'.freeze
+   GROUPING_COMPARATOR_KEY = 'rubydoop.grouping_comparator'.freeze
+   SORT_COMPARATOR_KEY = 'rubydoop.sort_comparator'.freeze
+
+   def self.create_instance(const_path)
+     cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
+     cls.new
   end
 end
+
+ require 'rubydoop/dsl'
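
`create_instance` turns the class name stored in the job configuration back into a Ruby class and instantiates it with no arguments, so mapper, reducer, combiner, partitioner, and comparator classes must be resolvable by that name inside the packaged JAR and constructible with `new`. A sketch of the resolution step, using an illustrative `WordCount::Mapper` name rather than anything shipped with the gem:

    const_path = 'WordCount::Mapper'
    cls = const_path.split('::').reduce(Object) { |host, name| host.const_get(name) }
    mapper = cls.new  # what Rubydoop.create_mapper(conf) does for the stored key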
data/lib/rubydoop/dsl.rb ADDED
@@ -0,0 +1,357 @@
+ # encoding: utf-8
+
+ module Rubydoop
+   # Main entrypoint into the configuration DSL.
+   #
+   # @example Configuring a job
+   #
+   #     Rubydoop.configure do |*args|
+   #       job 'word_count' do
+   #         input args[0]
+   #         output args[1]
+   #
+   #         mapper WordCount::Mapper
+   #         reducer WordCount::Reducer
+   #
+   #         output_key Hadoop::Io::Text
+   #         output_value Hadoop::Io::IntWritable
+   #       end
+   #     end
+   #
+   # Within a configure block you can specify one or more jobs; the `job`
+   # blocks are run in the context of a {JobDefinition} instance, so look
+   # at that class for documentation about the available properties. The
+   # `configure` block is run within the context of a {ConfigurationDefinition}
+   # instance. The arguments to the `configure` block are the command line
+   # arguments, minus those handled by Hadoop's `ToolRunner`.
+   #
+   # @yieldparam [Array<String>] *arguments The command line arguments
+   #
+   # @note The tool runner will set the global variable `$rubydoop_context`
+   #   to an object that contains references to the necessary Hadoop
+   #   configuration. Unless this global variable is set the configuration
+   #   block is not run (this is a feature, it means that the configuration
+   #   block doesn't run in mappers and reducers).
+   #
+   def self.configure(impl=ConfigurationDefinition, &block)
+     impl.new($rubydoop_context, &block) if $rubydoop_context
+   end
+
+   # Lower level API for configuring jobs.
+   #
+   # @example Configuring a job
+   #
+   #     cc = ConfigurationDefinition.new
+   #     cc.job 'word_count' do
+   #       # same DSL as shown in the documentation for Rubydoop.configure
+   #     end
+   #
+   class ConfigurationDefinition
+     def initialize(context=$rubydoop_context, &block)
+       @context = context
+       instance_exec(*arguments, &block) if @context && block_given?
+     end
+
+     def arguments
+       @context.arguments
+     end
+
+     def job(name, &block)
+       return nil unless @context
+       job = JobDefinition.new(@context, @context.create_job(name))
+       job.instance_exec(&block)
+       job
+     end
+   end
+
+   # Job configuration DSL.
+   #
+   # `Rubydoop.configure` blocks are run within the context of an instance of
+   # this class. These are the methods available in those blocks.
+   #
+   class JobDefinition
+     # @private
+     def initialize(context, job)
+       @context = context
+       @job = job
+     end
+
+     # Sets the input paths of the job.
+     #
+     # Calls `setInputFormatClass` on the Hadoop job and uses the static
+     # `setInputPaths` on the input format to set the job's input paths.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setInputFormatClass(java.lang.Class) Hadoop's Job#setInputFormatClass
+     #
+     # @param [String, Enumerable] paths The input paths, either a comma-separated
+     #   string or an `Enumerable` of strings (which will be joined with a comma).
+     # @param [Hash] options
+     # @option options [JavaClass] :format The input format to use, defaults to `TextInputFormat`
+     def input(paths, options={})
+       paths = paths.join(',') if paths.is_a?(Enumerable)
+       format = options[:format] || Hadoop::Mapreduce::Lib::Input::TextInputFormat
+       format.set_input_paths(@job, paths)
+       @job.set_input_format_class(format)
+     end
+
+     # Sets the output path of the job.
+     #
+     # Calls `setOutputFormatClass` on the Hadoop job and uses the static
+     # `setOutputPath` on the output format to set the job's output path.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputFormatClass(java.lang.Class) Hadoop's Job#setOutputFormatClass
+     #
+     # @param [String] dir The output path
+     # @param [Hash] options
+     # @option options [JavaClass] :format The output format to use, defaults to `TextOutputFormat`
+     def output(dir, options={})
+       format = options[:format] || Hadoop::Mapreduce::Lib::Output::TextOutputFormat
+       format.set_output_path(@job, Hadoop::Fs::Path.new(dir))
+       @job.set_output_format_class(format)
+     end
+
+     # Sets a job property.
+     #
+     # Calls `set`/`setBoolean`/`setLong`/`setFloat` on the Hadoop Job's
+     # configuration (the exact method depends on the type of the value).
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String,%20java.lang.String) Hadoop's Configuration#set
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#setBoolean(java.lang.String,%20boolean) Hadoop's Configuration#setBoolean
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#setLong(java.lang.String,%20long) Hadoop's Configuration#setLong
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/conf/Configuration.html#setFloat(java.lang.String,%20float) Hadoop's Configuration#setFloat
+     #
+     # @param [String] property The property name
+     # @param [String, Numeric, Boolean] value The property value
+     def set(property, value)
+       case value
+       when Integer
+         @job.configuration.set_long(property, value)
+       when Float
+         @job.configuration.set_float(property, value)
+       when true, false
+         @job.configuration.set_boolean(property, value)
+       else
+         @job.configuration.set(property, value)
+       end
+     end
+
+     # Sets the mapper class.
+     #
+     # The equivalent of calling `setMapperClass` on a Hadoop job, but instead
+     # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
+     # that works with Hadoop.
+     #
+     # The class only needs to implement the method `map`, which will be called
+     # exactly like a Java mapper class' `map` method would be called.
+     #
+     # You can optionally implement `setup` and `cleanup`, which mirror the
+     # methods of the same name in Java mappers.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Mapper.html Hadoop's Mapper
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapperClass(java.lang.Class) Hadoop's Job#setMapperClass
+     #
+     # @param [Class] cls The (Ruby) mapper class.
+     def mapper(cls=nil)
+       if cls
+         @job.configuration.set(MAPPER_KEY, cls.name)
+         @job.set_mapper_class(@context.proxy_class(:mapper))
+         @mapper = cls
+       end
+       @mapper
+     end
+     alias_method :mapper=, :mapper
+
+     # Sets the reducer class.
+     #
+     # The equivalent of calling `setReducerClass` on a Hadoop job, but instead
+     # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
+     # that works with Hadoop.
+     #
+     # The class only needs to implement the method `reduce`, which will be called
+     # exactly like a Java reducer class' `reduce` method would be called.
+     #
+     # You can optionally implement `setup` and `cleanup`, which mirror the
+     # methods of the same name in Java reducers.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Reducer.html Hadoop's Reducer
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setReducerClass(java.lang.Class) Hadoop's Job#setReducerClass
+     #
+     # @param [Class] cls The (Ruby) reducer class.
+     def reducer(cls=nil)
+       if cls
+         @job.configuration.set(REDUCER_KEY, cls.name)
+         @job.set_reducer_class(@context.proxy_class(:reducer))
+         @reducer = cls
+       end
+       @reducer
+     end
+     alias_method :reducer=, :reducer
+
+     # Sets the combiner class.
+     #
+     # The equivalent of calling `setCombinerClass` on a Hadoop job, but instead
+     # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
+     # that works with Hadoop.
+     #
+     # A combiner should implement `reduce`, just like reducers.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setCombinerClass(java.lang.Class) Hadoop's Job#setCombinerClass
+     #
+     # @param [Class] cls The (Ruby) combiner class.
+     def combiner(cls=nil)
+       if cls
+         @job.configuration.set(COMBINER_KEY, cls.name)
+         @job.set_combiner_class(@context.proxy_class(:combiner))
+         @combiner = cls
+       end
+       @combiner
+     end
+     alias_method :combiner=, :combiner
+
+     # Sets a custom partitioner.
+     #
+     # The equivalent of calling `setPartitionerClass` on a Hadoop job, but instead
+     # of a Java class you pass a Ruby class and Rubydoop will wrap it in a way
+     # that works with Hadoop.
+     #
+     # The class must implement `partition`, which will be called exactly
+     # like a Java partitioner's `partition` method would be.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setPartitionerClass(java.lang.Class) Hadoop's Job#setPartitionerClass
+     #
+     # @param [Class] cls The (Ruby) partitioner class.
+     def partitioner(cls=nil)
+       if cls
+         @job.configuration.set(PARTITIONER_KEY, cls.name)
+         @job.set_partitioner_class(@context.proxy_class(:partitioner))
+         @partitioner = cls
+       end
+       @partitioner
+     end
+     alias_method :partitioner=, :partitioner
+
+     # Sets a custom grouping comparator.
+     #
+     # The equivalent of calling `setGroupingComparatorClass` on a Hadoop job,
+     # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
+     # it in a way that works with Hadoop.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setGroupingComparatorClass(java.lang.Class) Hadoop's Job#setGroupingComparatorClass
+     #
+     # @param [Class] cls The (Ruby) comparator class.
+     def grouping_comparator(cls=nil)
+       if cls
+         @job.configuration.set(GROUPING_COMPARATOR_KEY, cls.name)
+         @job.set_grouping_comparator_class(@context.proxy_class(:grouping_comparator))
+         @grouping_comparator = cls
+       end
+       @grouping_comparator
+     end
+     alias_method :grouping_comparator=, :grouping_comparator
+
+     # Sets a custom sort comparator.
+     #
+     # The equivalent of calling `setSortComparatorClass` on a Hadoop job,
+     # but instead of a Java class you pass a Ruby class and Rubydoop will wrap
+     # it in a way that works with Hadoop.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setSortComparatorClass(java.lang.Class) Hadoop's Job#setSortComparatorClass
+     #
+     # @param [Class] cls The (Ruby) comparator class.
+     def sort_comparator(cls=nil)
+       if cls
+         @job.configuration.set(SORT_COMPARATOR_KEY, cls.name)
+         @job.set_sort_comparator_class(@context.proxy_class(:sort_comparator))
+         @sort_comparator = cls
+       end
+       @sort_comparator
+     end
+     alias_method :sort_comparator=, :sort_comparator
+
+     # If you need to manipulate the Hadoop job in some way that isn't covered
+     # by this DSL, this is the method for you. It yields the `Job`, letting
+     # you do whatever you want with it.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html Hadoop's Job
+     #
+     # @yieldparam [Hadoop::Mapreduce::Job] job The raw Hadoop Job instance
+     def raw(&block)
+       yield @job
+     end
+
+     private
+
+     def self.class_setter(dsl_name)
+       define_method(dsl_name) do |cls|
+         if cls
+           @job.send("set_#{dsl_name}_class", cls.java_class)
+           instance_variable_set(:"@#{dsl_name}", cls)
+         end
+         instance_variable_get(:"@#{dsl_name}")
+       end
+       define_method("#{dsl_name}=") do |cls|
+         @job.send("set_#{dsl_name}_class", cls.java_class)
+       end
+     end
+
+     public
+
+     # @!method map_output_key(cls)
+     #
+     # Sets the mapper's output key type.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapOutputKeyClass(java.lang.Class) Hadoop's Job#setMapOutputKeyClass
+     #
+     # @param [Class] cls The mapper's output key type
+     class_setter :map_output_key
+
+     # @!method map_output_value(cls)
+     #
+     # Sets the mapper's output value type.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setMapOutputValueClass(java.lang.Class) Hadoop's Job#setMapOutputValueClass
+     #
+     # @param [Class] cls The mapper's output value type
+     class_setter :map_output_value
+
+     # @!method output_key(cls)
+     #
+     # Sets the reducer's output key type.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputKeyClass(java.lang.Class) Hadoop's Job#setOutputKeyClass
+     #
+     # @param [Class] cls The reducer's output key type
+     class_setter :output_key
+
+     # @!method output_value(cls)
+     #
+     # Sets the reducer's output value type.
+     #
+     # @see http://hadoop.apache.org/docs/r1.0.3/api/org/apache/hadoop/mapreduce/Job.html#setOutputValueClass(java.lang.Class) Hadoop's Job#setOutputValueClass
+     #
+     # @param [Class] cls The reducer's output value type
+     class_setter :output_value
+   end
+
+   # @private
+   class Context
+     attr_reader :jobs, :arguments
+
+     def initialize(conf, proxy_classes, arguments)
+       @conf = conf
+       @proxy_classes = proxy_classes
+       @arguments = arguments
+       @jobs = []
+     end
+
+     def create_job(name)
+       hadoop_job = Hadoop::Mapreduce::Job.new(@conf, name)
+       @jobs << hadoop_job
+       hadoop_job
+     end
+
+     def proxy_class(type)
+       @proxy_classes[type]
+     end
+   end
+ end
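
The `map`/`reduce` contract described above mirrors Hadoop's Java API: `map(key, value, context)` and `reduce(key, values, context)`, with `context.write` used to emit pairs. A minimal sketch of classes satisfying that contract for the word-count job in the DSL example; the class names and counting logic are illustrative, not part of the gem:

    module WordCount
      class Mapper
        def map(key, value, context)
          # emit (word, 1) for every whitespace-separated token in the line
          value.to_s.split.each do |word|
            context.write(Hadoop::Io::Text.new(word), Hadoop::Io::IntWritable.new(1))
          end
        end
      end

      class Reducer
        def reduce(key, values, context)
          # values is an iterable of IntWritable; sum the counts for this word
          sum = values.reduce(0) { |count, value| count + value.get }
          context.write(key, Hadoop::Io::IntWritable.new(sum))
        end
      end
    end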
data/lib/rubydoop/package.rb ADDED
@@ -0,0 +1,129 @@
+ # encoding: utf-8
+
+ require 'bundler'
+ require 'open-uri'
+ require 'ant'
+ require 'fileutils'
+ require 'set'
+
+
+ module Rubydoop
+   # Utility for making a job JAR that works with Hadoop.
+   #
+   # @example Easy to use from Rake
+   #
+   #     task :package do
+   #       Rubydoop::Package.create!
+   #     end
+   class Package
+     # A package has sane defaults that work in most situations, but almost
+     # everything can be changed.
+     #
+     # If you have extra JAR files that you need to make available for your job
+     # you can specify them with the `:lib_jars` option.
+     #
+     # @param [Hash] options
+     # @option options [String] :project_base_dir The project's base dir, defaults to the current directory (the assumption is that Package will be used from a Rake task)
+     # @option options [String] :project_name The name of the JAR file (minus .jar), defaults to the directory name of the `:project_base_dir`
+     # @option options [String] :build_dir The directory to put the final JAR into, defaults to `:project_base_dir + '/build'`
+     # @option options [Array<String>] :gem_groups All gems from these Gemfile groups will be included, defaults to `[:default]` (the top-level group of a Gemfile)
+     # @option options [Array<String>] :lib_jars Paths to extra JAR files to include in the JAR's lib directory (where they will be on the classpath when the job is run)
+     # @option options [String] :jruby_version The JRuby version to package, defaults to `JRUBY_VERSION`
+     # @option options [String] :jruby_jar_path The path to a local copy of `jruby-complete.jar`, defaults to downloading and caching a version defined by `:jruby_version`
+     def initialize(options={})
+       @options = default_options.merge(options)
+       @options[:project_name] ||= File.basename(@options[:project_base_dir])
+       @options[:build_dir] ||= File.join(@options[:project_base_dir], 'build')
+       @options[:jruby_jar_path] ||= File.join(@options[:build_dir], "jruby-complete-#{@options[:jruby_version]}.jar")
+       @options[:jar_path] ||= File.join(@options[:build_dir], "#{@options[:project_name]}.jar")
+     end
+
+     # Create the JAR package; see {Package#initialize} for configuration options.
+     #
+     # On the first run a complete JRuby runtime JAR will be downloaded
+     # (`jruby-complete.jar`) and locally cached, but if you already have a
+     # copy in a local Ivy or Maven repository that will be used instead.
+     def create!
+       create_directories!
+       fetch_jruby!
+       build_jar!
+     end
+
+     # A shortcut for `Package.new(options).create!`.
+     def self.create!(options={})
+       new(options).create!
+     end
+
+     def respond_to?(name)
+       @options.key?(name) or super
+     end
+
+     def method_missing(name, *args)
+       @options[name] or super
+     end
+
+     private
+
+     def default_options
+       defaults = {
+         :main_class => 'rubydoop.RubydoopJobRunner',
+         :rubydoop_base_dir => File.expand_path('../../..', __FILE__),
+         :project_base_dir => Dir.getwd,
+         :gem_groups => [:default],
+         :lib_jars => [],
+         :jruby_version => JRUBY_VERSION
+       }
+     end
+
+     def create_directories!
+       FileUtils.mkdir_p(@options[:build_dir])
+     end
+
+     def fetch_jruby!
+       return if File.exists?(@options[:jruby_jar_path])
+
+       local_maven_path = File.expand_path("~/.m2/repository/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar")
+       local_ivy_path = File.expand_path("~/.ivy2/cache/org.jruby/jruby-complete/jars/jruby-complete-#{@options[:jruby_version]}.jar")
+       remote_maven_url = "http://central.maven.org/maven2/org/jruby/jruby-complete/#{@options[:jruby_version]}/jruby-complete-#{@options[:jruby_version]}.jar"
+
+       if File.exists?(local_maven_path)
+         @options[:jruby_jar_path] = local_maven_path
+       elsif File.exists?(local_ivy_path)
+         @options[:jruby_jar_path] = local_ivy_path
+       else
+         jruby_complete_bytes = open(remote_maven_url).read
+         File.open(@options[:jruby_jar_path], 'wb') do |io|
+           io.write(jruby_complete_bytes)
+         end
+       end
+     end
+
+     def build_jar!
+       # the ant block is instance_exec'ed so instance variables and methods are not in scope
+       options = @options
+       bundled_gems = load_path
+       lib_jars = [options[:jruby_jar_path], *options[:lib_jars]]
+       ant :output_level => 1 do
+         jar :destfile => options[:jar_path] do
+           manifest { attribute :name => 'Main-Class', :value => options[:main_class] }
+           zipfileset :src => "#{options[:rubydoop_base_dir]}/lib/rubydoop.jar"
+           fileset :dir => "#{options[:rubydoop_base_dir]}/lib", :includes => '**/*.rb', :excludes => '*.jar'
+           fileset :dir => "#{options[:project_base_dir]}/lib"
+           bundled_gems.each { |path| fileset :dir => path }
+           lib_jars.each { |extra_jar| zipfileset :dir => File.dirname(extra_jar), :includes => File.basename(extra_jar), :prefix => 'lib' }
+         end
+       end
+     end
+
+     def load_path
+       Bundler.definition.specs_for(@options[:gem_groups]).flat_map do |spec|
+         if spec.full_name !~ /^(?:bundler|rubydoop)-\d+/
+           spec.require_paths.map do |rp|
+             "#{spec.full_gem_path}/#{rp}"
+           end
+         else
+           []
+         end
+       end
+     end
+   end
+ end
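
A hedged Rake task sketch using the options documented above; the `:jobs` group name and the extra JAR path are placeholders for illustration, not defaults shipped with the gem:

    require 'rubydoop/package'

    task :package do
      Rubydoop::Package.create!(
        :project_name => 'word_count',
        :gem_groups => [:default, :jobs],        # :jobs is a hypothetical Gemfile group
        :lib_jars => ['ext/extra-library.jar']   # placeholder path to an extra JAR
      )
    end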
data/lib/rubydoop/version.rb ADDED
@@ -0,0 +1,4 @@
+ module Rubydoop
+   # @private
+   VERSION = '1.0.2'
+ end
metadata CHANGED
@@ -1,63 +1,53 @@
- --- !ruby/object:Gem::Specification
+ --- !ruby/object:Gem::Specification
  name: rubydoop
- version: !ruby/object:Gem::Version
-   prerelease: false
-   segments:
-   - 0
-   - 0
-   - 5
-   version: 0.0.5
+ version: !ruby/object:Gem::Version
+   prerelease:
+   version: 1.0.2
  platform: ruby
- authors:
- - Brenden Grace
- autorequire:
+ authors:
+ - Theo Hultberg
+ autorequire:
  bindir: bin
  cert_chain: []
-
- date: 2011-04-21 00:00:00 -04:00
- default_executable:
+ date: 2012-10-11 00:00:00.000000000 Z
  dependencies: []
-
- description: Simple Ruby Sugar for Hadoop Streaming
- email: brenden.grace@gmail.com
+ description: Rubydoop embeds a JRuby runtime in Hadoop, letting you write map reduce code in Ruby without using the streaming APIs
+ email:
+ - theo@iconara.net
  executables: []
-
  extensions: []
-
  extra_rdoc_files: []
-
- files:
- - lib/rubydoop/minitest.rb
+ files:
+ - lib/hadoop.rb
  - lib/rubydoop.rb
- has_rdoc: true
- homepage: https://github.com/bcg/rubydoop
+ - lib/rubydoop/dsl.rb
+ - lib/rubydoop/package.rb
+ - lib/rubydoop/version.rb
+ - lib/rubydoop.jar
+ homepage: http://github.com/iconara/rubydoop
  licenses: []
-
- post_install_message:
- rdoc_options:
- - --charset=UTF-8
- require_paths:
+ post_install_message:
+ rdoc_options: []
+ require_paths:
  - lib
- required_ruby_version: !ruby/object:Gem::Requirement
-   requirements:
-   - - ">="
-     - !ruby/object:Gem::Version
-       segments:
-       - 0
-       version: "0"
- required_rubygems_version: !ruby/object:Gem::Requirement
-   requirements:
-   - - ">="
-     - !ruby/object:Gem::Version
-       segments:
-       - 0
-       version: "0"
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+   none: false
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+   none: false
  requirements: []
-
  rubyforge_project: rubydoop
- rubygems_version: 1.3.6
- signing_key:
+ rubygems_version: 1.8.24
+ signing_key:
  specification_version: 3
- summary: Simple Ruby Sugar for Hadoop Streaming
+ summary: Write Hadoop jobs in Ruby
  test_files: []
-
+ has_rdoc:
+ ...
data/lib/rubydoop/minitest.rb DELETED
@@ -1,23 +0,0 @@
- require 'open3'
-
- class Rubydoop
-   class MiniTest < MiniTest::Unit::TestCase
-
-     def map(script, input)
-       Open3.pipeline_rw("./#{script} map", "sort") do |in_io, out_io, wt|
-         in_io.print input
-         in_io.close
-         out_io.readlines
-       end
-     end
-
-     def mapreduce(script, input)
-       Open3.pipeline_rw("./#{script} map", "sort", "./#{script} reduce") do |in_io, out_io, wt|
-         in_io.print input
-         in_io.close
-         out_io.readlines
-       end
-     end
-
-   end
- end