rubydoop 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rubydoop.rb +60 -0
- data/lib/rubydoop/minitest.rb +23 -0
- metadata +65 -0
data/lib/rubydoop.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Registers the block to run for each input record in the map phase.
# The block is stored in @map and later called with the record's key and value.
def map(&block)
  @map = block
end
|
4
|
+
|
5
|
+
# Registers the block to run for each key in the reduce phase.
# The block is stored in @reduce and later called with a key and the array of
# values collected for that key.
def reduce(&block)
  @reduce = block
end
|
8
|
+
|
9
|
+
# Writes one output record to standard output using puts: +key+ and +value+
# joined by a single tab character.
def emit(key, value)
  record = [key, value].join("\t")
  $stdout.puts(record)
end
|
12
|
+
|
13
|
+
# Command dispatcher, run when the job script exits so that the blocks
# registered through `map` and `reduce` are already stored in @map / @reduce.
#
# The first command-line argument selects the action:
#   start    - print, then exec, a Hadoop streaming job that ships this file
#              and the job script and uses the script as mapper and reducer
#   map      - read "key<TAB>value" lines from STDIN, call @map for each
#   reduce   - read "key<TAB>value" lines from STDIN, call @reduce once per
#              run of consecutive identical keys with that run's values
#   simulate - exec `cat FILE | script map | sort | script reduce` locally,
#              where FILE is the last command-line argument
# Anything else prints a usage message to STDERR and exits with status -1.
at_exit do
  HADOOP_HOME ||= ENV['HADOOP_HOME'] || '/usr/local/hadoop'

  # A record is a non-empty key, a tab, and a non-empty value.
  # Lines that do not match are skipped.
  record = /^([^\t]+)\t(.+)$/

  case ARGV.first
  when 'start'
    # Every continuation line ends in " \" (space, backslash) so the shell
    # never glues two arguments together, whatever the next line's indent is.
    cmd = <<-EOC
      hadoop fs -rmr output
      hadoop jar #{HADOOP_HOME}/contrib/streaming/hadoop-*-streaming.jar \\
        -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat \\
        -output output -input input \\
        -file #{File.expand_path __FILE__} \\
        -file #{File.expand_path $0} \\
        -mapper "#{File.basename $0} map" \\
        -reducer "#{File.basename $0} reduce"
    EOC
    puts cmd
    exec cmd
  when 'map'
    STDIN.each_line do |line|
      fields = record.match(line)
      @map.call fields[1], fields[2] if fields
    end
  when 'reduce'
    key = nil
    values = []
    STDIN.each_line do |line|
      fields = record.match(line)
      next unless fields

      this_key = fields[1]
      this_value = fields[2]
      # A change of key closes the current group.
      if key && key != this_key
        @reduce.call key, values
        values = []
      end
      key = this_key
      values << this_value
    end
    # Flush the last group: no key change follows it, so without this call
    # the values of the final key would never reach the reduce block.
    @reduce.call key, values if key
  when 'simulate'
    input = ARGV.last
    raise "simulate: input file not found: #{input}" unless File.exist?(input)

    # Escape paths so names containing spaces or shell metacharacters are
    # passed through the shell pipeline intact.
    require 'shellwords'
    script = Shellwords.escape($0)
    exec "cat #{Shellwords.escape(input)} | #{script} map | sort | #{script} reduce"
  else
    STDERR.puts <<-EOM
      Please run "#{$0} COMMAND", where COMMAND is one of the following:
      \tstart
      \tmap
      \treduce
      \tsimulate FILE
    EOM
    exit(-1)
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'open3'
|
2
|
+
|
3
|
+
class Rubydoop
  # Test-case base class whose helpers run a job script's phases as local
  # subprocess pipelines and return what the pipeline printed.
  class MiniTest < MiniTest::Unit::TestCase

    # Feeds +input+ to `./script map | sort`.
    # Returns the pipeline's output as an array of lines.
    def map(script, input)
      stages = ["./#{script} map", "sort"]
      Open3.pipeline_rw(*stages) do |stdin, stdout, _threads|
        stdin.print input
        # Closing the write end signals end of input to the first stage.
        stdin.close
        stdout.readlines
      end
    end

    # Feeds +input+ to `./script map | sort | ./script reduce`.
    # Returns the pipeline's output as an array of lines.
    def mapreduce(script, input)
      stages = ["./#{script} map", "sort", "./#{script} reduce"]
      Open3.pipeline_rw(*stages) do |stdin, stdout, _threads|
        stdin.print input
        # Closing the write end signals end of input to the first stage.
        stdin.close
        stdout.readlines
      end
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rubydoop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Jacob Rothstein
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-04-21 00:00:00 -04:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: Simple Ruby Sugar for Hadoop Streaming
|
22
|
+
email: jbr@yakbarber.com
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files: []
|
28
|
+
|
29
|
+
files:
|
30
|
+
- lib/rubydoop/minitest.rb
|
31
|
+
- lib/rubydoop.rb
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: https://github.com/jbr/rubydoop
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- --charset=UTF-8
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
segments:
|
47
|
+
- 0
|
48
|
+
version: "0"
|
49
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project: rubydoop
|
60
|
+
rubygems_version: 1.3.7
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: Simple Ruby Sugar for Hadoop Streaming
|
64
|
+
test_files: []
|
65
|
+
|