rubydoop 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/rubydoop.rb +60 -0
  2. data/lib/rubydoop/minitest.rb +23 -0
  3. metadata +65 -0
data/lib/rubydoop.rb ADDED
@@ -0,0 +1,60 @@
1
# Registers the block that handles each input record in the map phase.
#
# The block is stored in @map; the exit hook in this file later calls it with
# two arguments (key, value) for every tab-separated line it reads in `map`
# mode.
#
# @return [Proc, nil] the stored block (nil when no block was given)
def map(&block)
  @map = block
end
4
+
5
# Registers the block that handles each key group in the reduce phase.
#
# The block is stored in @reduce; the exit hook in this file later calls it
# with two arguments (key, values) in `reduce` mode, where values is the array
# of value strings collected for that key.
#
# @return [Proc, nil] the stored block (nil when no block was given)
def reduce(&block)
  @reduce = block
end
8
+
9
# Writes one output record to standard output as "key<TAB>value".
#
# Both arguments are stringified by Array#join, so an Array passed as either
# argument is flattened into additional tab-separated fields.
#
# @param key [#to_s] record key
# @param value [#to_s] record value
# @return [nil]
def emit(key, value)
  puts([key, value].join("\t"))
end
12
+
13
require 'shellwords'

# Yields the key and value of every well-formed line read from +io+.
#
# A line is well-formed when it consists of a non-empty key, a tab, and a
# non-empty value; every other line is skipped silently.
#
# @param io [#gets] line source
# @yieldparam key [String] text before the first tab
# @yieldparam value [String] text after the first tab, without the newline
def rubydoop_each_pair(io)
  while (line = io.gets)
    yield Regexp.last_match(1), Regexp.last_match(2) if line =~ /^([^\t]+)\t(.+)$/
  end
end

# Map phase: calls +mapper+ with (key, value) for each record on +io+.
def rubydoop_run_map(io, mapper)
  rubydoop_each_pair(io) { |key, value| mapper.call key, value }
end

# Reduce phase: groups consecutive records that share a key and calls
# +reducer+ with (key, values) once per group. The input is expected to be
# sorted by key already; identical keys that are not adjacent form separate
# groups.
def rubydoop_run_reduce(io, reducer)
  key = nil
  values = []
  rubydoop_each_pair(io) do |this_key, this_value|
    if key && key != this_key
      reducer.call key, values
      values = []
    end
    key = this_key
    values << this_value
  end
  # Flush the final group: nothing follows it to trigger the key-change branch.
  reducer.call key, values if key
end

# Builds the shell script that clears the previous output directory and
# submits the Hadoop streaming job.
#
# @param hadoop_home [String] Hadoop installation directory
# @param library_path [String] path of this library file, shipped via -file
# @param script_path [String] path of the user's job script, shipped via -file
# @return [String] a two-command shell script
def rubydoop_start_command(hadoop_home, library_path, script_path)
  script_name = File.basename(script_path)
  # Every continuation line ends in " \" so arguments stay separated even if
  # the following line carries no leading whitespace.
  <<-EOC
    hadoop fs -rmr output
    hadoop jar #{Shellwords.escape(hadoop_home)}/contrib/streaming/hadoop-*-streaming.jar \\
      -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat \\
      -output output -input input \\
      -file #{Shellwords.escape(File.expand_path(library_path))} \\
      -file #{Shellwords.escape(File.expand_path(script_path))} \\
      -mapper "#{script_name} map" \\
      -reducer "#{script_name} reduce"
  EOC
end

# Exit hook: once the user's script has registered its blocks via map/reduce,
# dispatch on the first command-line argument.
#
#   start    - print and exec the Hadoop streaming job
#   map      - feed records from standard input to the map block
#   reduce   - feed grouped records from standard input to the reduce block
#   simulate - run "map | sort | reduce" locally over the file named by the
#              last argument
#
# Anything else prints a usage message to STDERR and exits with status -1.
at_exit do
  HADOOP_HOME ||= ENV['HADOOP_HOME'] || '/usr/local/hadoop'
  case ARGV.first
  when 'start'
    cmd = rubydoop_start_command(HADOOP_HOME, __FILE__, $0)
    puts cmd
    # Make sure the echoed command is written out before exec replaces the process.
    $stdout.flush
    exec cmd
  when 'map'
    rubydoop_run_map($stdin, @map)
  when 'reduce'
    rubydoop_run_reduce($stdin, @reduce)
  when 'simulate'
    input = ARGV.last
    raise "simulate: input file not found: #{input}" unless File.exist?(input)
    script = Shellwords.escape($0)
    exec "cat #{Shellwords.escape(input)} | #{script} map | sort | #{script} reduce"
  else
    STDERR.puts <<-EOM
      Please run "#{$0} COMMAND", where COMMAND is one of the following:
      \tstart
      \tmap
      \treduce
      \tsimulate FILE
    EOM
    exit(-1)
  end
end
data/lib/rubydoop/minitest.rb ADDED
@@ -0,0 +1,23 @@
1
+ require 'open3'
2
+
3
class Rubydoop
  # Test-case base class with helpers that run a job script through shell
  # pipelines and return what the pipeline printed.
  #
  # The script is invoked as "./<script>", so it is resolved relative to the
  # current working directory.
  class MiniTest < MiniTest::Unit::TestCase

    # Runs the map phase of +script+ over +input+ and sorts the result.
    #
    # @param script [String] script file name, relative to the working directory
    # @param input [String] text written to the mapper's standard input
    # @return [Array<String>] output lines of "<script> map | sort"
    def map(script, input)
      pipe_through(input, "./#{script} map", "sort")
    end

    # Runs the full map, sort, reduce pipeline of +script+ over +input+.
    #
    # @param script [String] script file name, relative to the working directory
    # @param input [String] text written to the mapper's standard input
    # @return [Array<String>] output lines of the reduce phase
    def mapreduce(script, input)
      pipe_through(input, "./#{script} map", "sort", "./#{script} reduce")
    end

    private

    # Feeds +input+ to the first command of a pipeline and returns the lines
    # the last command writes.
    def pipe_through(input, *commands)
      Open3.pipeline_rw(*commands) do |first_stdin, last_stdout, _wait_threads|
        first_stdin.print input
        # Close the write end so the pipeline sees EOF and can finish.
        first_stdin.close
        last_stdout.readlines
      end
    end

  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubydoop
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
10
+ platform: ruby
11
+ authors:
12
+ - Jacob Rothstein
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-04-21 00:00:00 -04:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: Simple Ruby Sugar for Hadoop Streaming
22
+ email: jbr@yakbarber.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - lib/rubydoop/minitest.rb
31
+ - lib/rubydoop.rb
32
+ has_rdoc: true
33
+ homepage: https://github.com/jbr/rubydoop
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --charset=UTF-8
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project: rubydoop
60
+ rubygems_version: 1.3.7
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Simple Ruby Sugar for Hadoop Streaming
64
+ test_files: []
65
+