rubydoop 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/rubydoop.rb +60 -0
  2. data/lib/rubydoop/minitest.rb +23 -0
  3. metadata +65 -0
data/lib/rubydoop.rb ADDED
@@ -0,0 +1,60 @@
1
# Registers the block that handles each record during the map phase.
#
# The block is kept in @map; the 'map' command later calls it with the
# key and value parsed from every tab-separated input line.
#
# @yield [key, value] the two fields of one input line
# @return [Proc, nil] the stored block (nil when no block is given)
def map(&proc)
  instance_variable_set(:@map, proc)
end
4
+
5
# Registers the block that handles each key group during the reduce phase.
#
# The block is kept in @reduce; the 'reduce' command later calls it with a
# key and the array of values collected for that key.
#
# @yield [key, values] a key and the values gathered for it
# @return [Proc, nil] the stored block (nil when no block is given)
def reduce(&proc)
  instance_variable_set(:@reduce, proc)
end
8
+
9
# Writes one output record to standard output as "key<TAB>value".
#
# @param key [Object] first field of the record
# @param value [Object] second field of the record
# @return [nil]
def emit(key, value)
  record = [key, value].join("\t")
  $stdout.puts record
end
12
+
13
# Command dispatcher, run when the interpreter exits.
#
# Reads the command from ARGV.first and uses the blocks held in @map and
# @reduce:
#
#   start     - prints, then execs, a Hadoop streaming job that ships this
#               file and the running script and uses the script as both
#               mapper and reducer
#   map       - calls @map with the key and value of every
#               "key<TAB>value" line read from standard input
#   reduce    - groups consecutive input lines that share a key and calls
#               @reduce once per group with the key and its array of values
#   simulate  - execs "cat FILE | script map | sort | script reduce", where
#               FILE is the last command-line argument
#
# Any other command prints a usage message to standard error and exits with
# status -1. Lines that do not look like "key<TAB>value" are skipped.
at_exit do
  HADOOP_HOME ||= ENV['HADOOP_HOME'] || '/usr/local/hadoop'
  record = /^([^\t]+)\t(.+)$/

  case ARGV.first
  when 'start'
    # Each argument sits on its own continuation line. The space before the
    # backslash keeps arguments separate regardless of indentation.
    streaming = [
      "hadoop jar #{HADOOP_HOME}/contrib/streaming/hadoop-*-streaming.jar",
      '-inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat',
      '-output output -input input',
      "-file #{File.expand_path __FILE__}",
      "-file #{File.expand_path $0}",
      "-mapper \"#{File.basename $0} map\"",
      "-reducer \"#{File.basename $0} reduce\""
    ].join(" \\\n  ")
    cmd = "hadoop fs -rmr output\n#{streaming}\n"
    puts cmd
    exec cmd
  when 'map'
    while (line = $stdin.gets)
      fields = record.match(line)
      @map.call fields[1], fields[2] if fields
    end
  when 'reduce'
    key = nil
    values = []
    while (line = $stdin.gets)
      fields = record.match(line)
      next unless fields

      this_key = fields[1]
      if key && key != this_key
        @reduce.call key, values
        # Start a fresh array so the one handed to the block is never mutated.
        values = []
      end
      key = this_key
      values << fields[2]
    end
    # Flush the final group: no later key change would trigger it.
    @reduce.call key, values if key
  when 'simulate'
    require 'shellwords'
    input = ARGV.last
    # File.exist? works on every Ruby; File.exists? was removed in Ruby 3.2.
    raise "simulate: input file not found: #{input}" unless File.exist?(input)

    # Escape both paths so names with spaces or shell metacharacters survive.
    script = Shellwords.escape($0)
    exec "cat #{Shellwords.escape(input)} | #{script} map | sort | #{script} reduce"
  else
    $stderr.puts "Please run \"#{$0} COMMAND\", where COMMAND is one of the following:",
                 "\tstart", "\tmap", "\treduce", "\tsimulate FILE"
    exit(-1)
  end
end
data/lib/rubydoop/minitest.rb ADDED
@@ -0,0 +1,23 @@
1
+ require 'open3'
2
+
3
class Rubydoop
  # Test-case base class with helpers that run a script's phases as
  # subprocess pipelines and return the lines they print.
  class MiniTest < MiniTest::Unit::TestCase
    # Pipes +input+ through "./script map" and then "sort".
    #
    # @param script [String] script name, run as "./script" from the current directory
    # @param input [String] text written to the mapper's standard input
    # @return [Array<String>] output lines of the final pipeline stage
    def map(script, input)
      run_pipeline(input, "./#{script} map", "sort")
    end

    # Pipes +input+ through "./script map", "sort" and "./script reduce".
    #
    # @param script [String] script name, run as "./script" from the current directory
    # @param input [String] text written to the mapper's standard input
    # @return [Array<String>] output lines of the final pipeline stage
    def mapreduce(script, input)
      run_pipeline(input, "./#{script} map", "sort", "./#{script} reduce")
    end

    private

    # Writes +input+ to the first command, closes its stdin so the pipeline
    # sees end-of-file, and reads every line the last command prints.
    def run_pipeline(input, *commands)
      Open3.pipeline_rw(*commands) do |first_stdin, last_stdout, _wait_threads|
        first_stdin.print input
        first_stdin.close
        last_stdout.readlines
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubydoop
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
10
+ platform: ruby
11
+ authors:
12
+ - Jacob Rothstein
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-04-21 00:00:00 -04:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: Simple Ruby Sugar for Hadoop Streaming
22
+ email: jbr@yakbarber.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - lib/rubydoop/minitest.rb
31
+ - lib/rubydoop.rb
32
+ has_rdoc: true
33
+ homepage: https://github.com/jbr/rubydoop
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --charset=UTF-8
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project: rubydoop
60
+ rubygems_version: 1.3.7
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Simple Ruby Sugar for Hadoop Streaming
64
+ test_files: []
65
+