mandy 0.2.14
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +17 -0
- data/bin/mandy +21 -0
- data/bin/mandy-get +29 -0
- data/bin/mandy-hadoop +74 -0
- data/bin/mandy-install +11 -0
- data/bin/mandy-local +27 -0
- data/bin/mandy-map +24 -0
- data/bin/mandy-put +28 -0
- data/bin/mandy-reduce +24 -0
- data/bin/mandy-rm +23 -0
- data/lib/dsl.rb +10 -0
- data/lib/job.rb +92 -0
- data/lib/mandy.rb +35 -0
- data/lib/mappers/base_mapper.rb +30 -0
- data/lib/mappers/pass_through_mapper.rb +10 -0
- data/lib/mappers/transpose_mapper.rb +10 -0
- data/lib/packer.rb +25 -0
- data/lib/reducers/base_reducer.rb +36 -0
- data/lib/reducers/max_reducer.rb +10 -0
- data/lib/reducers/min_reducer.rb +10 -0
- data/lib/reducers/pass_through_reducer.rb +9 -0
- data/lib/reducers/sum_reducer.rb +9 -0
- data/lib/reducers/transpose_reducer.rb +9 -0
- data/lib/ruby-hbase.rb +10 -0
- data/lib/ruby-hbase/hbase_table.rb +166 -0
- data/lib/ruby-hbase/scanner.rb +55 -0
- data/lib/ruby-hbase/version.rb +9 -0
- data/lib/ruby-hbase/xml_decoder.rb +18 -0
- data/lib/serializers/json.rb +13 -0
- data/lib/stores/hbase.rb +24 -0
- data/lib/stores/in_memory.rb +24 -0
- data/lib/support/array_serializer.rb +32 -0
- data/lib/support/formatting.rb +27 -0
- data/lib/support/tuple.rb +40 -0
- data/lib/task.rb +83 -0
- data/lib/test_runner.rb +75 -0
- data/readme.md +11 -0
- metadata +97 -0
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Rake tasks for the Mandy gem: spec run (default) plus a local build/install task.
require 'rubygems'
require "rake"
require File.expand_path(File.join(File.dirname(__FILE__), 'lib', 'mandy'))
require 'spec/rake/spectask'

task :default => :spec

# Run the RSpec suite (specdoc format, colour, ordered by mtime).
Spec::Rake::SpecTask.new(:spec) do |t|
  t.spec_files = FileList['spec/lib/**/*_spec.rb']
  t.spec_opts = %w{-f s -c -L mtime}
end

# Build the gem, move it under pkg/ and install it system-wide (needs sudo).
task :gem do
  `sudo gem build mandy.gemspec`
  `mkdir pkg; mv mandy-*.gem pkg/`
  `sudo gem install pkg/mandy-*.gem`
end
|
data/bin/mandy
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Prints an overview of the mandy-* command line tools.

require "rubygems"
require "mandy"

puts "\nYou are running Mandy!"
puts "Here are the commands at your disposal..."
puts ''

# Command name => one-line description, printed as an aligned table below.
{
  'mandy-install' => 'Installs the Mandy Rubygem on several hosts via ssh.',
  'mandy-local' => 'Run a Map/Reduce task locally without requiring hadoop',
  'mandy-hadoop' => 'Run a Map/Reduce task on hadoop using the provided cluster config',
  'mandy-rm' => 'remove a file or directory from HDFS',
  'mandy-put' => 'upload a file into HDFS',
  'mandy-map' => 'Run a map task reading on STDIN and writing to STDOUT',
  'mandy-reduce' => 'Run a reduce task reading on STDIN and writing to STDOUT'
}.each do |command, description|

  puts "#{command.ljust(15)} #{description}"
end
|
data/bin/mandy-get
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Fetches (and merges) a file or directory from HDFS onto the local
# filesystem via `hadoop fs -getmerge`.
require 'optparse'
require 'ostruct'

options = OpenStruct.new

OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-get hdfs_file_location local_file_destination [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end.parse!


# Resolve a possibly-relative path against the current working directory.
def absolute_path(path)
  path =~ /^\// ? path : File.join(Dir.pwd, path)
end

remote_file = ARGV[0]
local_file = ARGV[1]
# Cluster config defaults to ./cluster.xml when -c is not given.
config = absolute_path(options.config || 'cluster.xml')

`$HADOOP_HOME/bin/hadoop fs -conf #{config} -getmerge #{remote_file} #{local_file}`
|
data/bin/mandy-hadoop
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs every Mandy job defined in the given script on a Hadoop cluster via
# Hadoop Streaming, chaining each job's output into the next job's input.
# Prints the final output location on STDOUT.
require "rubygems"
require "mandy"
require 'optparse'
require 'ostruct'
require 'uri'

options = OpenStruct.new

OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-hadoop script input output [options]"

  opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
    options.payload = payload
  end

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on("-v", '--variables name=value', "Pass additional parameters to jobs") do |config|
    options.cmdenv = config
  end

  opts.on("-j", '--json {"key":"1 value"}', "Pass JSON encoded parameters to jobs") do |config|
    options.cmdenv = "json=#{URI.encode(config)}"
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end.parse!

# Resolve a possibly-relative path against the current working directory.
def absolute_path(path)
  path =~ /^\// ? path : File.join(Dir.pwd, path)
end

file = ARGV[0]
filename = File.basename(file)
input = ARGV[1]
output_folder = ARGV[2]
config = options.config || 'cluster.xml'
# A directory payload is tarred for shipping; a plain script is sent as-is.
payload = options.payload ? Mandy::Packer.pack(options.payload) : ARGV[0]
cmdenv = options.cmdenv

at_exit { Mandy::Packer.cleanup!(payload) }

require absolute_path(file)

output = nil

Mandy::Job.jobs.each_with_index do |job, i|

  jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
  output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")

  # BUG FIX: only emit -cmdenv when variables were actually supplied; a bare
  # -cmdenv would otherwise consume the following -output flag as its value.
  cmdenv_option = cmdenv ? %(-cmdenv #{cmdenv}) : ''

  # BUG FIX: the mapper/reducer commands must reference the job script by
  # name (filename) so the streaming task can find it in its working dir.
  command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
  -conf '#{config}' \
  -input "#{input}" \
  -mapper "mandy-map #{filename} '#{job.name}' #{File.basename(payload)}" \
  -reducer "mandy-reduce #{filename} '#{job.name}' #{File.basename(payload)}" \
  -file "#{payload}" \
  #{cmdenv_option} \
  -output "#{output}")

  `#{command}`

  # puts "#{command}"
  input = output
end

# print out the output location so caller can know where to get the results from
puts output
|
data/bin/mandy-install
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Installs the Mandy gem on each host given on the command line, over ssh
# (assumes the remote user can sudo).

if ARGV.size==0
  puts "USAGE: mandy-install user@server1.com [user@server2.com ...]"
  exit
end

ARGV.each do |server|
  puts "Installing on #{server}..."
  system %(ssh #{server} "sudo gem install trafficbroker-mandy --source http://gems.github.com")
end
|
data/bin/mandy-local
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs every job defined in the given script locally, emulating Hadoop with a
# `cat | mandy-map | sort | mandy-reduce` shell pipeline and chaining the
# jobs together. Prints the final output file path.

if ARGV.size==0
  puts "USAGE: mandy-local my_script.rb local_input_file local_output_folder"
  exit
end

require "fileutils"

# Resolve a possibly-relative path against the current working directory.
def absolute_path(path)
  path =~ /^\// ? path : File.join(Dir.pwd, path)
end

file = absolute_path(ARGV[0])
input = absolute_path(ARGV[1])
output_folder = FileUtils.mkdir_p(absolute_path(ARGV[2]))
require file

out = nil
Mandy::Job.jobs.each_with_index do |job, i|
  out = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
  puts "Running #{job.name}..."
  # `sort` between map and reduce mimics Hadoop's shuffle/sort phase.
  `cat #{input} | mandy-map #{file} "#{job.name}" | sort | mandy-reduce #{file} "#{job.name}" > #{out}`
  # Each job consumes the previous job's output.
  input = out
end

puts out
|
data/bin/mandy-map
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Hadoop Streaming entry point: runs the named job's map step, reading
# key/value lines on STDIN and writing results to STDOUT.
require "rubygems"
require "mandy"

if ARGV.size==0
  puts "USAGE: mandy-map my_script.rb 'Job Name' [payload]"
  exit
end

# An optional packed payload (tarred working directory) is unpacked into the
# task's working directory before the job script is loaded.
if ARGV.size > 2
  payload = ARGV[2]
  Mandy::Packer.unpack(payload)
end

# Resolve a possibly-relative path against the current working directory.
def absolute_path(path)
  path =~ /^\// ? path : File.join(Dir.pwd, path)
end

file = absolute_path(ARGV[0])
job_name = ARGV[1]

require file

Mandy::Job.find_by_name(job_name).run_map
|
data/bin/mandy-put
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Uploads a local file or folder into HDFS via `hadoop fs -copyFromLocal`.
require 'optparse'
require 'ostruct'

options = OpenStruct.new

OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-put local_file_or_folder hdfs_destination_location [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end.parse!

# Resolve a possibly-relative path against the current working directory.
def absolute_path(path)
  path =~ /^\// ? path : File.join(Dir.pwd, path)
end

source = absolute_path(ARGV[0])
dest = ARGV[1]
# Cluster config defaults to ./cluster.xml when -c is not given.
config = options.config || 'cluster.xml'

`$HADOOP_HOME/bin/hadoop fs -conf #{config} -copyFromLocal #{source} #{dest}`
|
data/bin/mandy-reduce
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Hadoop Streaming entry point: runs the named job's reduce step, reading
# sorted key/value lines on STDIN and writing results to STDOUT.
require "rubygems"
require "mandy"

if ARGV.size==0
  puts "USAGE: mandy-reduce my_script.rb 'Job Name' [payload]"
  exit
end

# Resolve a possibly-relative path against the current working directory.
def absolute_path(path)
  path =~ /^\// ? path : File.join(Dir.pwd, path)
end

# An optional packed payload (tarred working directory) is unpacked into the
# task's working directory before the job script is loaded.
if ARGV.size > 2
  payload = ARGV[2]
  Mandy::Packer.unpack(payload)
end

file = absolute_path(ARGV[0])
job_name = ARGV[1]

require file

Mandy::Job.find_by_name(job_name).run_reduce
|
data/bin/mandy-rm
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Removes a file or directory (recursively) from HDFS via `hadoop fs -rmr`.
require 'optparse'
require 'ostruct'

options = OpenStruct.new

OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-rm file_or_folder_on_hdfs [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end.parse!

file = ARGV[0]
# Cluster config defaults to ./cluster.xml when -c is not given.
config = options.config || 'cluster.xml'

`$HADOOP_HOME/bin/hadoop fs -conf #{config} -rmr #{file}`
|
data/lib/dsl.rb
ADDED
data/lib/job.rb
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
module Mandy
  # A named map/reduce job built via the DSL. Holds the mapper/reducer
  # classes, Hadoop jobconf settings and serialization mixins for one job in
  # a (possibly multi-job) chain.
  class Job
    class << self
      # All jobs defined so far, in definition order.
      def jobs
        @jobs ||= []
      end

      def find_by_name(name)
        jobs.find {|job| job.name == name }
      end
    end

    attr_reader :settings
    attr_reader :name

    # The DSL block is instance_eval'd, so DSL methods below are called on
    # this job instance.
    def initialize(name, &blk)
      @name = name
      @settings = {}
      @modules = []
      # Default to identity map/reduce so jobs work with either side omitted.
      @mapper_class = Mandy::Mappers::PassThroughMapper
      @reducer_class = Mandy::Reducers::PassThroughReducer
      set('mapred.job.name', name)
      instance_eval(&blk) if blk
    end

    # Register modules to be mixed into the mapper and reducer classes when
    # they are defined (used for custom serializers — hence the alias).
    def mixin(*modules)
      modules.each {|m| @modules << m}
    end
    alias_method :serialize, :mixin

    # :plain disables key/value deserialization on input.
    def input_format(format)
      @input_format = format
    end

    # :plain disables key/value serialization on output.
    def output_format(format)
      @output_format = format
    end

    # Set an arbitrary Hadoop jobconf property (keys and values stored as strings).
    def set(key, value)
      @settings[key.to_s] = value.to_s
    end

    def map_tasks(count)
      set('mapred.map.tasks', count)
    end

    def reduce_tasks(count)
      set('mapred.reduce.tasks', count)
    end

    # Register a named external store; only :hbase is supported here.
    def store(type, name, options={})
      Mandy.stores[name] = case type
        when :hbase
          Stores::HBase.new(options)
        else
          raise "Unknown store type #{type}"
      end
    end

    # Define the map step from a class or from a block compiled into an
    # anonymous mapper class. Registered mixins are applied here.
    def map(klass=nil, &blk)
      @mapper_class = klass || Mandy::Mappers::Base.compile(&blk)
      @modules.each {|m| @mapper_class.send(:include, m) }
      @mapper_class
    end

    # Define the reduce step from a class or from a block compiled into an
    # anonymous reducer class. Registered mixins are applied here.
    def reduce(klass=nil, &blk)
      @reducer_class = klass || Mandy::Reducers::Base.compile(&blk)
      @modules.each {|m| @reducer_class.send(:include, m) }
      @reducer_class
    end

    # Run the map step over the given streams. When no reducer was defined
    # the mapper is the final stage, so it also applies output formatting.
    def run_map(input=STDIN, output=STDOUT, &blk)
      @mapper_class.send(:include, Mandy::IO::OutputFormatting) unless reducer_defined?
      mapper = @mapper_class.new(input, output, @input_format, @output_format)
      yield(mapper) if blk
      mapper.execute
    end

    # Run the reduce step over the given streams. The optional block receives
    # the reducer instance before execution (used by tests).
    def run_reduce(input=STDIN, output=STDOUT, &blk)
      reducer = @reducer_class.new(input, output, @input_format, @output_format)
      yield(reducer) if blk
      reducer.execute
    end

    private

    # True once `reduce` replaced the pass-through default.
    def reducer_defined?
      @reducer_class != Mandy::Reducers::PassThroughReducer
    end

  end
end
|
data/lib/mandy.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require "rubygems"
require "json"
require "uri"

# Load the whole Mandy library relative to this file, in dependency order
# (Task and the IO formatting mixins must precede the mapper/reducer classes).
%w(
support/formatting
task
dsl
job
packer
support/tuple
support/array_serializer
mappers/base_mapper
mappers/transpose_mapper
mappers/pass_through_mapper
reducers/base_reducer
reducers/pass_through_reducer
reducers/sum_reducer
reducers/max_reducer
reducers/min_reducer
reducers/transpose_reducer
serializers/json
stores/hbase
stores/in_memory
test_runner
ruby-hbase
).each {|file| require File.join(File.dirname(__FILE__), file) }

module Mandy
  class << self
    # Registry of named external stores configured by jobs (see Job#store).
    def stores
      @stores||={}
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Mandy
  module Mappers
    # Base class for map tasks: reads key/value lines from the input stream
    # and hands each (deserialized) pair to #mapper.
    class Base < Mandy::Task
      include Mandy::IO::InputFormatting

      # Build an anonymous mapper subclass whose #mapper is the given block.
      def self.compile(&blk)
        Class.new(Mandy::Mappers::Base) do
          self.class_eval do
            define_method(:mapper, blk) if blk
          end
        end
      end

      def execute
        @input.each_line do |line|
          key, value = line.split(KEY_VALUE_SEPERATOR, 2)
          # A line with no separator is treated as all value, no key.
          key, value = nil, key if value.nil?
          value.chomp!
          mapper(input_deserialize_key(key), input_deserialize_value(value))
        end
      end

      private

      # Default mapper is a no-op; subclasses or compiled blocks override it.
      def mapper(key,value)
        #nil
      end
    end
  end
end
|
data/lib/packer.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require "fileutils"

module Mandy
  # Bundles a working directory into a tar archive so it can be shipped to a
  # Hadoop cluster alongside the streaming job, and cleans it up afterwards.
  class Packer
    TMP_DIR = '/tmp/mandy'

    # Tar up +dir+ and return the archive path. A path to a regular file is
    # returned untouched (nothing to pack).
    def self.pack(dir)
      return dir if File.file?(dir)
      FileUtils.mkdir_p(TMP_DIR)
      tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}.tar"
      # Quote the archive path so shell metacharacters in it cannot break the
      # command (the payload path itself is caller-controlled).
      Dir.chdir(dir) { `tar -cf '#{tmp_path}' *` }
      tmp_path
    end

    # Extract +file+ into the current directory; no-op (false) for non-tar files.
    def self.unpack(file)
      return false unless File.extname(file) == '.tar'
      `tar -xf '#{file}'`
    end

    # Remove a packed archive; returns false when +file+ is not a tar archive
    # we created. Uses FileUtils rather than shelling out to `rm`.
    def self.cleanup!(file)
      return false unless File.extname(file) == '.tar'
      FileUtils.rm_f(file)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Mandy
  module Reducers
    # Base class for reduce tasks: reads sorted key/value lines from the input
    # stream, groups consecutive lines sharing a key and hands each group to
    # #reducer with deserialized keys and values.
    class Base < Mandy::Task
      include Mandy::IO::OutputFormatting

      # Build an anonymous reducer subclass whose #reducer is the given block.
      def self.compile(&blk)
        Class.new(Mandy::Reducers::Base) do
          self.class_eval do
            define_method(:reducer, blk) if blk
          end
        end
      end

      def execute
        last_key, values = nil, []
        @input.each_line do |line|
          key, value = line.split(KEY_VALUE_SEPERATOR)
          value.chomp!
          last_key = key if last_key.nil?
          if key != last_key
            # BUG FIX: intermediate groups used to be emitted with the raw
            # (still-serialized) key/values while only the final group was
            # deserialized; emit every group through emit_group.
            emit_group(last_key, values)
            last_key, values = key, []
          end
          values << value
        end
        # Flush the trailing group; guard against completely empty input,
        # which previously invoked the reducer with a nil key.
        emit_group(last_key, values) unless last_key.nil?
      end

      private

      # Deserialize a completed group and pass it to the user reducer.
      def emit_group(key, values)
        reducer(deserialize_key(key), values.map { |v| deserialize_value(v) })
      end

      # Default reducer is a no-op; subclasses or compiled blocks override it.
      def reducer(key,values)
        #nil
      end
    end
  end
end
|
data/lib/ruby-hbase.rb
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
module HBase
  # Raised when a requested row does not exist in the table (HTTP 404).
  class RowNotFound < Exception
    def initialize(msg=nil)
      super
    end
  end

  # Client for one HBase table, speaking to an HBase REST interface rooted at
  # /api/<table>. NOTE(review): relies on URI, Net::HTTP, ERB and a libxml-style
  # XML::Parser being loaded elsewhere — none are required here.
  class HTable
    include XmlDecoder

    # table_uri e.g. "http://host:port/api/table_name"; the table name is the
    # last path segment.
    def initialize(table_uri)
      @table_uri = table_uri

      @uri = URI.parse(table_uri)

      @host, @table_name = @uri.host, @uri.path.split("/").last
    end

    def name
      @table_name
    end

    ######################
    # Meta-type requests

    # Not supported by this client.
    def start_keys
      raise NotImplementedError
    end


    # Fetch the table's column family descriptors as an array of hashes
    # (currently only :name is populated; the trailing ':' is chopped off).
    def column_descriptors
      column_families = []

      # get the xml for the column descriptors
      response = Net::HTTP.get_response(@uri.host, "/api/#{@table_name}", @uri.port)
      body = response.body

      # parse the xml into a document
      doc = XML::Parser.string(body).parse

      doc.find("/table/columnfamilies/columnfamily").each do |node|
        colfam = {}
        colfam[:name] = node.find_first("name").content.strip.chop
        column_families << colfam
      end
      column_families
    end


    #####################
    # Standard CRUD ops

    DEFAULT_GET_OPTIONS = {:timestamp => nil, :columns => nil}

    # Fetch one row. Options: :columns (name or array of names) restricts the
    # returned columns; :timestamp (seconds, converted to ms) selects a version.
    # Returns the row's {column => value} hash; raises RowNotFound on 404.
    def get(key, options = {})
      opts = DEFAULT_GET_OPTIONS.merge(options)

      columns = Array(opts.delete(:columns)).compact
      timestamp = opts.delete(:timestamp)
      timestamp = (timestamp.to_f * 1000).to_i.to_s if timestamp

      Net::HTTP.start(@uri.host, @uri.port) do |session|
        columns_query = columns.map{ |name| "column=#{name}" }.join("&")

        ts_section = timestamp ? "/#{timestamp}" : ""

        query_string = "?" + columns_query

        query = "/api/#{@table_name}/row/#{url_encode(key)}#{ts_section}#{query_string}"
        response = session.get(query, {"Accept" => "*/*"})

        case response.code.to_i
        when 200 #success!
          body = response.body
          parse_row_result(body).last
        when 204 #no data - probably an incorrect colname
          raise "Didn't get any data back - check your column names!"
        when 404
          raise RowNotFound, "Could not find row '#{key}'"
        else
          nil
        end
      end
    end

    # Write a {column => value} hash to a row; values are base64-encoded
    # (pack("m")) into the XML payload. Returns true on HTTP 200.
    def put(key, keys_and_values, timestamp = nil)
      Net::HTTP.start(@uri.host, @uri.port) do |session|
        xml = "<columns>"

        ts_section = timestamp ? "/#{(timestamp.to_f * 1000).to_i}" : ""

        keys_and_values.each do |name, value|
          xml << "<column><name>#{name}</name><value>#{[value.to_s].pack("m")}</value></column>"
        end

        xml << "</columns>"

        query = "/api/#{@table_name}/row/#{url_encode(key)}#{ts_section}"
        response = session.post(query, xml, {"Content-type" => "text/xml"})

        case response.code.to_i
        when 200
          true
        else
          unexpected_response(response)
        end
      end
    end

    # Delete a row, or just the given columns of it. Returns true on HTTP 202.
    # NOTE(review): the row name is not url-encoded here, unlike get/put, and
    # the timestamp parameter is accepted but never sent.
    def delete(row, columns = nil, timestamp = nil)
      Net::HTTP.start(@uri.host, @uri.port) do |session|
        columns_query = Array(columns).compact.map{ |name| "column=#{name}" }.join("&")

        response = session.delete("/api/#{@table_name}/row/#{row}?#{columns_query}")
        case response.code.to_i
        when 202
          return true
        else
          unexpected_response(response)
        end

      end
    end

    #######################
    # Scanning interface

    # Open a server-side scanner over [start_row, end_row) and wrap the
    # created resource (201 + Location header) in a Scanner object.
    def get_scanner(start_row, end_row, timestamp = nil, columns = nil)
      start_row_query = start_row ? "start_row=#{start_row}" : nil
      end_row_query = end_row ? "end_row=#{end_row}" : nil
      timestamp_section = timestamp ? "/#{(timestamp.to_f * 1000).to_i}" : nil
      columns_section = columns ? columns.map{ |col| "column=#{col}" }.join("&") : nil

      query_string = [start_row_query, end_row_query,
          timestamp_section, columns_section].compact.join("&")

      path = ""

      # open the scanner
      Net::HTTP.start(@uri.host, @uri.port) do |session|
        response = session.post("/api/#{@table_name}/scanner?#{query_string}",
          "", {"Accept" => "text/xml"}
        )

        case response.code.to_i
        when 201
          # redirect - grab the path and send
          Scanner.new(self, "http://#{@uri.host}:#{@uri.port}" + response["Location"])
        else
          unexpected_response(response)
        end
      end
    end


    private

    def url_encode(str)
      ERB::Util.url_encode(str)
    end

    # Raise a descriptive error for any HTTP status we don't handle.
    def unexpected_response(response)
      raise "Unexpected response code #{response.code.to_i}:\n#{response.body}"
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module HBase
  # Iterates over the rows produced by an HBase REST scanner resource
  # (created via HTable#get_scanner).
  class Scanner
    include XmlDecoder

    def initialize(table, scanner_uri)
      @table, @scanner_uri = table, scanner_uri
    end

    # NOTE(review): stub — the scanner resource is never explicitly closed.
    def close

    end

    # NOTE(review): stub — iteration is only available through #each.
    def next

    end

    # Yields (row_name, values_hash) for each row: POSTs to the scanner URI
    # repeatedly until the server answers 404 (scanner exhausted).
    def each
      parsed_uri = URI.parse(@scanner_uri)
      Net::HTTP.start(parsed_uri.host, parsed_uri.port) do |session|
        while true
          response = session.post(@scanner_uri, "")

          case response.code.to_i
          when 404
            # over
            break
          when 200
            # item
            yield *parse_row_result(response.body)
          else
            # error
            raise "Unexpected response code #{response.code}, body:\n#{response.body}"
          end
        end
      end
    end

    private

    # Retired REXML-based parser, superseded by XmlDecoder#parse_row_result.
    # def parse_row(xml)
    #   doc = REXML::Document.new(xml)
    #
    #   result = {}
    #
    #   doc.root.each_element("/row/column") do |column|
    #     name = column.get_elements("name")[0].text.strip
    #     value = column.get_elements("value")[0].text.strip.unpack("m").first
    #     result[name] = value
    #   end
    #
    #   [doc.root.get_elements("name")[0].text.strip, result]
    # end

  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module HBase
  # Shared helper for decoding the XML row representation returned by the
  # HBase REST interface.
  module XmlDecoder
    # Returns [row_name, values] where values maps base64-decoded column
    # names to base64-decoded cell values (matching the pack("m") encoding
    # used when writing). NOTE(review): relies on a libxml-style XML::Parser
    # being loaded elsewhere; the row name itself is not base64-decoded.
    def parse_row_result(xml)
      doc = XML::Parser.string(xml).parse

      name_node = doc.root.find_first("/row/name")
      name = name_node ? name_node.content.strip : nil

      values = {}

      doc.find("/row/columns/column").each do |node|
        values[node.find_first("name").content.strip.unpack('m').first] = node.find_first("value").content.strip.unpack("m").first
      end

      [name, values]
    end
  end
end
|
data/lib/stores/hbase.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Mandy
  module Stores
    # Key/value store backed by an HBase table reached over its REST
    # interface. Thin delegation wrapper around HBase::HTable.
    class HBase
      attr_reader :options

      # options must include :url — the REST URI of the backing table.
      def initialize(options)
        @options = options
        @table = ::HBase::HTable.new(options[:url])
      end

      # Fetch the row stored under +key+.
      def get(key)
        @table.get(key)
      end

      # Write the +values+ hash into the row +key+.
      def put(key, values)
        @table.put(key, values)
      end

      # Two stores are equal when their class and configuration match.
      def ==(other)
        other.class == self.class && other.options == self.options
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Mandy
  module Stores
    # Simple hash-backed store, handy for tests and local runs. Keys are
    # stringified, so symbol and string keys address the same entry.
    class InMemory
      attr_reader :options

      def initialize(options={})
        @options = options
        @table = {}
      end

      # Look up the value stored under +key+ (nil when absent).
      def get(key)
        @table[key.to_s]
      end

      # Store +values+ under +key+, replacing any previous entry.
      def put(key, values)
        @table[key.to_s] = values
      end

      # Two stores are equal when their class and configuration match.
      def ==(other)
        other.class == self.class && other.options == self.options
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Mandy
  # Wraps an array so it can round-trip through Hadoop's line-oriented text
  # protocol as a '|'-separated string.
  class ArraySerializer

    SEPERATOR = '|' unless defined?(SEPERATOR)

    attr_reader :items

    # nil is treated as an empty collection.
    def initialize(items)
      @items = items || []
    end

    # Joined string form, e.g. [1, 2] => "1|2".
    def to_s
      items.join(SEPERATOR)
    end

    # Equal to another serializer holding the same items, or to a plain
    # array with the same contents.
    def ==(other)
      return items == other if other.is_a?(Array)
      self.class == other.class && items == other.items
    end

    def to_a
      items
    end

    # Parse a serialized string back into an array of strings.
    def self.from_s(str)
      str.split(SEPERATOR)
    end

    # Parse a serialized string into an array of Tuple objects.
    def self.tuples_from_s(str)
      from_s(str).map { |part| Tuple.from_s(part) }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Mandy
  module IO
    # Mixed into mappers: decodes incoming keys/values through the host's
    # deserialize_* hooks, unless the job declared its input format :plain.
    module InputFormatting
      def input_deserialize_key(key)
        input_format == :plain ? key : deserialize_key(key)
      end

      def input_deserialize_value(value)
        input_format == :plain ? value : deserialize_value(value)
      end
    end

    # Mixed into reducers (and final-stage mappers): encodes outgoing
    # keys/values through the host's serialize_* hooks, unless the job
    # declared its output format :plain.
    module OutputFormatting
      def output_serialize_key(key)
        output_format == :plain ? key : serialize_key(key)
      end

      def output_serialize_value(value)
        output_format == :plain ? value : serialize_value(value)
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Mandy
  # A named key/value pair with a "name,value" string representation.
  class Tuple

    SEPERATOR = ',' unless defined?(SEPERATOR)

    attr_accessor :name, :value

    # Optional accessor names let callers address #name/#value by
    # domain-specific aliases. NOTE: aliases are installed on the class, so
    # they become visible on every Tuple instance.
    def initialize(name, value, name_accessor = nil, value_accessor = nil)
      @name, @value = name, value
      alias_accessor(name_accessor, :name) unless name_accessor.nil?
      alias_accessor(value_accessor, :value) unless value_accessor.nil?
    end

    def to_s
      "#{@name}#{SEPERATOR}#{@value}"
    end

    # Parse "A,B" back into a Tuple; anything else is rejected.
    def self.from_s(str)
      parts = str.split(SEPERATOR)
      unless parts.size == 2
        raise "Can't create tuple from #{str.inspect}. Format should be 'A#{SEPERATOR}B'"
      end
      new(*parts)
    end

    def inspect
      "<Tuple #{self}>"
    end

    def ==(other)
      self.class == other.class && name == other.name && value == other.value
    end

    private

    # Install class-level aliases for the reader and writer of old_accessor.
    def alias_accessor(new_accessor, old_accessor)
      self.class.send(:alias_method, new_accessor, old_accessor)
      self.class.send(:alias_method, :"#{new_accessor}=", :"#{old_accessor}=")
    end
  end
end
|
data/lib/task.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
module Mandy
  # Shared behaviour for map and reduce tasks: stream wiring, key/value
  # serialization hooks, result emission, job parameters and the Hadoop
  # streaming status/counter reporter protocol.
  class Task
    JSON_PAYLOAD_KEY = "json"
    KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
    NUMERIC_PADDING = 16

    attr_reader :input_format, :output_format

    def initialize(input=STDIN, output=STDOUT, input_format = nil, output_format = nil)
      @input, @output = input, output
      @input_format, @output_format = input_format, output_format
    end

    # Write a key/value pair to the output stream. With no value only the
    # key is written; a nil key is written as the literal string 'nil'.
    def emit(key, value=nil)
      key = 'nil' if key.nil?
      # CONSISTENCY FIX: use the separator constant rather than a hard-coded
      # tab, so a custom separator is honoured everywhere.
      @output.puts(value.nil? ? key.to_s : "#{output_serialize_key(key)}#{KEY_VALUE_SEPERATOR}#{output_serialize_value(value)}")
    end

    # Read a value from a named store configured on the job.
    def get(store, key)
      Mandy.stores[store].get(key)
    end

    # Write values into a named store configured on the job.
    def put(store, key, values)
      Mandy.stores[store].put(key, values)
    end

    private

    # Zero-pad the integer part of a numeric key so Hadoop's lexicographic
    # sort matches numeric ordering.
    def pad(key)
      key_parts = key.to_s.split(".")
      key_parts[0] = key_parts.first.rjust(NUMERIC_PADDING, '0')
      key_parts.join('.')
    end

    # Report task status to Hadoop streaming via the STDERR reporter protocol.
    def update_status(message)
      STDERR.puts("reporter:status:#{message}")
    end

    # Increment a Hadoop counter via the STDERR reporter protocol.
    def update_counter(group, counter, count)
      STDERR.puts("reporter:counter:#{group},#{counter},#{count}")
    end

    # Job parameters arrive either as one JSON payload env var or as plain
    # environment variables.
    def parameter(name)
      return find_json_param(name) if json_provided?
      ENV[name.to_s]
    end

    # Lazily decode and cache the JSON parameter payload.
    def find_json_param(name)
      @json_args ||= JSON.parse(URI.decode(ENV[JSON_PAYLOAD_KEY]))
      @json_args[name.to_s]
    end

    def json_provided?
      !ENV[JSON_PAYLOAD_KEY].nil?
    end

    # Serialization hooks; serializer mixins override these to change the
    # wire format.
    def deserialize_key(key)
      key
    end

    def deserialize_value(value)
      value
    end

    def serialize_key(key)
      key = pad(key) if key.is_a?(Numeric) && key.to_s.length < NUMERIC_PADDING
      key
    end

    def serialize_value(value)
      value = ArraySerializer.new(value) if value.is_a?(Array)
      value.to_s
    end

    # Default output formatting is plain serialization; the OutputFormatting
    # mixin replaces these on final-stage tasks.
    def output_serialize_key(key)
      serialize_key(key)
    end

    def output_serialize_value(value)
      serialize_value(value)
    end

  end
end
|
data/lib/test_runner.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
module Mandy
  # Drives Mandy jobs in-process against StringIO streams so map/reduce logic
  # can be unit tested without Hadoop.
  class TestRunner
    attr_reader :job

    # Defaults to the first defined job. opts[:parameters] is exposed to the
    # job as its JSON parameter payload.
    def initialize(job=Mandy::Job.jobs.first.name, opts={})
      # BUG FIX: only install the payload when parameters were actually
      # supplied — nil.to_json stored the string "null", which made
      # Task#parameter crash on nil instead of falling back to plain ENV.
      ENV[Mandy::Task::JSON_PAYLOAD_KEY] = opts[:parameters].to_json if opts[:parameters]
      @job = Mandy::Job.find_by_name(job)
    end

    # Run the job's map step; input may be an Array (one item per line), a
    # String, or an IO-like stream. Returns the rewound output stream.
    def map(input_stream, output_stream=StringIO.new(''), &blk)
      input_stream = input_from_array(input_stream) if input_stream.is_a?(Array)
      input_stream = StringIO.new(input_stream) if input_stream.is_a?(String)
      @job.run_map(input_stream, output_stream, &blk)
      output_stream.rewind
      output_stream
    end

    # Run the job's reduce step; input may be a Hash (key => value(s)), a
    # String, or an IO-like stream. Returns the rewound output stream.
    def reduce(input_stream, output_stream=StringIO.new(''), &blk)
      input_stream = input_from_hash(input_stream) if input_stream.is_a?(Hash)
      input_stream = StringIO.new(input_stream) if input_stream.is_a?(String)
      @job.run_reduce(input_stream, output_stream, &blk)
      output_stream.rewind
      output_stream
    end

    # Run every defined job in sequence, piping each job's reduce output into
    # the next job's map input.
    def self.end_to_end(verbose=false)
      CompositeJobRunner.new(Mandy::Job.jobs,verbose)
    end

    private

    def input_from_array(input)
      input.join("\n")
    end

    # Flatten a {key => value-or-values} hash into sorted tab-separated lines,
    # mimicking Hadoop's sorted reducer input.
    def input_from_hash(input)
      output = []
      input.each do |key, values|
        output << "#{key}\t#{values}" and next unless values.is_a?(Array)
        values.each { |value| output << "#{key}\t#{value}" }
      end
      input_from_array(output.sort)
    end

    # Chains the TestRunners for a list of jobs, emulating the multi-job
    # pipeline mandy-hadoop/mandy-local run for real.
    class CompositeJobRunner
      def initialize(jobs, verbose=false)
        @jobs = jobs
        @verbose = verbose
        @job_runners = @jobs.map { |job| Mandy::TestRunner.new(job.name) }
      end

      # Feed input_stream through every job; the last job writes to
      # output_stream, which is returned (not rewound).
      def execute(input_stream, output_stream=StringIO.new(''))
        map_temp = StringIO.new('')
        reduce_temp = StringIO.new('')
        @job_runners.each_with_index do |runner, index|
          runner.map(input_stream, map_temp)
          if @verbose
            puts "#{runner.job.name} [MAP] #{map_temp.readlines.inspect}"
            map_temp.rewind
          end
          # Sort between map and reduce, as Hadoop's shuffle would.
          reduce_input = StringIO.new(map_temp.readlines.sort.join(''))
          runner.reduce(reduce_input, (index==@job_runners.size-1 ? output_stream : reduce_temp))
          if @verbose
            puts "#{runner.job.name} [RED] #{reduce_temp.readlines.inspect}"
            reduce_temp.rewind
          end
          input_stream = reduce_temp
          map_temp = StringIO.new('')
          reduce_temp = StringIO.new('')
        end
        output_stream
      end
    end
  end
end
|
data/readme.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
Mandy - Simplified Hadoop distribution for Ruby code
|
2
|
+
====================================================
|
3
|
+
|
4
|
+
Mandy hides the differences and complexities between running map/reduce tasks locally, distributed on a cluster, or in test environments.
|
5
|
+
|
6
|
+
It provides a simple DSL to define new jobs for distribution. See examples/word_count.rb for a demo of some functionality.
|
7
|
+
Run the word count example locally with...
|
8
|
+
|
9
|
+
mandy-local examples/word_count.rb examples/alice.txt examples/output
|
10
|
+
|
11
|
+
Mandy is licensed under the MIT Licence, please see LICENCE for further information.
|
metadata
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mandy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.14
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andy Kent
|
8
|
+
- Paul Ingles
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-07-09 00:00:00 +01:00
|
14
|
+
default_executable:
|
15
|
+
dependencies: []
|
16
|
+
|
17
|
+
description: Map/Reduce
|
18
|
+
email: andy.kent@me.com
|
19
|
+
executables:
|
20
|
+
- mandy
|
21
|
+
- mandy-hadoop
|
22
|
+
- mandy-local
|
23
|
+
- mandy-map
|
24
|
+
- mandy-put
|
25
|
+
- mandy-get
|
26
|
+
- mandy-reduce
|
27
|
+
- mandy-rm
|
28
|
+
- mandy-install
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- bin/mandy-hadoop
|
35
|
+
- bin/mandy-local
|
36
|
+
- bin/mandy-map
|
37
|
+
- bin/mandy-get
|
38
|
+
- bin/mandy-put
|
39
|
+
- bin/mandy-reduce
|
40
|
+
- readme.md
|
41
|
+
- Rakefile
|
42
|
+
- lib/mandy.rb
|
43
|
+
- lib/support/tuple.rb
|
44
|
+
- lib/support/formatting.rb
|
45
|
+
- lib/support/array_serializer.rb
|
46
|
+
- lib/task.rb
|
47
|
+
- lib/dsl.rb
|
48
|
+
- lib/job.rb
|
49
|
+
- lib/mappers/base_mapper.rb
|
50
|
+
- lib/mappers/transpose_mapper.rb
|
51
|
+
- lib/mappers/pass_through_mapper.rb
|
52
|
+
- lib/packer.rb
|
53
|
+
- lib/reducers/base_reducer.rb
|
54
|
+
- lib/reducers/transpose_reducer.rb
|
55
|
+
- lib/reducers/pass_through_reducer.rb
|
56
|
+
- lib/reducers/sum_reducer.rb
|
57
|
+
- lib/reducers/max_reducer.rb
|
58
|
+
- lib/reducers/min_reducer.rb
|
59
|
+
- lib/serializers/json.rb
|
60
|
+
- lib/stores/hbase.rb
|
61
|
+
- lib/stores/in_memory.rb
|
62
|
+
- lib/ruby-hbase.rb
|
63
|
+
- lib/ruby-hbase/hbase_table.rb
|
64
|
+
- lib/ruby-hbase/scanner.rb
|
65
|
+
- lib/ruby-hbase/version.rb
|
66
|
+
- lib/ruby-hbase/xml_decoder.rb
|
67
|
+
- lib/test_runner.rb
|
68
|
+
has_rdoc: true
|
69
|
+
homepage: http://github.com/trafficbroker/mandy
|
70
|
+
licenses: []
|
71
|
+
|
72
|
+
post_install_message:
|
73
|
+
rdoc_options: []
|
74
|
+
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: "0"
|
82
|
+
version:
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: "0"
|
88
|
+
version:
|
89
|
+
requirements: []
|
90
|
+
|
91
|
+
rubyforge_project:
|
92
|
+
rubygems_version: 1.3.4
|
93
|
+
signing_key:
|
94
|
+
specification_version: 2
|
95
|
+
summary: Map/Reduce
|
96
|
+
test_files: []
|
97
|
+
|