mandy 0.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +17 -0
- data/bin/mandy +21 -0
- data/bin/mandy-get +29 -0
- data/bin/mandy-hadoop +74 -0
- data/bin/mandy-install +11 -0
- data/bin/mandy-local +27 -0
- data/bin/mandy-map +24 -0
- data/bin/mandy-put +28 -0
- data/bin/mandy-reduce +24 -0
- data/bin/mandy-rm +23 -0
- data/lib/dsl.rb +10 -0
- data/lib/job.rb +92 -0
- data/lib/mandy.rb +35 -0
- data/lib/mappers/base_mapper.rb +30 -0
- data/lib/mappers/pass_through_mapper.rb +10 -0
- data/lib/mappers/transpose_mapper.rb +10 -0
- data/lib/packer.rb +25 -0
- data/lib/reducers/base_reducer.rb +36 -0
- data/lib/reducers/max_reducer.rb +10 -0
- data/lib/reducers/min_reducer.rb +10 -0
- data/lib/reducers/pass_through_reducer.rb +9 -0
- data/lib/reducers/sum_reducer.rb +9 -0
- data/lib/reducers/transpose_reducer.rb +9 -0
- data/lib/ruby-hbase.rb +10 -0
- data/lib/ruby-hbase/hbase_table.rb +166 -0
- data/lib/ruby-hbase/scanner.rb +55 -0
- data/lib/ruby-hbase/version.rb +9 -0
- data/lib/ruby-hbase/xml_decoder.rb +18 -0
- data/lib/serializers/json.rb +13 -0
- data/lib/stores/hbase.rb +24 -0
- data/lib/stores/in_memory.rb +24 -0
- data/lib/support/array_serializer.rb +32 -0
- data/lib/support/formatting.rb +27 -0
- data/lib/support/tuple.rb +40 -0
- data/lib/task.rb +83 -0
- data/lib/test_runner.rb +75 -0
- data/readme.md +11 -0
- metadata +97 -0
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require "rake"
|
3
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'lib', 'mandy'))
|
4
|
+
require 'spec/rake/spectask'
|
5
|
+
|
6
|
+
# Running plain `rake` runs the spec suite.
task :default => :spec

# Runs every spec file found below spec/lib.
Spec::Rake::SpecTask.new(:spec) do |t|
  t.spec_files = FileList['spec/lib/**/*_spec.rb']
  t.spec_opts = %w{-f s -c -L mtime}
end

# Builds the gem, moves it into pkg/ and installs it system-wide.
# Rake's `sh` and FileUtils helpers are used instead of backticks so that a
# failing step aborts the task (and its output is shown) rather than being
# silently ignored; `mkdir_p` also tolerates an already existing pkg/ folder.
task :gem do
  sh 'sudo gem build mandy.gemspec'
  mkdir_p 'pkg'
  mv FileList['mandy-*.gem'], 'pkg'
  sh 'sudo gem install pkg/mandy-*.gem'
end
|
data/bin/mandy
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "mandy"
|
5
|
+
|
6
|
+
puts "\nYou are running Mandy!"
puts "Here are the commands at your disposal..."
puts ''

# Command name => one-line description. An array of pairs (rather than a Hash)
# keeps the listing order stable on every Ruby version. `mandy-get` ships with
# the gem as well, so it is listed alongside the other HDFS helpers.
[
  ['mandy-install', 'Installs the Mandy Rubygem on several hosts via ssh.'],
  ['mandy-local',   'Run a Map/Reduce task locally without requiring hadoop'],
  ['mandy-hadoop',  'Run a Map/Reduce task on hadoop using the provided cluster config'],
  ['mandy-rm',      'remove a file or directory from HDFS'],
  ['mandy-put',     'upload a file into HDFS'],
  ['mandy-get',     'download (merge) a file or directory from HDFS'],
  ['mandy-map',     'Run a map task reading on STDIN and writing to STDOUT'],
  ['mandy-reduce',  'Run a reduce task reading on STDIN and writing to STDOUT']
].each do |command, description|
  puts "#{command.ljust(15)} #{description}"
end
|
data/bin/mandy-get
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
require 'shellwords'

options = OpenStruct.new

parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-get hdfs_file_location local_file_destination [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
parser.parse!

# Both positional arguments are required; without them the hadoop command
# below would be malformed, so show the usage text instead.
if ARGV.size < 2
  puts parser
  exit 1
end

# Returns +path+ unchanged when it is absolute, otherwise resolves it against
# the current working directory.
def absolute_path(path)
  path.start_with?('/') ? path : File.join(Dir.pwd, path)
end

remote_file = ARGV[0]
local_file = ARGV[1]
config = absolute_path(options.config || 'cluster.xml')

# Every user-supplied value is shell-escaped so paths containing spaces or
# shell metacharacters are passed to hadoop verbatim. $HADOOP_HOME is left for
# the shell to expand, as before.
`$HADOOP_HOME/bin/hadoop fs -conf #{Shellwords.escape(config)} -getmerge #{Shellwords.escape(remote_file)} #{Shellwords.escape(local_file)}`
|
data/bin/mandy-hadoop
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "rubygems"
|
3
|
+
require "mandy"
|
4
|
+
require 'optparse'
|
5
|
+
require 'ostruct'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
require 'erb'
require 'shellwords'

options = OpenStruct.new

parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-hadoop script input output [options]"

  opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
    options.payload = payload
  end

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on("-v", '--variables name=value', "Pass additional parameters to jobs") do |config|
    options.cmdenv = config
  end

  opts.on("-j", '--json {"key":"1 value"}', "Pass JSON encoded parameters to jobs") do |config|
    # URI.encode no longer exists on Ruby 3.0+. ERB::Util.url_encode emits plain
    # %XX escapes, which any percent-decoder on the task side can read back.
    options.cmdenv = "json=#{ERB::Util.url_encode(config)}"
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
parser.parse!

# script, input and output are all mandatory.
if ARGV.size < 3
  puts parser
  exit 1
end

# Returns +path+ unchanged when it is absolute, otherwise resolves it against
# the current working directory.
def absolute_path(path)
  path.start_with?('/') ? path : File.join(Dir.pwd, path)
end

file = ARGV[0]
filename = File.basename(file)
input = ARGV[1]
output_folder = ARGV[2]
config = options.config || 'cluster.xml'
payload = options.payload ? Mandy::Packer.pack(options.payload) : ARGV[0]
cmdenv = options.cmdenv

at_exit { Mandy::Packer.cleanup!(payload) }

require absolute_path(file)

output = nil

# Jobs run in definition order; each job reads the previous job's output folder.
Mandy::Job.jobs.each_with_index do |job, i|

  jobconf = job.settings.map { |key, value| %(-D #{key}='#{value}') }.join(' ')
  output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")

  command = [
    '$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar',
    jobconf,
    "-conf #{Shellwords.escape(config)}",
    "-input #{Shellwords.escape(input)}",
    %(-mapper "mandy-map #{filename} '#{job.name}' #{File.basename(payload)}"),
    %(-reducer "mandy-reduce #{filename} '#{job.name}' #{File.basename(payload)}"),
    "-file #{Shellwords.escape(payload)}",
    # Only pass -cmdenv when there is something to pass; a bare "-cmdenv"
    # would swallow the following "-output" flag as its value.
    (cmdenv ? "-cmdenv #{Shellwords.escape(cmdenv)}" : nil),
    "-output #{Shellwords.escape(output)}"
  ].compact.join(' ')

  # Backticks keep hadoop's stdout out of ours, so the only thing this script
  # prints on stdout is the final output location below.
  `#{command}`

  input = output
end

# print out the output location so caller can know where to get the results from
puts output
|
data/bin/mandy-install
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
if ARGV.empty?
  puts "USAGE: mandy-install user@server1.com [user@server2.com ...]"
  exit
end

# Install the gem on each host in turn over ssh.
ARGV.each do |server|
  puts "Installing on #{server}..."
  # Arguments are passed as a list so the host name is never interpreted by
  # the local shell; the remote command string is sent to ssh unchanged.
  installed = system('ssh', server, 'sudo gem install trafficbroker-mandy --source http://gems.github.com')
  warn "Installation failed on #{server}" unless installed
end
|
data/bin/mandy-local
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# All three arguments are needed; fewer would crash while resolving paths.
if ARGV.size < 3
  puts "USAGE: mandy-local my_script.rb local_input_file local_output_folder"
  exit
end

require "fileutils"
require "shellwords"

# Returns +path+ unchanged when it is absolute, otherwise resolves it against
# the current working directory.
def absolute_path(path)
  path.start_with?('/') ? path : File.join(Dir.pwd, path)
end

file = absolute_path(ARGV[0])
input = absolute_path(ARGV[1])
# Keep the folder as a plain String instead of relying on mkdir_p's return value.
output_folder = absolute_path(ARGV[2])
FileUtils.mkdir_p(output_folder)
require file

out = nil
# Each job is run as `map | sort | reduce`; its output file feeds the next job.
Mandy::Job.jobs.each_with_index do |job, i|
  out = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
  puts "Running #{job.name}..."
  script = Shellwords.escape(file)
  name = Shellwords.escape(job.name)
  # Escaping keeps paths and job names with spaces or quotes intact.
  `cat #{Shellwords.escape(input)} | mandy-map #{script} #{name} | sort | mandy-reduce #{script} #{name} > #{Shellwords.escape(out)}`
  input = out
end

puts out
|
data/bin/mandy-map
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "rubygems"
|
3
|
+
require "mandy"
|
4
|
+
|
5
|
+
# The script and the job name are both required.
if ARGV.size < 2
  puts "USAGE: mandy-map my_script.rb 'Job Name' [payload]"
  exit
end

# An optional third argument names a payload archive to unpack first.
if ARGV.size > 2
  payload = ARGV[2]
  Mandy::Packer.unpack(payload)
end

# Returns +path+ unchanged when it is absolute, otherwise resolves it against
# the current working directory.
def absolute_path(path)
  path.start_with?('/') ? path : File.join(Dir.pwd, path)
end

file = absolute_path(ARGV[0])
job_name = ARGV[1]

require file

# Fail with a readable message instead of a NoMethodError on nil when the
# script does not define the requested job.
job = Mandy::Job.find_by_name(job_name)
abort "mandy-map: no job named #{job_name.inspect} is defined in #{file}" unless job
job.run_map
|
data/bin/mandy-put
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
require 'shellwords'

options = OpenStruct.new

parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-put local_file_or_folder hdfs_destination_location [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
parser.parse!

# Source and destination are both required.
if ARGV.size < 2
  puts parser
  exit 1
end

# Returns +path+ unchanged when it is absolute, otherwise resolves it against
# the current working directory.
def absolute_path(path)
  path.start_with?('/') ? path : File.join(Dir.pwd, path)
end

source = absolute_path(ARGV[0])
dest = ARGV[1]
config = options.config || 'cluster.xml'

# User-supplied values are shell-escaped so that spaces and metacharacters in
# paths reach hadoop verbatim; $HADOOP_HOME is still expanded by the shell.
`$HADOOP_HOME/bin/hadoop fs -conf #{Shellwords.escape(config)} -copyFromLocal #{Shellwords.escape(source)} #{Shellwords.escape(dest)}`
|
data/bin/mandy-reduce
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "rubygems"
|
3
|
+
require "mandy"
|
4
|
+
|
5
|
+
# The script and the job name are both required.
if ARGV.size < 2
  puts "USAGE: mandy-reduce my_script.rb 'Job Name' [payload]"
  exit
end

# Returns +path+ unchanged when it is absolute, otherwise resolves it against
# the current working directory.
def absolute_path(path)
  path.start_with?('/') ? path : File.join(Dir.pwd, path)
end

# An optional third argument names a payload archive to unpack first.
if ARGV.size > 2
  payload = ARGV[2]
  Mandy::Packer.unpack(payload)
end

file = absolute_path(ARGV[0])
job_name = ARGV[1]

require file

# Fail with a readable message instead of a NoMethodError on nil when the
# script does not define the requested job.
job = Mandy::Job.find_by_name(job_name)
abort "mandy-reduce: no job named #{job_name.inspect} is defined in #{file}" unless job
job.run_reduce
|
data/bin/mandy-rm
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
require 'shellwords'

options = OpenStruct.new

parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-rm file_or_folder_on_hdfs [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
parser.parse!

# A target is required; never run a recursive remove with an empty path.
if ARGV.empty?
  puts parser
  exit 1
end

file = ARGV[0]
config = options.config || 'cluster.xml'

# User-supplied values are shell-escaped so that spaces and metacharacters in
# paths reach hadoop verbatim; $HADOOP_HOME is still expanded by the shell.
`$HADOOP_HOME/bin/hadoop fs -conf #{Shellwords.escape(config)} -rmr #{Shellwords.escape(file)}`
|
data/lib/dsl.rb
ADDED
data/lib/job.rb
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
module Mandy
  # A named map/reduce job: hadoop settings plus a mapper and a reducer class.
  # The block handed to +new+ is instance_eval'd, so the public methods below
  # double as the job definition DSL.
  class Job
    # List of jobs known to the process (created lazily, starts empty).
    def self.jobs
      @jobs ||= []
    end

    # First registered job whose name equals +name+, or nil.
    def self.find_by_name(name)
      jobs.detect { |candidate| candidate.name == name }
    end

    attr_reader :settings, :name

    # Starts with pass-through mapper/reducer classes and the job name stored
    # under 'mapred.job.name', then evaluates the optional definition block.
    def initialize(name, &blk)
      @name = name
      @settings = {}
      @modules = []
      @mapper_class = Mandy::Mappers::PassThroughMapper
      @reducer_class = Mandy::Reducers::PassThroughReducer
      set('mapred.job.name', name)
      instance_eval(&blk) if blk
    end

    # Remembers modules to include into mapper/reducer classes set afterwards.
    def mixin(*modules)
      modules.each { |mod| @modules.push(mod) }
    end
    alias_method :serialize, :mixin

    def input_format(format)
      @input_format = format
    end

    def output_format(format)
      @output_format = format
    end

    # Stores a job setting; key and value are both kept as strings.
    def set(key, value)
      @settings[key.to_s] = value.to_s
    end

    def map_tasks(count)
      set('mapred.map.tasks', count)
    end

    def reduce_tasks(count)
      set('mapred.reduce.tasks', count)
    end

    # Registers a store under +name+ in Mandy.stores. Only :hbase is known.
    def store(type, name, options={})
      raise "Unknown store type #{type}" unless type == :hbase
      Mandy.stores[name] = Stores::HBase.new(options)
    end

    # Uses +klass+ as the mapper, or compiles one from the block.
    def map(klass=nil, &blk)
      @mapper_class = klass || Mandy::Mappers::Base.compile(&blk)
      include_mixins_into(@mapper_class)
    end

    # Uses +klass+ as the reducer, or compiles one from the block.
    def reduce(klass=nil, &blk)
      @reducer_class = klass || Mandy::Reducers::Base.compile(&blk)
      include_mixins_into(@reducer_class)
    end

    # Builds the mapper, optionally yields it, then executes it. When no
    # reducer was defined the mapper also gets the output formatting mixin.
    def run_map(input=STDIN, output=STDOUT, &blk)
      unless reducer_defined?
        @mapper_class.send(:include, Mandy::IO::OutputFormatting)
      end
      mapper = @mapper_class.new(input, output, @input_format, @output_format)
      blk.call(mapper) if blk
      mapper.execute
    end

    # Builds the reducer, optionally yields it, then executes it.
    def run_reduce(input=STDIN, output=STDOUT, &blk)
      reducer = @reducer_class.new(input, output, @input_format, @output_format)
      blk.call(reducer) if blk
      reducer.execute
    end

    private

    # Includes every remembered mixin into +target+ and returns +target+.
    def include_mixins_into(target)
      @modules.each { |mod| target.send(:include, mod) }
      target
    end

    def reducer_defined?
      @reducer_class != Mandy::Reducers::PassThroughReducer
    end
  end
end
|
data/lib/mandy.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "json"
|
3
|
+
require "uri"
|
4
|
+
|
5
|
+
%w(
|
6
|
+
support/formatting
|
7
|
+
task
|
8
|
+
dsl
|
9
|
+
job
|
10
|
+
packer
|
11
|
+
support/tuple
|
12
|
+
support/array_serializer
|
13
|
+
mappers/base_mapper
|
14
|
+
mappers/transpose_mapper
|
15
|
+
mappers/pass_through_mapper
|
16
|
+
reducers/base_reducer
|
17
|
+
reducers/pass_through_reducer
|
18
|
+
reducers/sum_reducer
|
19
|
+
reducers/max_reducer
|
20
|
+
reducers/min_reducer
|
21
|
+
reducers/transpose_reducer
|
22
|
+
serializers/json
|
23
|
+
stores/hbase
|
24
|
+
stores/in_memory
|
25
|
+
test_runner
|
26
|
+
ruby-hbase
|
27
|
+
).each {|file| require File.join(File.dirname(__FILE__), file) }
|
28
|
+
|
29
|
+
module Mandy
  # Process-wide registry of stores, keyed by the name they were registered
  # under. Created lazily as an empty Hash on first access.
  def self.stores
    @stores ||= {}
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Mandy
  module Mappers
    # Base class for mappers: reads the input line by line and hands each
    # key/value pair to #mapper.
    class Base < Mandy::Task
      include Mandy::IO::InputFormatting

      # Builds an anonymous mapper subclass whose #mapper is the given block.
      def self.compile(&blk)
        Class.new(Mandy::Mappers::Base) do
          define_method(:mapper, blk) if blk
        end
      end

      # Splits each input line on the first separator, deserializes both
      # halves according to the input format and calls #mapper with them.
      def execute
        @input.each_line do |record|
          key, value = record.split(KEY_VALUE_SEPERATOR, 2)
          # A line without a separator is treated as a value with a nil key.
          if value.nil?
            value = key
            key = nil
          end
          value.chomp!
          mapper(input_deserialize_key(key), input_deserialize_value(value))
        end
      end

      private

      # Default mapper: does nothing. Subclasses (or .compile) replace it.
      def mapper(key,value)
      end
    end
  end
end
|
data/lib/packer.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require "fileutils"
|
2
|
+
|
3
|
+
module Mandy
  # Packs a payload directory into a tar archive for shipping to the cluster,
  # and unpacks / removes such archives again.
  class Packer
    TMP_DIR = '/tmp/mandy'

    # Returns +dir+ untouched when it is a regular file. Otherwise tars the
    # (non-hidden) top-level entries of +dir+ into a new archive under TMP_DIR
    # and returns the archive path.
    def self.pack(dir)
      return dir if File.file?(dir)
      FileUtils.mkdir_p(TMP_DIR)
      # Timestamp alone collides when two packs happen within the same second,
      # so the pid and a random suffix are appended.
      tmp_path = "#{TMP_DIR}/packed-job-#{Time.now.to_i}-#{Process.pid}-#{rand(1_000_000)}.tar"
      # Arguments are passed as a list (no shell), so names with spaces or
      # shell metacharacters are archived correctly. Dir.glob('*') mirrors the
      # shell's `*`: hidden files are not included.
      Dir.chdir(dir) { system('tar', '-cf', tmp_path, '--', *Dir.glob('*')) }
      tmp_path
    end

    # Extracts +file+ into the current directory. Returns false without doing
    # anything unless the name ends in ".tar"; otherwise returns whether tar
    # succeeded.
    def self.unpack(file)
      return false unless File.extname(file) == '.tar'
      system('tar', '-xf', file)
    end

    # Deletes +file+ when its name ends in ".tar"; returns false otherwise.
    # NOTE(review): this also removes a .tar the user supplied directly as
    # payload (pack returns such a file unchanged) - confirm that is intended.
    def self.cleanup!(file)
      return false unless File.extname(file) == '.tar'
      FileUtils.rm_f(file)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Mandy
  module Reducers
    # Base class for reducers: groups consecutive input lines that share a key
    # and hands each group to #reducer. Input is expected to be sorted by key.
    class Base < Mandy::Task
      include Mandy::IO::OutputFormatting

      # Builds an anonymous reducer subclass whose #reducer is the given block.
      def self.compile(&blk)
        Class.new(Mandy::Reducers::Base) do
          define_method(:reducer, blk) if blk
        end
      end

      # Reads "key<TAB>value" lines and calls #reducer once per run of equal
      # keys with the deserialized key and the deserialized values.
      def execute
        last_key, values = nil, []
        @input.each_line do |line|
          # Split on the first separator only, so values that themselves
          # contain the separator are kept whole (the mapper does the same).
          key, value = line.chomp.split(KEY_VALUE_SEPERATOR, 2)
          # Lines without a separator (or blank lines) would otherwise leave
          # nil here; treat the missing part as an empty string.
          key ||= ''
          value ||= ''
          last_key = key if last_key.nil?
          if key != last_key
            reduce_group(last_key, values)
            last_key, values = key, []
          end
          values << value
        end
        # Flush the final group. As before, this also runs for empty input,
        # in which case the key is nil and the value list is empty.
        reduce_group(last_key, values)
      end

      private

      # Deserializes one group and passes it to #reducer. Every group goes
      # through here so that all of them are deserialized the same way.
      def reduce_group(key, values)
        reducer(deserialize_key(key), values.map { |v| deserialize_value(v) })
      end

      # Default reducer: does nothing. Subclasses (or .compile) replace it.
      def reducer(key,values)
      end
    end
  end
end
|
data/lib/ruby-hbase.rb
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
module HBase
  # Raised by HTable#get when the server answers 404 for the requested row.
  # Inherits from StandardError (rather than Exception) so a plain `rescue`
  # can handle it; `rescue HBase::RowNotFound` keeps working unchanged.
  class RowNotFound < StandardError
  end

  # Client for a single table exposed through an HTTP API rooted at
  # "/api/<table name>" on the host and port of the table URI.
  class HTable
    include XmlDecoder

    DEFAULT_GET_OPTIONS = {:timestamp => nil, :columns => nil}

    # table_uri - URI of the table; the last path segment is the table name.
    def initialize(table_uri)
      @table_uri = table_uri
      @uri = URI.parse(table_uri)
      @host, @table_name = @uri.host, @uri.path.split("/").last
    end

    def name
      @table_name
    end

    ######################
    # Meta-type requests

    def start_keys
      raise NotImplementedError
    end

    # Returns an array of hashes, one per column family, each with a :name key.
    def column_descriptors
      column_families = []

      # get the xml for the column descriptors
      response = Net::HTTP.get_response(@uri.host, "/api/#{@table_name}", @uri.port)
      body = response.body

      # parse the xml into a document
      doc = XML::Parser.string(body).parse

      doc.find("/table/columnfamilies/columnfamily").each do |node|
        colfam = {}
        # chop drops the last character of the name
        # (presumably the trailing ':' of a family name - TODO confirm)
        colfam[:name] = node.find_first("name").content.strip.chop
        column_families << colfam
      end
      column_families
    end

    #####################
    # Standard CRUD ops

    # Fetches one row.
    #
    # key     - row key (URL-encoded before use)
    # options - :columns   => column name or list of names to restrict to
    #           :timestamp => converted to integer milliseconds via to_f * 1000
    #
    # Returns the row's column => value hash on 200. Raises RowNotFound on
    # 404 and a RuntimeError on 204; any other status yields nil.
    def get(key, options = {})
      opts = DEFAULT_GET_OPTIONS.merge(options)

      columns = Array(opts.delete(:columns)).compact
      timestamp = opts.delete(:timestamp)
      timestamp = (timestamp.to_f * 1000).to_i.to_s if timestamp

      Net::HTTP.start(@uri.host, @uri.port) do |session|
        columns_query = columns.map{ |name| "column=#{name}" }.join("&")
        ts_section = timestamp ? "/#{timestamp}" : ""
        query_string = "?" + columns_query

        query = "/api/#{@table_name}/row/#{url_encode(key)}#{ts_section}#{query_string}"
        response = session.get(query, {"Accept" => "*/*"})

        case response.code.to_i
        when 200 #success!
          body = response.body
          parse_row_result(body).last
        when 204 #no data - probably an incorrect colname
          raise "Didn't get any data back - check your column names!"
        when 404
          raise RowNotFound, "Could not find row '#{key}'"
        else
          nil
        end
      end
    end

    # Writes the given column => value pairs to row +key+. Values are sent
    # base64-encoded. Returns true on 200, raises on any other status.
    def put(key, keys_and_values, timestamp = nil)
      Net::HTTP.start(@uri.host, @uri.port) do |session|
        xml = "<columns>"

        ts_section = timestamp ? "/#{(timestamp.to_f * 1000).to_i}" : ""

        keys_and_values.each do |name, value|
          xml << "<column><name>#{name}</name><value>#{[value.to_s].pack("m")}</value></column>"
        end

        xml << "</columns>"

        query = "/api/#{@table_name}/row/#{url_encode(key)}#{ts_section}"
        response = session.post(query, xml, {"Content-type" => "text/xml"})

        case response.code.to_i
        when 200
          true
        else
          unexpected_response(response)
        end
      end
    end

    # Deletes +row+, optionally restricted to +columns+. Returns true on 202,
    # raises on any other status. The +timestamp+ argument is accepted but
    # not used by this implementation.
    def delete(row, columns = nil, timestamp = nil)
      Net::HTTP.start(@uri.host, @uri.port) do |session|
        columns_query = Array(columns).compact.map{ |name| "column=#{name}" }.join("&")

        # The row key is URL-encoded here exactly as get and put do.
        response = session.delete("/api/#{@table_name}/row/#{url_encode(row)}?#{columns_query}")
        case response.code.to_i
        when 202
          return true
        else
          unexpected_response(response)
        end
      end
    end

    #######################
    # Scanning interface

    # Opens a scanner and returns a Scanner bound to the Location the server
    # answers with (status 201). Raises on any other status.
    def get_scanner(start_row, end_row, timestamp = nil, columns = nil)
      start_row_query = start_row ? "start_row=#{start_row}" : nil
      end_row_query = end_row ? "end_row=#{end_row}" : nil
      # NOTE(review): the timestamp is formatted like a path segment ("/<ms>")
      # but is joined into the query string below - confirm against the API.
      timestamp_section = timestamp ? "/#{(timestamp.to_f * 1000).to_i}" : nil
      columns_section = columns ? columns.map{ |col| "column=#{col}" }.join("&") : nil

      query_string = [start_row_query, end_row_query,
                      timestamp_section, columns_section].compact.join("&")

      # open the scanner
      Net::HTTP.start(@uri.host, @uri.port) do |session|
        response = session.post("/api/#{@table_name}/scanner?#{query_string}",
          "", {"Accept" => "text/xml"}
        )

        case response.code.to_i
        when 201
          # redirect - grab the path and send
          Scanner.new(self, "http://#{@uri.host}:#{@uri.port}" + response["Location"])
        else
          unexpected_response(response)
        end
      end
    end

    private

    def url_encode(str)
      ERB::Util.url_encode(str)
    end

    def unexpected_response(response)
      raise "Unexpected response code #{response.code.to_i}:\n#{response.body}"
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module HBase
  # Iterates over the rows of an already opened server-side scanner.
  class Scanner
    include XmlDecoder

    def initialize(table, scanner_uri)
      @table = table
      @scanner_uri = scanner_uri
    end

    # Not implemented: does nothing.
    def close
    end

    # Not implemented: does nothing.
    def next
    end

    # Posts to the scanner URI repeatedly, yielding the parsed row name and
    # values for every 200 response. Stops on 404 and raises on any other
    # status.
    def each
      target = URI.parse(@scanner_uri)
      Net::HTTP.start(target.host, target.port) do |session|
        while true
          response = session.post(@scanner_uri, "")
          status = response.code.to_i

          # 404 marks the end of the scan
          break if status == 404

          unless status == 200
            raise "Unexpected response code #{response.code}, body:\n#{response.body}"
          end

          yield(*parse_row_result(response.body))
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module HBase
  # Mixin that turns a row XML document into Ruby values.
  module XmlDecoder
    # Parses +xml+ and returns [row_name, values]: row_name is the stripped
    # content of /row/name (nil when that node is absent) and values maps each
    # base64-decoded column name to its base64-decoded value.
    def parse_row_result(xml)
      doc = XML::Parser.string(xml).parse

      row_node = doc.root.find_first("/row/name")
      row_name = row_node && row_node.content.strip

      # Column names and values are both stored base64-encoded.
      decode = lambda { |node| node.content.strip.unpack('m').first }

      values = {}
      doc.find("/row/columns/column").each do |column|
        column_name = decode.call(column.find_first("name"))
        values[column_name] = decode.call(column.find_first("value"))
      end

      [row_name, values]
    end
  end
end
|
data/lib/stores/hbase.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Mandy
  module Stores
    # Store backed by an HBase table; reads and writes go straight to an
    # ::HBase::HTable built from options[:url].
    class HBase
      attr_reader :options

      def initialize(options)
        @options = options
        @table = ::HBase::HTable.new(options[:url])
      end

      # Looks +key+ up in the underlying table.
      def get(key)
        @table.get(key)
      end

      # Writes +values+ under +key+ in the underlying table.
      def put(key, values)
        @table.put(key, values)
      end

      # Two stores are equal when they have the same class and equal options.
      def ==(other)
        return false unless self.class == other.class
        options == other.options
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Mandy
  module Stores
    # Store that keeps everything in a Hash inside the current process.
    # Keys are converted to strings on both read and write.
    class InMemory
      attr_reader :options

      def initialize(options={})
        @options = options
        @table = {}
      end

      # Returns the values stored under +key+, or nil.
      def get(key)
        @table[key.to_s]
      end

      # Stores +values+ under +key+ and returns them.
      def put(key, values)
        @table[key.to_s] = values
      end

      # Two stores are equal when they have the same class and equal options;
      # the stored data is not compared.
      def ==(other)
        return false unless self.class == other.class
        options == other.options
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Mandy
  # Wraps a list so it can be written as a single SEPERATOR-joined string,
  # and splits such strings back into lists.
  class ArraySerializer

    SEPERATOR = '|' unless defined?(SEPERATOR)

    attr_reader :items

    # A nil +items+ is treated as an empty list.
    def initialize(items)
      @items = items || []
    end

    # Items joined with SEPERATOR.
    def to_s
      @items.join(SEPERATOR)
    end

    # Equal to another serializer with equal items, or to a plain Array that
    # equals the items.
    def ==(other)
      return items == other.items if self.class == other.class
      other.is_a?(Array) && items == other
    end

    def to_a
      @items
    end

    # Splits +str+ on SEPERATOR into an array of strings.
    def self.from_s(str)
      str.split(SEPERATOR)
    end

    # Splits +str+ on SEPERATOR and parses every part as a Tuple.
    def self.tuples_from_s(str)
      from_s(str).map { |part| Tuple.from_s(part) }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Mandy
  module IO
    # Deserialization of incoming keys/values that honours the :plain input
    # format: plain input is passed through untouched.
    module InputFormatting
      def input_deserialize_key(key)
        input_format == :plain ? key : deserialize_key(key)
      end

      def input_deserialize_value(value)
        input_format == :plain ? value : deserialize_value(value)
      end
    end

    # Serialization of outgoing keys/values that honours the :plain output
    # format: plain output is passed through untouched.
    module OutputFormatting
      def output_serialize_key(key)
        output_format == :plain ? key : serialize_key(key)
      end

      def output_serialize_value(value)
        output_format == :plain ? value : serialize_value(value)
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Mandy
  # A name/value pair that serializes as "name<SEPERATOR>value".
  class Tuple

    SEPERATOR = ',' unless defined?(SEPERATOR)

    attr_accessor :name, :value

    # name_accessor / value_accessor - optional extra accessor names to alias
    # to #name and #value. The aliases are defined on the class, so they
    # become available on every Tuple instance.
    def initialize(name, value, name_accessor = nil, value_accessor = nil)
      @name, @value = name, value
      alias_accessor(name_accessor, :name) unless name_accessor.nil?
      alias_accessor(value_accessor, :value) unless value_accessor.nil?
    end

    def to_s
      %(#{@name}#{SEPERATOR}#{@value})
    end

    # Parses "A<SEPERATOR>B" into a Tuple; raises when the string does not
    # consist of exactly two fields.
    def self.from_s(str)
      # The -1 limit keeps trailing empty fields, so a tuple with an empty
      # value ("a,") parses back into what #to_s produced.
      parts = str.split(SEPERATOR, -1)
      raise "Can't create tuple from #{str.inspect}. Format should be 'A#{SEPERATOR}B'" unless parts.size==2
      new(*parts)
    end

    def inspect
      %(<Tuple #{self.to_s}>)
    end

    # Tuples are equal when they have the same class, name and value.
    def ==(other)
      return false unless self.class == other.class
      self.name == other.name && self.value == other.value
    end

    # eql? and hash agree with ==, so equal tuples behave as the same Hash key.
    alias_method :eql?, :==

    def hash
      [self.class, name, value].hash
    end

    private

    def alias_accessor(new_accessor, old_accessor)
      self.class.send(:alias_method, new_accessor, old_accessor)
      self.class.send(:alias_method, :"#{new_accessor}=", :"#{old_accessor}=")
    end
  end
end
|
data/lib/task.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
module Mandy
  # Common base for mappers and reducers: holds the input/output streams and
  # formats, emits records, reads job parameters and talks to stores.
  class Task
    JSON_PAYLOAD_KEY = "json"
    KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
    NUMERIC_PADDING = 16

    attr_reader :input_format, :output_format

    def initialize(input=STDIN, output=STDOUT, input_format = nil, output_format = nil)
      @input, @output = input, output
      @input_format, @output_format = input_format, output_format
    end

    # Writes one record to the output. With a value, key and value are
    # serialized and joined by KEY_VALUE_SEPERATOR; without one, only the key
    # (as a plain string) is written. A nil key is written as 'nil'.
    def emit(key, value=nil)
      key = 'nil' if key.nil?
      @output.puts(value.nil? ? key.to_s : "#{output_serialize_key(key)}#{KEY_VALUE_SEPERATOR}#{output_serialize_value(value)}")
    end

    # Reads +key+ from the store registered under +store+.
    def get(store, key)
      Mandy.stores[store].get(key)
    end

    # Writes +values+ under +key+ to the store registered under +store+.
    def put(store, key, values)
      Mandy.stores[store].put(key, values)
    end

    private

    # Left-pads the integer part of +key+ with zeros to NUMERIC_PADDING
    # characters, leaving anything after the first "." as it is.
    def pad(key)
      key_parts = key.to_s.split(".")
      key_parts[0] = key_parts.first.rjust(NUMERIC_PADDING, '0')
      key_parts.join('.')
    end

    def update_status(message)
      STDERR.puts("reporter:status:#{message}")
    end

    def update_counter(group, counter, count)
      STDERR.puts("reporter:counter:#{group},#{counter},#{count}")
    end

    # Looks a job parameter up in the JSON payload when one is present in the
    # environment, otherwise in the environment itself.
    def parameter(name)
      return find_json_param(name) if json_provided?
      ENV[name.to_s]
    end

    # Decodes and parses the JSON payload once, then looks +name+ up in it.
    # A payload that parses to nil (JSON "null") is treated as no parameters.
    def find_json_param(name)
      @json_args ||= JSON.parse(percent_decode(ENV[JSON_PAYLOAD_KEY])) || {}
      @json_args[name.to_s]
    end

    # Replaces %XX escapes with the bytes they stand for and leaves everything
    # else (including '+') untouched. Stands in for URI.decode, which no
    # longer exists on Ruby 3.0+.
    def percent_decode(text)
      text.gsub(/(?:%[0-9a-fA-F]{2})+/) do |run|
        [run.delete('%')].pack('H*').force_encoding(Encoding::UTF_8)
      end
    end

    def json_provided?
      !ENV[JSON_PAYLOAD_KEY].nil?
    end

    def deserialize_key(key)
      key
    end

    def deserialize_value(value)
      value
    end

    # Numeric keys shorter than NUMERIC_PADDING characters are zero-padded;
    # every other key is returned unchanged.
    def serialize_key(key)
      key = pad(key) if key.is_a?(Numeric) && key.to_s.length < NUMERIC_PADDING
      key
    end

    # Arrays are written through ArraySerializer; everything else via to_s.
    def serialize_value(value)
      value = ArraySerializer.new(value) if value.is_a?(Array)
      value.to_s
    end

    def output_serialize_key(key)
      serialize_key(key)
    end

    def output_serialize_value(value)
      serialize_value(value)
    end

  end
end
|
data/lib/test_runner.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# StringIO is used for the default output streams below.
require "stringio"

module Mandy
  # Runs the map and reduce phases of a job against in-memory streams.
  class TestRunner
    attr_reader :job

    # job  - name of the job to run (defaults to the first registered job)
    # opts - :parameters => Hash of job parameters, exposed to tasks as the
    #        JSON payload in the environment
    def initialize(job=Mandy::Job.jobs.first.name, opts={})
      # Without parameters an empty JSON object is stored (not "null"), so a
      # task that asks for a parameter simply gets nil back.
      ENV[Mandy::Task::JSON_PAYLOAD_KEY] = (opts[:parameters] || {}).to_json
      @job = Mandy::Job.find_by_name(job)
    end

    # Runs the map phase. +input_stream+ may be an IO-like object, a String,
    # or an Array of lines. Returns the rewound output stream.
    def map(input_stream, output_stream=StringIO.new(''), &blk)
      input_stream = input_from_array(input_stream) if input_stream.is_a?(Array)
      input_stream = StringIO.new(input_stream) if input_stream.is_a?(String)
      @job.run_map(input_stream, output_stream, &blk)
      output_stream.rewind
      output_stream
    end

    # Runs the reduce phase. +input_stream+ may be an IO-like object, a
    # String, or a Hash of key => value(s). Returns the rewound output stream.
    def reduce(input_stream, output_stream=StringIO.new(''), &blk)
      input_stream = input_from_hash(input_stream) if input_stream.is_a?(Hash)
      input_stream = StringIO.new(input_stream) if input_stream.is_a?(String)
      @job.run_reduce(input_stream, output_stream, &blk)
      output_stream.rewind
      output_stream
    end

    # Returns a runner that chains every registered job, in order.
    def self.end_to_end(verbose=false)
      CompositeJobRunner.new(Mandy::Job.jobs,verbose)
    end

    private

    def input_from_array(input)
      input.join("\n")
    end

    # Turns {key => value} / {key => [values]} into sorted "key\tvalue" lines.
    def input_from_hash(input)
      lines = []
      input.each do |key, values|
        (values.is_a?(Array) ? values : [values]).each do |value|
          lines << "#{key}\t#{value}"
        end
      end
      input_from_array(lines.sort)
    end

    # Runs a list of jobs back to back, feeding each job's reduce output into
    # the next job's map phase.
    class CompositeJobRunner
      def initialize(jobs, verbose=false)
        @jobs = jobs
        @verbose = verbose
        @job_runners = @jobs.map { |job| Mandy::TestRunner.new(job.name) }
      end

      # Returns +output_stream+, which receives the last job's reduce output.
      def execute(input_stream, output_stream=StringIO.new(''))
        last_index = @job_runners.size - 1
        @job_runners.each_with_index do |runner, index|
          map_temp = StringIO.new('')
          runner.map(input_stream, map_temp)
          if @verbose
            puts "#{runner.job.name} [MAP] #{map_temp.readlines.inspect}"
            map_temp.rewind
          end

          # Sorting stands in for the shuffle between map and reduce.
          reduce_input = StringIO.new(map_temp.readlines.sort.join(''))
          reduce_target = index == last_index ? output_stream : StringIO.new('')
          runner.reduce(reduce_input, reduce_target)
          if @verbose
            # Read from the stream that was actually written to, so the final
            # job's reduce output is shown as well.
            puts "#{runner.job.name} [RED] #{reduce_target.readlines.inspect}"
            reduce_target.rewind
          end

          input_stream = reduce_target
        end
        output_stream
      end
    end
  end
end
|
data/readme.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
Mandy - Simplified Hadoop distribution for Ruby code
|
2
|
+
====================================================
|
3
|
+
|
4
|
+
Mandy hides the differences and complexities of running map/reduce tasks locally, on a distributed cluster, or in test environments.
|
5
|
+
|
6
|
+
It provides a simple DSL to define new jobs for distribution. See examples/word_count.rb for a demo of some functionality.
|
7
|
+
Run the word count example locally with...
|
8
|
+
|
9
|
+
mandy-local examples/word_count.rb examples/alice.txt examples/output
|
10
|
+
|
11
|
+
Mandy is licensed under the MIT Licence, please see LICENCE for further information.
|
metadata
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mandy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.14
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andy Kent
|
8
|
+
- Paul Ingles
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-07-09 00:00:00 +01:00
|
14
|
+
default_executable:
|
15
|
+
dependencies: []
|
16
|
+
|
17
|
+
description: Map/Reduce
|
18
|
+
email: andy.kent@me.com
|
19
|
+
executables:
|
20
|
+
- mandy
|
21
|
+
- mandy-hadoop
|
22
|
+
- mandy-local
|
23
|
+
- mandy-map
|
24
|
+
- mandy-put
|
25
|
+
- mandy-get
|
26
|
+
- mandy-reduce
|
27
|
+
- mandy-rm
|
28
|
+
- mandy-install
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- bin/mandy-hadoop
|
35
|
+
- bin/mandy-local
|
36
|
+
- bin/mandy-map
|
37
|
+
- bin/mandy-get
|
38
|
+
- bin/mandy-put
|
39
|
+
- bin/mandy-reduce
|
40
|
+
- readme.md
|
41
|
+
- Rakefile
|
42
|
+
- lib/mandy.rb
|
43
|
+
- lib/support/tuple.rb
|
44
|
+
- lib/support/formatting.rb
|
45
|
+
- lib/support/array_serializer.rb
|
46
|
+
- lib/task.rb
|
47
|
+
- lib/dsl.rb
|
48
|
+
- lib/job.rb
|
49
|
+
- lib/mappers/base_mapper.rb
|
50
|
+
- lib/mappers/transpose_mapper.rb
|
51
|
+
- lib/mappers/pass_through_mapper.rb
|
52
|
+
- lib/packer.rb
|
53
|
+
- lib/reducers/base_reducer.rb
|
54
|
+
- lib/reducers/transpose_reducer.rb
|
55
|
+
- lib/reducers/pass_through_reducer.rb
|
56
|
+
- lib/reducers/sum_reducer.rb
|
57
|
+
- lib/reducers/max_reducer.rb
|
58
|
+
- lib/reducers/min_reducer.rb
|
59
|
+
- lib/serializers/json.rb
|
60
|
+
- lib/stores/hbase.rb
|
61
|
+
- lib/stores/in_memory.rb
|
62
|
+
- lib/ruby-hbase.rb
|
63
|
+
- lib/ruby-hbase/hbase_table.rb
|
64
|
+
- lib/ruby-hbase/scanner.rb
|
65
|
+
- lib/ruby-hbase/version.rb
|
66
|
+
- lib/ruby-hbase/xml_decoder.rb
|
67
|
+
- lib/test_runner.rb
|
68
|
+
has_rdoc: true
|
69
|
+
homepage: http://github.com/trafficbroker/mandy
|
70
|
+
licenses: []
|
71
|
+
|
72
|
+
post_install_message:
|
73
|
+
rdoc_options: []
|
74
|
+
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: "0"
|
82
|
+
version:
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: "0"
|
88
|
+
version:
|
89
|
+
requirements: []
|
90
|
+
|
91
|
+
rubyforge_project:
|
92
|
+
rubygems_version: 1.3.4
|
93
|
+
signing_key:
|
94
|
+
specification_version: 2
|
95
|
+
summary: Map/Reduce
|
96
|
+
test_files: []
|
97
|
+
|