trafficbroker-mandy 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/job.rb +9 -0
- data/lib/mandy.rb +16 -4
- data/lib/mappers/base_mapper.rb +3 -20
- data/lib/reducers/base_reducer.rb +1 -17
- data/lib/ruby-hbase/hbase_table.rb +166 -0
- data/lib/ruby-hbase/scanner.rb +55 -0
- data/lib/ruby-hbase/version.rb +9 -0
- data/lib/ruby-hbase/xml_decoder.rb +18 -0
- data/lib/ruby-hbase.rb +10 -0
- data/lib/stores/hbase.rb +24 -0
- data/lib/stores/in_memory.rb +24 -0
- data/lib/task.rb +30 -0
- data/lib/test_runner.rb +2 -2
- metadata +9 -1
data/lib/job.rb
CHANGED
@@ -34,6 +34,15 @@ module Mandy
|
|
34
34
|
set('mapred.reduce.tasks', count)
|
35
35
|
end
|
36
36
|
|
37
|
+
# Registers a store under +name+ in Mandy.stores.
#
# type    - kind of store to build; only :hbase is recognised.
# name    - key under which the store is kept in Mandy.stores.
# options - passed unchanged to Stores::HBase.new.
#
# Returns the newly created store. Raises a RuntimeError for any other type.
def store(type, name, options={})
  raise "Unknown store type #{type}" unless :hbase == type

  Mandy.stores[name] = Stores::HBase.new(options)
end
|
45
|
+
|
37
46
|
def map(klass=nil, &blk)
|
38
47
|
@mapper_class = klass || Mandy::Mappers::Base.compile(&blk)
|
39
48
|
end
|
data/lib/mandy.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
%w(
|
2
|
+
task
|
3
|
+
dsl
|
4
|
+
job
|
2
5
|
support/tuple
|
3
6
|
support/array_serializer
|
4
7
|
mappers/base_mapper
|
@@ -7,8 +10,17 @@
|
|
7
10
|
reducers/pass_through_reducer
|
8
11
|
reducers/sum_reducer
|
9
12
|
reducers/max_reducer
|
10
|
-
reducers/min_reducer
|
11
|
-
|
12
|
-
|
13
|
+
reducers/min_reducer
|
14
|
+
stores/hbase
|
15
|
+
stores/in_memory
|
13
16
|
test_runner
|
14
|
-
|
17
|
+
ruby-hbase
|
18
|
+
).each {|file| require File.join(File.dirname(__FILE__), file) }
|
19
|
+
|
20
|
+
module Mandy
  # Registry of stores, keyed by name. The Hash is created on first access
  # and the same object is returned on every later call.
  def self.stores
    @stores ||= {}
  end
end
|
data/lib/mappers/base_mapper.rb
CHANGED
@@ -1,13 +1,6 @@
|
|
1
1
|
module Mandy
|
2
2
|
module Mappers
|
3
|
-
class Base
|
4
|
-
|
5
|
-
KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
|
6
|
-
|
7
|
-
def initialize(input=STDIN, output=STDOUT)
|
8
|
-
@input, @output = input, output
|
9
|
-
end
|
10
|
-
|
3
|
+
class Base < Mandy::Task
|
11
4
|
def self.compile(&blk)
|
12
5
|
Class.new(Mandy::Mappers::Base) do
|
13
6
|
self.class_eval do
|
@@ -15,7 +8,7 @@ module Mandy
|
|
15
8
|
end
|
16
9
|
end
|
17
10
|
end
|
18
|
-
|
11
|
+
|
19
12
|
def execute
|
20
13
|
@input.each_line do |line|
|
21
14
|
key, value = line.split(KEY_VALUE_SEPERATOR)
|
@@ -24,22 +17,12 @@ module Mandy
|
|
24
17
|
mapper(key, value)
|
25
18
|
end
|
26
19
|
end
|
27
|
-
|
28
|
-
def emit(key, value=nil)
|
29
|
-
key = 'nil' if key.nil?
|
30
|
-
@output.puts(value.nil? ? key.to_s : "#{serialize(key)}\t#{serialize(value)}")
|
31
|
-
end
|
32
|
-
|
20
|
+
|
33
21
|
private
|
34
22
|
|
35
23
|
def mapper(key,value)
|
36
24
|
#nil
|
37
25
|
end
|
38
|
-
|
39
|
-
def serialize(value)
|
40
|
-
value = ArraySerializer.new(value) if value.is_a?(Array)
|
41
|
-
value.to_s
|
42
|
-
end
|
43
26
|
end
|
44
27
|
end
|
45
28
|
end
|
data/lib/reducers/base_reducer.rb
CHANGED
@@ -1,12 +1,6 @@
|
|
1
1
|
module Mandy
|
2
2
|
module Reducers
|
3
|
-
class Base
|
4
|
-
KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
|
5
|
-
|
6
|
-
def initialize(input=STDIN, output=STDOUT)
|
7
|
-
@input, @output = input, output
|
8
|
-
end
|
9
|
-
|
3
|
+
class Base < Mandy::Task
|
10
4
|
def self.compile(&blk)
|
11
5
|
Class.new(Mandy::Reducers::Base) do
|
12
6
|
self.class_eval do
|
@@ -30,21 +24,11 @@ module Mandy
|
|
30
24
|
reducer(last_key, values)
|
31
25
|
end
|
32
26
|
|
33
|
-
def emit(key, value=nil)
|
34
|
-
key = 'nil' if key.nil?
|
35
|
-
@output.puts(value.nil? ? key.to_s : "#{serialize(key)}\t#{serialize(value)}")
|
36
|
-
end
|
37
|
-
|
38
27
|
private
|
39
28
|
|
40
29
|
def reducer(key,values)
|
41
30
|
#nil
|
42
31
|
end
|
43
|
-
|
44
|
-
def serialize(value)
|
45
|
-
value = ArraySerializer.new(value) if value.is_a?(Array)
|
46
|
-
value.to_s
|
47
|
-
end
|
48
32
|
end
|
49
33
|
end
|
50
34
|
end
|
data/lib/ruby-hbase/hbase_table.rb
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
module HBase
  # Raised by HTable#get when the server answers 404 for the requested row.
  #
  # Inherits from StandardError so that a plain `rescue` can handle it; code
  # that rescues RowNotFound (or Exception) explicitly behaves as before.
  class RowNotFound < StandardError
  end

  # HTTP client for a single table. Every request goes to a path under
  # "/api/<table name>" on the host and port of the URI given to #initialize.
  class HTable
    include XmlDecoder

    DEFAULT_GET_OPTIONS = {:timestamp => nil, :columns => nil}

    # table_uri - String URL of the table. Host and port address the server;
    #             the last path segment is used as the table name.
    def initialize(table_uri)
      @table_uri = table_uri
      @uri = URI.parse(table_uri)
      @host, @table_name = @uri.host, @uri.path.split("/").last
    end

    # Returns the table name taken from the URI.
    def name
      @table_name
    end

    ######################
    # Meta-type requests

    # Not implemented; always raises NotImplementedError.
    def start_keys
      raise NotImplementedError
    end

    # Fetches "/api/<table>" and returns an Array of {:name => String}
    # hashes, one per /table/columnfamilies/columnfamily node.
    def column_descriptors
      response = Net::HTTP.get_response(@uri.host, "/api/#{@table_name}", @uri.port)
      doc = XML::Parser.string(response.body).parse

      doc.find("/table/columnfamilies/columnfamily").map do |node|
        # chop drops the last character of the stripped name
        # (presumably a trailing ':' family delimiter - TODO confirm).
        {:name => node.find_first("name").content.strip.chop}
      end
    end

    #####################
    # Standard CRUD ops

    # Fetches one row.
    #
    # key     - row key; URL-encoded before being placed in the path.
    # options - :columns   => a column name or Array of names to restrict to
    #           :timestamp => value responding to to_f (seconds); sent as
    #                         integer milliseconds
    #
    # Returns the Hash of column values produced by parse_row_result on 200.
    # Raises RuntimeError on 204 and RowNotFound on 404; returns nil for any
    # other response code.
    def get(key, options = {})
      opts = DEFAULT_GET_OPTIONS.merge(options)
      timestamp = opts[:timestamp]

      ts_section = timestamp ? "/#{epoch_millis(timestamp)}" : ""
      query = "/api/#{@table_name}/row/#{url_encode(key)}#{ts_section}" +
              query_suffix(column_params(opts[:columns]))

      Net::HTTP.start(@uri.host, @uri.port) do |session|
        response = session.get(query, {"Accept" => "*/*"})

        case response.code.to_i
        when 200 #success!
          parse_row_result(response.body).last
        when 204 #no data - probably an incorrect colname
          raise "Didn't get any data back - check your column names!"
        when 404
          raise RowNotFound, "Could not find row '#{key}'"
        else
          # Other codes are deliberately reported as "no result" rather
          # than raised, unlike put/delete/get_scanner.
          nil
        end
      end
    end

    # Writes column values to a row.
    #
    # key             - row key; URL-encoded before being placed in the path.
    # keys_and_values - enumerable of [column name, value] pairs. Names are
    #                   XML-escaped; values are sent as base64 of value.to_s.
    # timestamp       - optional; sent as integer milliseconds.
    #
    # Returns true on a 200 response, raises RuntimeError otherwise.
    def put(key, keys_and_values, timestamp = nil)
      column_nodes = keys_and_values.map do |name, value|
        "<column><name>#{ERB::Util.html_escape(name)}</name>" \
          "<value>#{[value.to_s].pack("m")}</value></column>"
      end
      xml = "<columns>#{column_nodes.join}</columns>"

      ts_section = timestamp ? "/#{epoch_millis(timestamp)}" : ""
      query = "/api/#{@table_name}/row/#{url_encode(key)}#{ts_section}"

      Net::HTTP.start(@uri.host, @uri.port) do |session|
        response = session.post(query, xml, {"Content-type" => "text/xml"})

        case response.code.to_i
        when 200
          true
        else
          unexpected_response(response)
        end
      end
    end

    # Deletes a row, or only the given columns of it.
    #
    # row       - row key; URL-encoded like the key in get and put.
    # columns   - optional column name or Array of names.
    # timestamp - accepted but not sent with the request.
    #             NOTE(review): confirm whether deletes should carry it.
    #
    # Returns true on a 202 response, raises RuntimeError otherwise.
    def delete(row, columns = nil, timestamp = nil)
      query = "/api/#{@table_name}/row/#{url_encode(row)}" +
              query_suffix(column_params(columns))

      Net::HTTP.start(@uri.host, @uri.port) do |session|
        response = session.delete(query)
        return true if response.code.to_i == 202

        unexpected_response(response)
      end
    end

    #######################
    # Scanning interface

    # Opens a scanner and returns a Scanner for the URL in the response's
    # Location header.
    #
    # start_row, end_row - optional row bounds (URL-encoded).
    # timestamp          - optional; converted to integer milliseconds.
    # columns            - optional column name or Array of names.
    #
    # Raises RuntimeError unless the server answers 201.
    def get_scanner(start_row, end_row, timestamp = nil, columns = nil)
      params = []
      params << "start_row=#{url_encode(start_row)}" if start_row
      params << "end_row=#{url_encode(end_row)}" if end_row
      # NOTE(review): the timestamp is joined into the query string as a
      # "/<millis>" path-style fragment, exactly as before. That looks
      # unintended, but the parameter the server expects is not visible
      # here - confirm against the REST API before changing it.
      params << "/#{epoch_millis(timestamp)}" if timestamp
      params << column_params(columns)

      query_string = params.reject { |param| param.empty? }.join("&")

      # open the scanner
      Net::HTTP.start(@uri.host, @uri.port) do |session|
        response = session.post("/api/#{@table_name}/scanner#{query_suffix(query_string)}",
          "", {"Accept" => "text/xml"}
        )

        case response.code.to_i
        when 201
          # redirect - grab the path and send
          Scanner.new(self, "http://#{@uri.host}:#{@uri.port}" + response["Location"])
        else
          unexpected_response(response)
        end
      end
    end

    private

    def url_encode(str)
      ERB::Util.url_encode(str)
    end

    def unexpected_response(response)
      raise "Unexpected response code #{response.code.to_i}:\n#{response.body}"
    end

    # Converts a value in seconds to integer milliseconds.
    def epoch_millis(timestamp)
      (timestamp.to_f * 1000).to_i
    end

    # Builds "column=a&column=b" (names URL-encoded); "" when there are none.
    def column_params(columns)
      Array(columns).compact.map { |name| "column=#{url_encode(name)}" }.join("&")
    end

    # Prefixes a non-empty parameter string with "?".
    def query_suffix(params)
      params.empty? ? "" : "?#{params}"
    end
  end
end
|
data/lib/ruby-hbase/scanner.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
module HBase
  # Iterates over the rows of a scanner that HTable#get_scanner opened.
  class Scanner
    include XmlDecoder

    # table       - the object that created this scanner (stored, not used here).
    # scanner_uri - String URL of the opened scanner.
    def initialize(table, scanner_uri)
      @table, @scanner_uri = table, scanner_uri
    end

    # Placeholder: does nothing.
    def close
    end

    # Placeholder: does nothing and returns nil.
    def next
    end

    # Repeatedly POSTs to the scanner URL and yields the parsed result of
    # every 200 response (row name, then the Hash of column values) until
    # the server answers 404.
    #
    # Without a block an Enumerator is returned and no request is made.
    # Raises RuntimeError on any response code other than 200 or 404.
    def each
      return enum_for(:each) unless block_given?

      parsed_uri = URI.parse(@scanner_uri)
      Net::HTTP.start(parsed_uri.host, parsed_uri.port) do |session|
        loop do
          response = session.post(@scanner_uri, "")

          case response.code.to_i
          when 404
            # over
            break
          when 200
            # item
            yield(*parse_row_result(response.body))
          else
            # error
            raise "Unexpected response code #{response.code}, body:\n#{response.body}"
          end
        end
      end
    end
  end
end
|
data/lib/ruby-hbase/xml_decoder.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
module HBase
  # Mixin that decodes a row XML document into plain Ruby values.
  module XmlDecoder
    # Parses +xml+ and returns a two-element Array:
    #   [row name, {column name => column value}]
    #
    # The row name is the stripped text of /row/name, or nil when that node
    # is absent. Each /row/columns/column contributes one entry whose name
    # and value are both base64-decoded.
    def parse_row_result(xml)
      document = XML::Parser.string(xml).parse
      decode = lambda { |text| text.strip.unpack("m").first }

      row_name_node = document.root.find_first("/row/name")
      row_name = row_name_node && row_name_node.content.strip

      columns = {}
      document.find("/row/columns/column").each do |column|
        column_name = decode.call(column.find_first("name").content)
        columns[column_name] = decode.call(column.find_first("value").content)
      end

      [row_name, columns]
    end
  end
end
|
data/lib/ruby-hbase.rb
ADDED
data/lib/stores/hbase.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Mandy
  module Stores
    # Store backed by an ::HBase::HTable.
    class HBase
      attr_reader :options

      # options - Hash; :url is required and is handed to
      #           ::HBase::HTable.new.
      #
      # Raises ArgumentError when :url is missing, rather than letting the
      # failure surface later from URI parsing inside the table client.
      def initialize(options)
        url = options && options[:url]
        raise ArgumentError, "HBase store requires a :url option" if url.nil?

        @options = options
        @table = ::HBase::HTable.new(url)
      end

      # Returns whatever the table's #get returns for +key+.
      def get(key)
        @table.get(key)
      end

      # Forwards +values+ for +key+ to the table's #put.
      def put(key, values)
        @table.put(key, values)
      end

      # Two stores are equal when they have the same class and options.
      def ==(other)
        self.class == other.class && self.options == other.options
      end
    end
  end
end
|
data/lib/stores/in_memory.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Mandy
  module Stores
    # Hash-backed store offering the same get/put/== methods as the HBase
    # store. Keys are normalised with to_s.
    class InMemory
      attr_reader :options

      # options - kept for comparison via ==; not otherwise used here.
      def initialize(options={})
        @rows = {}
        @options = options
      end

      # Returns the values stored for +key+, or nil when nothing was stored.
      def get(key)
        @rows[key.to_s]
      end

      # Stores +values+ under +key+ and returns +values+.
      def put(key, values)
        @rows[key.to_s] = values
      end

      # Equal when the other object has the same class and options;
      # stored rows are not compared.
      def ==(other)
        return false unless self.class == other.class

        options == other.options
      end
    end
  end
end
|
data/lib/task.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Mandy
  # Shared base for tasks that read from an input stream, write
  # key/value lines to an output stream and can reach the stores
  # registered in Mandy.stores.
  class Task
    KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)

    # input  - stream the task reads from (default STDIN).
    # output - stream #emit writes to (default STDOUT).
    def initialize(input=STDIN, output=STDOUT)
      @input, @output = input, output
    end

    # Writes one line to the output stream.
    #
    # A nil key is written as the string 'nil'. When +value+ is nil only
    # key.to_s is written; otherwise the serialized key and value are
    # joined with KEY_VALUE_SEPERATOR.
    def emit(key, value=nil)
      key = 'nil' if key.nil?
      line = if value.nil?
        key.to_s
      else
        "#{serialize(key)}#{KEY_VALUE_SEPERATOR}#{serialize(value)}"
      end
      @output.puts(line)
    end

    # Reads +key+ from the store registered as +store+.
    # Raises RuntimeError when no such store is registered.
    def get(store, key)
      lookup_store(store).get(key)
    end

    # Writes +values+ under +key+ to the store registered as +store+.
    # Raises RuntimeError when no such store is registered.
    def put(store, key, values)
      lookup_store(store).put(key, values)
    end

    private

    # Fetches a registered store, failing with a descriptive message
    # instead of a NoMethodError on nil.
    def lookup_store(name)
      found = Mandy.stores[name]
      raise "Unknown store #{name.inspect}" if found.nil?

      found
    end

    # Arrays go through ArraySerializer; everything is rendered with to_s.
    def serialize(value)
      value = ArraySerializer.new(value) if value.is_a?(Array)
      value.to_s
    end
  end
end
|
data/lib/test_runner.rb
CHANGED
@@ -8,7 +8,7 @@ module Mandy
|
|
8
8
|
|
9
9
|
def map(input_stream, output_stream=StringIO.new(''), &blk)
|
10
10
|
input_stream = input_from_array(input_stream) if input_stream.is_a?(Array)
|
11
|
-
input_stream = StringIO.new(input_stream
|
11
|
+
input_stream = StringIO.new(input_stream) if input_stream.is_a?(String)
|
12
12
|
@job.run_map(input_stream, output_stream, &blk)
|
13
13
|
output_stream.rewind
|
14
14
|
output_stream
|
@@ -16,7 +16,7 @@ module Mandy
|
|
16
16
|
|
17
17
|
def reduce(input_stream, output_stream=StringIO.new(''), &blk)
|
18
18
|
input_stream = input_from_hash(input_stream) if input_stream.is_a?(Hash)
|
19
|
-
input_stream = StringIO.new(input_stream
|
19
|
+
input_stream = StringIO.new(input_stream) if input_stream.is_a?(String)
|
20
20
|
@job.run_reduce(input_stream, output_stream, &blk)
|
21
21
|
output_stream.rewind
|
22
22
|
output_stream
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: trafficbroker-mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- lib/mandy.rb
|
40
40
|
- lib/support/tuple.rb
|
41
41
|
- lib/support/array_serializer.rb
|
42
|
+
- lib/task.rb
|
42
43
|
- lib/dsl.rb
|
43
44
|
- lib/job.rb
|
44
45
|
- lib/mappers/base_mapper.rb
|
@@ -48,6 +49,13 @@ files:
|
|
48
49
|
- lib/reducers/sum_reducer.rb
|
49
50
|
- lib/reducers/max_reducer.rb
|
50
51
|
- lib/reducers/min_reducer.rb
|
52
|
+
- lib/stores/hbase.rb
|
53
|
+
- lib/stores/in_memory.rb
|
54
|
+
- lib/ruby-hbase.rb
|
55
|
+
- lib/ruby-hbase/hbase_table.rb
|
56
|
+
- lib/ruby-hbase/scanner.rb
|
57
|
+
- lib/ruby-hbase/version.rb
|
58
|
+
- lib/ruby-hbase/xml_decoder.rb
|
51
59
|
- lib/test_runner.rb
|
52
60
|
has_rdoc: false
|
53
61
|
homepage:
|