trafficbroker-mandy 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/job.rb CHANGED
@@ -34,6 +34,15 @@ module Mandy
34
34
  set('mapred.reduce.tasks', count)
35
35
  end
36
36
 
37
+ def store(type, name, options={})
38
+ Mandy.stores[name] = case type
39
+ when :hbase
40
+ Stores::HBase.new(options)
41
+ else
42
+ raise "Unknown store type #{type}"
43
+ end
44
+ end
45
+
37
46
  def map(klass=nil, &blk)
38
47
  @mapper_class = klass || Mandy::Mappers::Base.compile(&blk)
39
48
  end
data/lib/mandy.rb CHANGED
@@ -1,4 +1,7 @@
1
1
  %w(
2
+ task
3
+ dsl
4
+ job
2
5
  support/tuple
3
6
  support/array_serializer
4
7
  mappers/base_mapper
@@ -7,8 +10,17 @@
7
10
  reducers/pass_through_reducer
8
11
  reducers/sum_reducer
9
12
  reducers/max_reducer
10
- reducers/min_reducer
11
- dsl
12
- job
13
+ reducers/min_reducer
14
+ stores/hbase
15
+ stores/in_memory
13
16
  test_runner
14
- ).each {|file| require File.join(File.dirname(__FILE__), file) }
17
+ ruby-hbase
18
+ ).each {|file| require File.join(File.dirname(__FILE__), file) }
19
+
20
+ module Mandy
21
+ class << self
22
+ def stores
23
+ @stores||={}
24
+ end
25
+ end
26
+ end
data/lib/mappers/base_mapper.rb CHANGED
@@ -1,13 +1,6 @@
1
1
  module Mandy
2
2
  module Mappers
3
- class Base
4
-
5
- KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
6
-
7
- def initialize(input=STDIN, output=STDOUT)
8
- @input, @output = input, output
9
- end
10
-
3
+ class Base < Mandy::Task
11
4
  def self.compile(&blk)
12
5
  Class.new(Mandy::Mappers::Base) do
13
6
  self.class_eval do
@@ -15,7 +8,7 @@ module Mandy
15
8
  end
16
9
  end
17
10
  end
18
-
11
+
19
12
  def execute
20
13
  @input.each_line do |line|
21
14
  key, value = line.split(KEY_VALUE_SEPERATOR)
@@ -24,22 +17,12 @@ module Mandy
24
17
  mapper(key, value)
25
18
  end
26
19
  end
27
-
28
- def emit(key, value=nil)
29
- key = 'nil' if key.nil?
30
- @output.puts(value.nil? ? key.to_s : "#{serialize(key)}\t#{serialize(value)}")
31
- end
32
-
20
+
33
21
  private
34
22
 
35
23
  def mapper(key,value)
36
24
  #nil
37
25
  end
38
-
39
- def serialize(value)
40
- value = ArraySerializer.new(value) if value.is_a?(Array)
41
- value.to_s
42
- end
43
26
  end
44
27
  end
45
28
  end
data/lib/reducers/base_reducer.rb CHANGED
@@ -1,12 +1,6 @@
1
1
  module Mandy
2
2
  module Reducers
3
- class Base
4
- KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
5
-
6
- def initialize(input=STDIN, output=STDOUT)
7
- @input, @output = input, output
8
- end
9
-
3
+ class Base < Mandy::Task
10
4
  def self.compile(&blk)
11
5
  Class.new(Mandy::Reducers::Base) do
12
6
  self.class_eval do
@@ -30,21 +24,11 @@ module Mandy
30
24
  reducer(last_key, values)
31
25
  end
32
26
 
33
- def emit(key, value=nil)
34
- key = 'nil' if key.nil?
35
- @output.puts(value.nil? ? key.to_s : "#{serialize(key)}\t#{serialize(value)}")
36
- end
37
-
38
27
  private
39
28
 
40
29
  def reducer(key,values)
41
30
  #nil
42
31
  end
43
-
44
- def serialize(value)
45
- value = ArraySerializer.new(value) if value.is_a?(Array)
46
- value.to_s
47
- end
48
32
  end
49
33
  end
50
34
  end
data/lib/ruby-hbase/hbase_table.rb ADDED
@@ -0,0 +1,166 @@
1
+ module HBase
2
+ class RowNotFound < Exception
3
+ def initialize(msg=nil)
4
+ super
5
+ end
6
+ end
7
+
8
+ class HTable
9
+ include XmlDecoder
10
+
11
+ def initialize(table_uri)
12
+ @table_uri = table_uri
13
+
14
+ @uri = URI.parse(table_uri)
15
+
16
+ @host, @table_name = @uri.host, @uri.path.split("/").last
17
+ end
18
+
19
+ def name
20
+ @table_name
21
+ end
22
+
23
+ ######################
24
+ # Meta-type requests
25
+
26
+ def start_keys
27
+ raise NotImplementedError
28
+ end
29
+
30
+
31
+ def column_descriptors
32
+ column_families = []
33
+
34
+ # get the xml for the column descriptors
35
+ response = Net::HTTP.get_response(@uri.host, "/api/#{@table_name}", @uri.port)
36
+ body = response.body
37
+
38
+ # parse the xml into a document
39
+ doc = XML::Parser.string(body).parse
40
+
41
+ doc.find("/table/columnfamilies/columnfamily").each do |node|
42
+ colfam = {}
43
+ colfam[:name] = node.find_first("name").content.strip.chop
44
+ column_families << colfam
45
+ end
46
+ column_families
47
+ end
48
+
49
+
50
+ #####################
51
+ # Standard CRUD ops
52
+
53
+ DEFAULT_GET_OPTIONS = {:timestamp => nil, :columns => nil}
54
+
55
+ def get(key, options = {})
56
+ opts = DEFAULT_GET_OPTIONS.merge(options)
57
+
58
+ columns = Array(opts.delete(:columns)).compact
59
+ timestamp = opts.delete(:timestamp)
60
+ timestamp = (timestamp.to_f * 1000).to_i.to_s if timestamp
61
+
62
+ Net::HTTP.start(@uri.host, @uri.port) do |session|
63
+ columns_query = columns.map{ |name| "column=#{name}" }.join("&")
64
+
65
+ ts_section = timestamp ? "/#{timestamp}" : ""
66
+
67
+ query_string = "?" + columns_query
68
+
69
+ query = "/api/#{@table_name}/row/#{url_encode(key)}#{ts_section}#{query_string}"
70
+ response = session.get(query, {"Accept" => "*/*"})
71
+
72
+ case response.code.to_i
73
+ when 200 #success!
74
+ body = response.body
75
+ parse_row_result(body).last
76
+ when 204 #no data - probably an incorrect colname
77
+ raise "Didn't get any data back - check your column names!"
78
+ when 404
79
+ raise RowNotFound, "Could not find row '#{key}'"
80
+ else
81
+ nil
82
+ end
83
+ end
84
+ end
85
+
86
+ def put(key, keys_and_values, timestamp = nil)
87
+ Net::HTTP.start(@uri.host, @uri.port) do |session|
88
+ xml = "<columns>"
89
+
90
+ ts_section = timestamp ? "/#{(timestamp.to_f * 1000).to_i}" : ""
91
+
92
+ keys_and_values.each do |name, value|
93
+ xml << "<column><name>#{name}</name><value>#{[value.to_s].pack("m")}</value></column>"
94
+ end
95
+
96
+ xml << "</columns>"
97
+
98
+ query = "/api/#{@table_name}/row/#{url_encode(key)}#{ts_section}"
99
+ response = session.post(query, xml, {"Content-type" => "text/xml"})
100
+
101
+ case response.code.to_i
102
+ when 200
103
+ true
104
+ else
105
+ unexpected_response(response)
106
+ end
107
+ end
108
+ end
109
+
110
+ def delete(row, columns = nil, timestamp = nil)
111
+ Net::HTTP.start(@uri.host, @uri.port) do |session|
112
+ columns_query = Array(columns).compact.map{ |name| "column=#{name}" }.join("&")
113
+
114
+ response = session.delete("/api/#{@table_name}/row/#{row}?#{columns_query}")
115
+ case response.code.to_i
116
+ when 202
117
+ return true
118
+ else
119
+ unexpected_response(response)
120
+ end
121
+
122
+ end
123
+ end
124
+
125
+ #######################
126
+ # Scanning interface
127
+
128
+ def get_scanner(start_row, end_row, timestamp = nil, columns = nil)
129
+ start_row_query = start_row ? "start_row=#{start_row}" : nil
130
+ end_row_query = end_row ? "end_row=#{end_row}" : nil
131
+ timestamp_section = timestamp ? "/#{(timestamp.to_f * 1000).to_i}" : nil
132
+ columns_section = columns ? columns.map{ |col| "column=#{col}" }.join("&") : nil
133
+
134
+ query_string = [start_row_query, end_row_query,
135
+ timestamp_section, columns_section].compact.join("&")
136
+
137
+ path = ""
138
+
139
+ # open the scanner
140
+ Net::HTTP.start(@uri.host, @uri.port) do |session|
141
+ response = session.post("/api/#{@table_name}/scanner?#{query_string}",
142
+ "", {"Accept" => "text/xml"}
143
+ )
144
+
145
+ case response.code.to_i
146
+ when 201
147
+ # redirect - grab the path and send
148
+ Scanner.new(self, "http://#{@uri.host}:#{@uri.port}" + response["Location"])
149
+ else
150
+ unexpected_response(response)
151
+ end
152
+ end
153
+ end
154
+
155
+
156
+ private
157
+
158
+ def url_encode(str)
159
+ ERB::Util.url_encode(str)
160
+ end
161
+
162
+ def unexpected_response(response)
163
+ raise "Unexpected response code #{response.code.to_i}:\n#{response.body}"
164
+ end
165
+ end
166
+ end
data/lib/ruby-hbase/scanner.rb ADDED
@@ -0,0 +1,55 @@
1
+ module HBase
2
+ class Scanner
3
+ include XmlDecoder
4
+
5
+ def initialize(table, scanner_uri)
6
+ @table, @scanner_uri = table, scanner_uri
7
+ end
8
+
9
+ def close
10
+
11
+ end
12
+
13
+ def next
14
+
15
+ end
16
+
17
+ def each
18
+ parsed_uri = URI.parse(@scanner_uri)
19
+ Net::HTTP.start(parsed_uri.host, parsed_uri.port) do |session|
20
+ while true
21
+ response = session.post(@scanner_uri, "")
22
+
23
+ case response.code.to_i
24
+ when 404
25
+ # over
26
+ break
27
+ when 200
28
+ # item
29
+ yield *parse_row_result(response.body)
30
+ else
31
+ # error
32
+ raise "Unexpected response code #{response.code}, body:\n#{response.body}"
33
+ end
34
+ end
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ # def parse_row(xml)
41
+ # doc = REXML::Document.new(xml)
42
+ #
43
+ # result = {}
44
+ #
45
+ # doc.root.each_element("/row/column") do |column|
46
+ # name = column.get_elements("name")[0].text.strip
47
+ # value = column.get_elements("value")[0].text.strip.unpack("m").first
48
+ # result[name] = value
49
+ # end
50
+ #
51
+ # [doc.root.get_elements("name")[0].text.strip, result]
52
+ # end
53
+
54
+ end
55
+ end
data/lib/ruby-hbase/version.rb ADDED
@@ -0,0 +1,9 @@
1
+ module RubyHbase #:nodoc:
2
+ module VERSION #:nodoc:
3
+ MAJOR = 0
4
+ MINOR = 0
5
+ TINY = 4
6
+
7
+ STRING = [MAJOR, MINOR, TINY].join('.')
8
+ end
9
+ end
data/lib/ruby-hbase/xml_decoder.rb ADDED
@@ -0,0 +1,18 @@
1
+ module HBase
2
+ module XmlDecoder
3
+ def parse_row_result(xml)
4
+ doc = XML::Parser.string(xml).parse
5
+
6
+ name_node = doc.root.find_first("/row/name")
7
+ name = name_node ? name_node.content.strip : nil
8
+
9
+ values = {}
10
+
11
+ doc.find("/row/columns/column").each do |node|
12
+ values[node.find_first("name").content.strip.unpack('m').first] = node.find_first("value").content.strip.unpack("m").first
13
+ end
14
+
15
+ [name, values]
16
+ end
17
+ end
18
+ end
data/lib/ruby-hbase.rb ADDED
@@ -0,0 +1,10 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require "rubygems"
4
+ require "net/http"
5
+ require "erb"
6
+ require "xml/libxml"
7
+
8
+ require "ruby-hbase/xml_decoder"
9
+ require "ruby-hbase/hbase_table"
10
+ require "ruby-hbase/scanner"
data/lib/stores/hbase.rb ADDED
@@ -0,0 +1,24 @@
1
+ module Mandy
2
+ module Stores
3
+ class HBase
4
+ attr_reader :options
5
+
6
+ def initialize(options)
7
+ @options = options
8
+ @table = ::HBase::HTable.new(options[:url])
9
+ end
10
+
11
+ def get(key)
12
+ @table.get(key)
13
+ end
14
+
15
+ def put(key, values)
16
+ @table.put(key, values)
17
+ end
18
+
19
+ def ==(other)
20
+ self.class == other.class && self.options == other.options
21
+ end
22
+ end
23
+ end
24
+ end
data/lib/stores/in_memory.rb ADDED
@@ -0,0 +1,24 @@
1
+ module Mandy
2
+ module Stores
3
+ class InMemory
4
+ attr_reader :options
5
+
6
+ def initialize(options={})
7
+ @options = options
8
+ @table = {}
9
+ end
10
+
11
+ def get(key)
12
+ @table[key.to_s]
13
+ end
14
+
15
+ def put(key, values)
16
+ @table[key.to_s] = values
17
+ end
18
+
19
+ def ==(other)
20
+ self.class == other.class && self.options == other.options
21
+ end
22
+ end
23
+ end
24
+ end
data/lib/task.rb ADDED
@@ -0,0 +1,30 @@
1
+ module Mandy
2
+ class Task
3
+ KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
4
+
5
+ def initialize(input=STDIN, output=STDOUT)
6
+ @input, @output = input, output
7
+ end
8
+
9
+ def emit(key, value=nil)
10
+ key = 'nil' if key.nil?
11
+ @output.puts(value.nil? ? key.to_s : "#{serialize(key)}\t#{serialize(value)}")
12
+ end
13
+
14
+ def get(store, key)
15
+ Mandy.stores[store].get(key)
16
+ end
17
+
18
+ def put(store, key, values)
19
+ Mandy.stores[store].put(key, values)
20
+ end
21
+
22
+ private
23
+
24
+
25
+ def serialize(value)
26
+ value = ArraySerializer.new(value) if value.is_a?(Array)
27
+ value.to_s
28
+ end
29
+ end
30
+ end
data/lib/test_runner.rb CHANGED
@@ -8,7 +8,7 @@ module Mandy
8
8
 
9
9
  def map(input_stream, output_stream=StringIO.new(''), &blk)
10
10
  input_stream = input_from_array(input_stream) if input_stream.is_a?(Array)
11
- input_stream = StringIO.new(input_stream.to_s) unless input_stream.is_a?(StringIO)
11
+ input_stream = StringIO.new(input_stream) if input_stream.is_a?(String)
12
12
  @job.run_map(input_stream, output_stream, &blk)
13
13
  output_stream.rewind
14
14
  output_stream
@@ -16,7 +16,7 @@ module Mandy
16
16
 
17
17
  def reduce(input_stream, output_stream=StringIO.new(''), &blk)
18
18
  input_stream = input_from_hash(input_stream) if input_stream.is_a?(Hash)
19
- input_stream = StringIO.new(input_stream.to_s) unless input_stream.is_a?(StringIO)
19
+ input_stream = StringIO.new(input_stream) if input_stream.is_a?(String)
20
20
  @job.run_reduce(input_stream, output_stream, &blk)
21
21
  output_stream.rewind
22
22
  output_stream
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trafficbroker-mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -39,6 +39,7 @@ files:
39
39
  - lib/mandy.rb
40
40
  - lib/support/tuple.rb
41
41
  - lib/support/array_serializer.rb
42
+ - lib/task.rb
42
43
  - lib/dsl.rb
43
44
  - lib/job.rb
44
45
  - lib/mappers/base_mapper.rb
@@ -48,6 +49,13 @@ files:
48
49
  - lib/reducers/sum_reducer.rb
49
50
  - lib/reducers/max_reducer.rb
50
51
  - lib/reducers/min_reducer.rb
52
+ - lib/stores/hbase.rb
53
+ - lib/stores/in_memory.rb
54
+ - lib/ruby-hbase.rb
55
+ - lib/ruby-hbase/hbase_table.rb
56
+ - lib/ruby-hbase/scanner.rb
57
+ - lib/ruby-hbase/version.rb
58
+ - lib/ruby-hbase/xml_decoder.rb
51
59
  - lib/test_runner.rb
52
60
  has_rdoc: false
53
61
  homepage: