trafficbroker-mandy 0.2.12 → 0.2.13
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/job.rb +19 -2
- data/lib/mandy.rb +6 -4
- data/lib/mappers/base_mapper.rb +4 -2
- data/lib/reducers/base_reducer.rb +4 -2
- data/lib/serializers/json.rb +13 -0
- data/lib/support/formatting.rb +27 -0
- data/lib/task.rb +30 -7
- metadata +3 -1
data/lib/job.rb
CHANGED
@@ -26,6 +26,15 @@ module Mandy
|
|
26
26
|
def mixin(*modules)
|
27
27
|
modules.each {|m| @modules << m}
|
28
28
|
end
|
29
|
+
alias_method :serialize, :mixin
|
30
|
+
|
31
|
+
def input_format(format)
|
32
|
+
@input_format = format
|
33
|
+
end
|
34
|
+
|
35
|
+
def output_format(format)
|
36
|
+
@output_format = format
|
37
|
+
end
|
29
38
|
|
30
39
|
def set(key, value)
|
31
40
|
@settings[key.to_s] = value.to_s
|
@@ -61,15 +70,23 @@ module Mandy
|
|
61
70
|
end
|
62
71
|
|
63
72
|
def run_map(input=STDIN, output=STDOUT, &blk)
|
64
|
-
mapper = @mapper_class.new(input, output)
|
73
|
+
@mapper_class.send(:include, Mandy::IO::OutputFormatting) unless reducer_defined?
|
74
|
+
mapper = @mapper_class.new(input, output, @input_format, @output_format)
|
65
75
|
yield(mapper) if blk
|
66
76
|
mapper.execute
|
67
77
|
end
|
68
78
|
|
69
79
|
def run_reduce(input=STDIN, output=STDOUT, &blk)
|
70
|
-
reducer = @reducer_class.new(input, output)
|
80
|
+
reducer = @reducer_class.new(input, output, @input_format, @output_format)
|
71
81
|
yield(reducer) if blk
|
72
82
|
reducer.execute
|
73
83
|
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
def reducer_defined?
|
88
|
+
@reducer_class != Mandy::Reducers::PassThroughReducer
|
89
|
+
end
|
90
|
+
|
74
91
|
end
|
75
92
|
end
|
data/lib/mandy.rb
CHANGED
@@ -1,4 +1,9 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "json"
|
3
|
+
require "uri"
|
4
|
+
|
1
5
|
%w(
|
6
|
+
support/formatting
|
2
7
|
task
|
3
8
|
dsl
|
4
9
|
job
|
@@ -14,16 +19,13 @@
|
|
14
19
|
reducers/max_reducer
|
15
20
|
reducers/min_reducer
|
16
21
|
reducers/transpose_reducer
|
22
|
+
serializers/json
|
17
23
|
stores/hbase
|
18
24
|
stores/in_memory
|
19
25
|
test_runner
|
20
26
|
ruby-hbase
|
21
27
|
).each {|file| require File.join(File.dirname(__FILE__), file) }
|
22
28
|
|
23
|
-
require "rubygems"
|
24
|
-
require "json"
|
25
|
-
require "uri"
|
26
|
-
|
27
29
|
module Mandy
|
28
30
|
class << self
|
29
31
|
def stores
|
data/lib/mappers/base_mapper.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module Mandy
|
2
2
|
module Mappers
|
3
3
|
class Base < Mandy::Task
|
4
|
+
include Mandy::IO::InputFormatting
|
5
|
+
|
4
6
|
def self.compile(&blk)
|
5
7
|
Class.new(Mandy::Mappers::Base) do
|
6
8
|
self.class_eval do
|
@@ -14,10 +16,10 @@ module Mandy
|
|
14
16
|
key, value = line.split(KEY_VALUE_SEPERATOR, 2)
|
15
17
|
key, value = nil, key if value.nil?
|
16
18
|
value.chomp!
|
17
|
-
mapper(key, value)
|
19
|
+
mapper(input_deserialize_key(key), input_deserialize_value(value))
|
18
20
|
end
|
19
21
|
end
|
20
|
-
|
22
|
+
|
21
23
|
private
|
22
24
|
|
23
25
|
def mapper(key,value)
|
data/lib/reducers/base_reducer.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module Mandy
|
2
2
|
module Reducers
|
3
3
|
class Base < Mandy::Task
|
4
|
+
include Mandy::IO::OutputFormatting
|
5
|
+
|
4
6
|
def self.compile(&blk)
|
5
7
|
Class.new(Mandy::Reducers::Base) do
|
6
8
|
self.class_eval do
|
@@ -21,11 +23,11 @@ module Mandy
|
|
21
23
|
end
|
22
24
|
values << value
|
23
25
|
end
|
24
|
-
reducer(last_key, values)
|
26
|
+
reducer(deserialize_key(last_key), values.map {|v| deserialize_value(v) })
|
25
27
|
end
|
26
28
|
|
27
29
|
private
|
28
|
-
|
30
|
+
|
29
31
|
def reducer(key,values)
|
30
32
|
#nil
|
31
33
|
end
|
data/lib/support/formatting.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Mandy
|
2
|
+
module IO
|
3
|
+
module InputFormatting
|
4
|
+
def input_deserialize_key(key)
|
5
|
+
return key if input_format && input_format == :plain
|
6
|
+
deserialize_key(key)
|
7
|
+
end
|
8
|
+
|
9
|
+
def input_deserialize_value(value)
|
10
|
+
return value if input_format && input_format == :plain
|
11
|
+
deserialize_value(value)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
module OutputFormatting
|
16
|
+
def output_serialize_key(key)
|
17
|
+
return key if output_format && output_format == :plain
|
18
|
+
serialize_key(key)
|
19
|
+
end
|
20
|
+
|
21
|
+
def output_serialize_value(value)
|
22
|
+
return value if output_format && output_format == :plain
|
23
|
+
serialize_value(value)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/task.rb
CHANGED
@@ -3,18 +3,19 @@ module Mandy
|
|
3
3
|
JSON_PAYLOAD_KEY = "json"
|
4
4
|
KEY_VALUE_SEPERATOR = "\t" unless defined?(KEY_VALUE_SEPERATOR)
|
5
5
|
NUMERIC_PADDING = 16
|
6
|
-
|
7
|
-
|
6
|
+
|
7
|
+
attr_reader :input_format, :output_format
|
8
|
+
|
9
|
+
def initialize(input=STDIN, output=STDOUT, input_format = nil, output_format = nil)
|
8
10
|
@input, @output = input, output
|
11
|
+
@input_format, @output_format = input_format, output_format
|
9
12
|
end
|
10
13
|
|
11
14
|
def emit(key, value=nil)
|
12
15
|
key = 'nil' if key.nil?
|
13
|
-
|
14
|
-
|
15
|
-
@output.puts(value.nil? ? key.to_s : "#{serialize(key)}\t#{serialize(value)}")
|
16
|
+
@output.puts(value.nil? ? key.to_s : "#{output_serialize_key(key)}\t#{output_serialize_value(value)}")
|
16
17
|
end
|
17
|
-
|
18
|
+
|
18
19
|
def get(store, key)
|
19
20
|
Mandy.stores[store].get(key)
|
20
21
|
end
|
@@ -43,10 +44,32 @@ module Mandy
|
|
43
44
|
def json_provided?
|
44
45
|
!ENV[JSON_PAYLOAD_KEY].nil?
|
45
46
|
end
|
47
|
+
|
48
|
+
def deserialize_key(key)
|
49
|
+
key
|
50
|
+
end
|
51
|
+
|
52
|
+
def deserialize_value(value)
|
53
|
+
value
|
54
|
+
end
|
55
|
+
|
56
|
+
def serialize_key(key)
|
57
|
+
key = pad(key) if key.is_a?(Numeric) && key.to_s.length < NUMERIC_PADDING
|
58
|
+
key
|
59
|
+
end
|
46
60
|
|
47
|
-
def serialize(value)
|
61
|
+
def serialize_value(value)
|
48
62
|
value = ArraySerializer.new(value) if value.is_a?(Array)
|
49
63
|
value.to_s
|
50
64
|
end
|
65
|
+
|
66
|
+
def output_serialize_key(key)
|
67
|
+
serialize_key(key)
|
68
|
+
end
|
69
|
+
|
70
|
+
def output_serialize_value(value)
|
71
|
+
serialize_value(value)
|
72
|
+
end
|
73
|
+
|
51
74
|
end
|
52
75
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: trafficbroker-mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.12
|
4
|
+
version: 0.2.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- Rakefile
|
41
41
|
- lib/mandy.rb
|
42
42
|
- lib/support/tuple.rb
|
43
|
+
- lib/support/formatting.rb
|
43
44
|
- lib/support/array_serializer.rb
|
44
45
|
- lib/task.rb
|
45
46
|
- lib/dsl.rb
|
@@ -54,6 +55,7 @@ files:
|
|
54
55
|
- lib/reducers/sum_reducer.rb
|
55
56
|
- lib/reducers/max_reducer.rb
|
56
57
|
- lib/reducers/min_reducer.rb
|
58
|
+
- lib/serializers/json.rb
|
57
59
|
- lib/stores/hbase.rb
|
58
60
|
- lib/stores/in_memory.rb
|
59
61
|
- lib/ruby-hbase.rb
|