hadoop-rubydsl 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +53 -0
- data/Rakefile +18 -0
- data/TODO +2 -0
- data/VERSION +1 -0
- data/bin/hadoop +276 -0
- data/bin/hadoop-ruby.sh +30 -0
- data/conf/hadoop-site.xml +19 -0
- data/examples/apachelog-v2-2.rb +18 -0
- data/examples/apachelog-v2.rb +25 -0
- data/examples/apachelog.rb +15 -0
- data/examples/hive_like_test.rb +14 -0
- data/examples/word_count_test.rb +7 -0
- data/hadoop-rubydsl.gemspec +79 -0
- data/lib/core.rb +108 -0
- data/lib/hive_like.rb +122 -0
- data/lib/init.rb +60 -0
- data/lib/java/.gitignore +1 -0
- data/lib/java/hadoop-ruby.jar +0 -0
- data/lib/log_analysis.rb +165 -0
- data/lib/mapred_factory.rb +43 -0
- data/lib/util.rb +11 -0
- data/lib/word_count.rb +76 -0
- data/spec/core_spec.rb +73 -0
- data/spec/example_spec.rb +82 -0
- data/spec/hive_like_spec.rb +58 -0
- data/spec/init_spec.rb +56 -0
- data/spec/log_analysis_spec.rb +119 -0
- data/spec/mapred_factory_spec.rb +42 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/util_spec.rb +15 -0
- data/spec/word_count_spec.rb +89 -0
- metadata +100 -0
data/lib/hive_like.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'core'
require 'enumerator'

# A small Hive-flavoured DSL layer on top of the HadoopDsl core.
# Scripts written as "select a, b from table;" are rewritten into plain
# Ruby method calls before being evaluated by the base controllers.
module HadoopDsl::HiveLike
  include HadoopDsl

  # DSL methods forwarded from each controller to its model.
  AVAILABLE_METHODS = [:select, :create_table, :table]

  # Pre-processing shared by setup, mapper and reducer.
  module HiveLikeMapRed
    # Turns SQL-ish statements into Ruby method calls, e.g.
    #   select foo, bar from table;
    # becomes
    #   select("foo", "bar", "from", "table")
    # Lines starting with '#' are dropped; anything that does not look
    # like a "<method> <args>;" statement passes through unchanged.
    def pre_process(body)
      processed = ""
      body.each do |line|
        next if line =~ /^#/                  # comment line
        if line =~ /^(\w*)\s+(.*);$/          # "<method> <args>;" statement
          name, raw_args = $1, sprit_and_marge_args($2)
          processed << "#{name}(#{raw_args})\n"
        else
          processed << line + "\n"
        end
      end
      processed
    end

    # Splits a raw argument string and rejoins it as quoted Ruby args.
    # NOTE(review): the name looks like a typo of "split_and_merge_args";
    # it is kept because it is part of this mixin's public surface.
    def sprit_and_marge_args(raw)
      raw.gsub(/[\(\)]/, ' ').split.map do |token|
        %Q!"#{token.gsub(/[\s,"']/, '')}"!
      end.join(", ")
    end
  end

  # controller
  class HiveLikeSetup < BaseSetup
    # "load_data 'path', table" — records the input path and derives the
    # output path as a sibling 'outputs' directory.
    # The table argument is currently unused.
    def load_data(inputs, table)
      @from = inputs
      @to = inputs.gsub(/#{File.basename(inputs)}$/, 'outputs')
    end

    # Both map output key and value are Hadoop Text.
    def output_format
      @conf.output_key_class = Text
      @conf.output_value_class = Text
    end

    # might not need but occur error if not exists
    def select(*args) end

    include HiveLikeMapRed
  end

  class HiveLikeMapper < BaseMapper
    def initialize(script, key, value)
      super(script, HiveLikeMapperModel.new(key, value))
    end

    include HiveLikeMapRed

    # model methods
    def_delegators :@model, *AVAILABLE_METHODS
  end

  class HiveLikeReducer < BaseReducer
    def initialize(script, key, values)
      super(script, HiveLikeReducerModel.new(key, values))
    end

    include HiveLikeMapRed

    # model methods
    def_delegators :@model, *AVAILABLE_METHODS
  end

  # model
  class HiveLikeMapperModel < BaseMapperModel
    attr_reader :table

    def initialize(key, value)
      super(key, value)
    end

    # emitters

    # create_table 'name', 'col1', 'TYPE1', 'col2', 'TYPE2', ...
    # Only the column names (every other argument) are recorded;
    # the type tokens are ignored.
    def create_table(name, *column_and_type)
      @table = Table.new(name)
      column_and_type.each_slice(2) do |column, _type|
        @table.columns << column
      end
    end

    # select 'col', ..., 'from', 'table' — looks each named column up in
    # the comma/space-separated input line and emits them joined under
    # the table-name key. Does nothing when 'from' is absent.
    def select(*args)
      from_index = args.index('from')
      if from_index
        values = args[0...from_index].map do |column|
          fields = @value.split(/[,\s]+/)
          fields[@table.columns.index(column)]
        end
        @controller.emit(args[from_index + 1] => values.join(", "))
      end
    end

    # Minimal table description: a name plus ordered column names.
    class Table
      attr_reader :name, :columns

      def initialize(name)
        @name = name
        @columns = []
      end

      def column(index) @columns[index] end
    end
  end

  class HiveLikeReducerModel < BaseReducerModel
    def initialize(key, values)
      super(key, values)
    end

    # emitters
    # Reducer-side select just forwards key/values unchanged.
    def select(*args) identity end
  end
end
|
data/lib/init.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'core'
require 'java'
require 'mapred_factory'

import 'org.apache.hadoop.io.IntWritable'
import 'org.apache.hadoop.io.Text'

include HadoopDsl

# Hadoop IO types, re-exported under the HadoopDsl namespace.
HadoopDsl::Text = Text
HadoopDsl::IntWritable = IntWritable

# Map entry point called from the Java side (JRuby bridge).
# Builds a DSL-specific mapper from the script, runs it, and collects
# everything it emitted into the Hadoop output collector.
def map(key, value, output, reporter, script)
  mapper = MapperFactory.create(script, key.to_string, value.to_string)
  mapper.run

  write(output, mapper)
end

# Reduce entry point called from the Java side.
# Converts the incoming Hadoop writables to Ruby values first.
def reduce(key, values, output, reporter, script)
  ruby_values = values.map {|v| to_ruby(v)}
  reducer = ReducerFactory.create(script, key.to_string, ruby_values)
  reducer.run

  write(output, reducer)
end

# Job setup entry point: runs the DSL setup object and returns its
# input/output paths as a Java array.
def setup(conf, script)
  setup = SetupFactory.create(script, conf)
  setup.run

  setup.paths.to_java
end

private

# Flushes every {key => value} pair the controller emitted into the
# Hadoop output collector, converting both sides to writables.
def write(output, controller)
  controller.emitted.each do |pair|
    pair.each do |k, v|
      output.collect(to_hadoop(k), to_hadoop(v))
    end
  end
end

# Hadoop writable -> plain Ruby value.
def to_ruby(value)
  case value
  when IntWritable
    value.get
  when Text
    value.to_string
  else
    raise "no match class: #{value.class}"
  end
end

# Plain Ruby value -> Hadoop writable.
def to_hadoop(value)
  case value
  when Integer
    IntWritable.new(value)
  when String
    text = Text.new
    text.set(value)
    text
  else
    raise "no match class: #{value.class}"
  end
end
|
data/lib/java/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
jruby-complete-*.jar
|
Binary file
|
data/lib/log_analysis.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'core'
require 'enumerator'

# Log-analysis DSL: scripts describe how to split a log line into named
# columns, then emit per-topic counts or sums over those columns.
module HadoopDsl::LogAnalysis
  include HadoopDsl

  KEY_SEP = "\t"
  PREFIX = 'col'
  PASS = nil
  # DSL methods forwarded from each controller to its model.
  AVAILABLE_METHODS = [:separate, :pattern, :column_name, :column, :topic, :value, :count_uniq, :sum]

  # common
  module LogAnalysisMapRed
    # entry point — both just evaluate the script body.
    def data(description = '', &block) yield end

    def each_line(&block) yield end
  end

  # controller
  class LogAnalysisSetup < BaseSetup
    def initialize(script, conf)
      super(script, conf)
    end

    include LogAnalysisMapRed
  end

  class LogAnalysisMapper < BaseMapper
    def initialize(script, key, value)
      super(script, LogAnalysisMapperModel.new(key, value))
    end

    include LogAnalysisMapRed

    # model methods
    def_delegators :@model, *AVAILABLE_METHODS
  end

  class LogAnalysisReducer < BaseReducer
    def initialize(script, key, values)
      super(script, LogAnalysisReducerModel.new(key, values))
    end

    include LogAnalysisMapRed

    # model methods
    def_delegators :@model, *AVAILABLE_METHODS
  end

  # model
  class LogAnalysisMapperModel < BaseMapperModel
    def initialize(key, value)
      super(key, value)
      @columns = ColumnArray.new
      @topics = []
    end

    # Accessor used by scripts as `column[:name]` / `column[0]`.
    def column; @columns end

    # Opens a named topic; emits inside the block are labelled with it.
    def topic(desc, options = {}, &block)
      @topics << @current_topic = Topic.new(desc, options[:label])
      yield if block_given?
      @current_topic
    end

    # Splits the raw line on a separator into positional columns.
    def separate(sep)
      fields = @value.split(sep)
      create_or_replace_columns_with(fields) {|column, value| column.value = value}
    end

    # Matches the raw line against a regexp; captures become columns.
    # Does nothing when the line does not match.
    def pattern(re)
      if @value =~ re
        md = Regexp.last_match
        create_or_replace_columns_with(md.captures) {|column, value| column.value = value}
      end
    end

    # column names by String converted to Symbol
    def column_name(*names)
      sym_names = names.map {|name| name.is_a?(String) ? name.to_sym : name }
      create_or_replace_columns_with(sym_names) {|column, name| column.name = name}
    end

    # Rebuilds @columns from `array`, reusing existing Column objects at
    # each index so a later call (e.g. column_name after separate) keeps
    # previously assigned values.
    def create_or_replace_columns_with(array, &block)
      rebuilt = array.enum_for(:each_with_index).map do |item, idx|
        col = @columns[idx] || Column.new(idx)
        yield col, item
        col
      end
      @columns = ColumnArray.new(rebuilt)
    end

    # emitters

    # Emits "<topic-label>\t<column-value>" => 1 for later aggregation.
    def count_uniq(column)
      @controller.emit([@current_topic.label, KEY_SEP, column.value].join => 1)
    end

    # Emits "<topic-label>" => numeric column value for summation.
    def sum(column)
      @controller.emit([@current_topic.label].join => column.value.to_i)
    end

    # Array of Columns addressable by index, symbolic name, or string name.
    class ColumnArray < Array
      def [](key)
        case key
        when Integer then at(key)
        when Symbol then (select {|c| c.name == key}).first
        when String then (select {|c| c.name == key.to_sym}).first
        end
      end
    end

    class Column
      attr_reader :index
      attr_accessor :value, :name

      def initialize(index, value = nil)
        @index, @value = index, value
      end
    end

    # A labelled topic; the label defaults to the description with
    # whitespace replaced by underscores.
    class Topic
      def initialize(desc, label = nil)
        @desc, @label = desc, label
      end

      def label
        @label || @desc.gsub(/\s/, '_')
      end
    end
  end

  class LogAnalysisReducerModel < BaseReducerModel
    def initialize(key, values)
      super(key, values)
      # The map-side key is "<label><TAB><value>"; recover the label.
      if key =~ /(\w*)#{KEY_SEP}?(.*)/
        @topic = Topic.new($1, values)
      end
    end

    # Mirrors the mapper-side topic call so the same script runs on both
    # sides; only the matching topic's emitters aggregate.
    def topic(desc, options = {}, &block)
      @current_topic = Topic.new(options[:label] || desc.gsub(/\s/, '_'), nil)
      yield if block_given?
      @current_topic
    end

    def count_uniq(column)
      aggregate if @topic == @current_topic
    end

    def sum(column)
      aggregate if @topic == @current_topic
    end

    # Reducer-side topic: compared by label only.
    class Topic
      attr_reader :label, :values

      def initialize(label, values)
        @label, @values = label, values
      end

      def ==(rh) self.label == rh.label end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'util'

module HadoopDsl
  # Builds the DSL-specific mapper/reducer/setup objects for a script.
  class MapRedFactory
    # Scans the script for a "use 'DslName'" directive and returns the
    # DSL name, or nil when no directive is present.
    #
    # Fix: the previous version returned unconditionally inside the loop,
    # so only the very first line was ever inspected and a directive on
    # any later line was silently ignored (returning nil).
    def self.dsl_name(script)
      read_file(script).each_line do |line|
        return $1 if line =~ /^use\s*'(\w*)'/
      end
      nil
    end

    # Requires the library implementing the named DSL (snake_cased file).
    def self.require_dsl_lib(dsl_name)
      require snake_case(dsl_name)
    end

    # Resolves "HadoopDsl::<Dsl>::<Dsl><Suffix>" to its class.
    # NOTE: uses eval on the name extracted from the script; scripts are
    # treated as trusted input here.
    def self.dsl_class(dsl_name, suffix)
      eval("HadoopDsl::#{dsl_name}::#{dsl_name}#{suffix}")
    end
  end

  class MapperFactory < MapRedFactory
    # Returns a <Dsl>Mapper instance for the script.
    def self.create(script, key, value)
      dsl_name = self.dsl_name(script)
      require_dsl_lib(dsl_name)
      dsl_class(dsl_name, 'Mapper').new(script, key, value)
    end
  end

  class ReducerFactory < MapRedFactory
    # Returns a <Dsl>Reducer instance for the script.
    def self.create(script, key, values)
      dsl_name = self.dsl_name(script)
      require_dsl_lib(dsl_name)
      dsl_class(dsl_name, 'Reducer').new(script, key, values)
    end
  end

  class SetupFactory < MapRedFactory
    # Returns a <Dsl>Setup instance, falling back to BaseSetup when the
    # DSL does not define one (or no DSL directive was found).
    def self.create(script, conf)
      dsl_name = self.dsl_name(script)
      require_dsl_lib(dsl_name)
      dsl_class(dsl_name, 'Setup').new(script, conf) rescue HadoopDsl::BaseSetup.new(script, conf)
    end
  end
end
|
data/lib/util.rb
ADDED
data/lib/word_count.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'core'
require 'enumerator'

# Classic word-count DSL: per-word counts plus optional running totals
# (bytes / words / lines), distinguished by a tab-prefixed key.
module HadoopDsl::WordCount
  include HadoopDsl

  AVAILABLE_METHODS = [:count_uniq, :total]
  # Keys starting with this prefix carry totals, not per-word counts.
  TOTAL_PREFIX = "\t"

  # common
  module WordCountMapRed
    # entry point — simply evaluates the script body.
    def data(description = '', &block) yield end
  end

  # controller
  class WordCountMapper < BaseMapper
    def initialize(script, key, value)
      super(script, WordCountMapperModel.new(key, value))
    end

    include WordCountMapRed

    # model methods
    def_delegators :@model, *AVAILABLE_METHODS
  end

  class WordCountReducer < BaseReducer
    def initialize(script, key, values)
      super(script, WordCountReducerModel.new(key, values))
    end

    include WordCountMapRed

    # model methods
    def_delegators :@model, *AVAILABLE_METHODS
  end

  # model
  class WordCountMapperModel < BaseMapperModel
    def initialize(key, value)
      super(key, value)
    end

    # emitters

    # Emits word => 1 for each whitespace-separated word in the line.
    def count_uniq
      @value.split.each {|word| @controller.emit(word => 1)}
    end

    # Emits running totals; accepts any of :bytes, :words, :lines.
    def total(*types)
      types.each do |kind|
        case kind
        when :bytes
          # non-whitespace character count of the line
          @controller.emit("#{TOTAL_PREFIX}total bytes" => @value.gsub(/\s/, '').length)
        when :words
          @controller.emit("#{TOTAL_PREFIX}total words" => @value.split.size)
        when :lines
          @controller.emit("#{TOTAL_PREFIX}total lines" => 1)
        end
      end
    end
  end

  class WordCountReducerModel < BaseReducerModel
    def initialize(key, values)
      super(key, values)
    end

    # emitters — each only aggregates the keys that belong to it.
    def count_uniq; aggregate unless total_value? end
    def total(*types); aggregate if total_value? end

    private
    def total_value?; @key =~ /^#{TOTAL_PREFIX}/ end
  end
end
|
data/spec/core_spec.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'init'
require 'core'

include HadoopDsl

describe 'BaseMapRed' do
  before(:all) do
    # Minimal script: just input/output paths for BaseSetup to pick up.
    @script = create_tmp_script(<<-EOF)
      from 'test/inputs'
      to 'test/outputs'
    EOF
  end

  it 'emit key value' do
    mapper = BaseMapper.new(@script, BaseMapperModel.new(nil, nil))
    mapper.emit('key' => 'value')
    mapper.emitted.should == [{'key' => 'value'}]
  end

  it 'can run BaseMapper in minimum' do
    model = BaseMapperModel.new('key', 'value')
    mapper = BaseMapper.new(@script, model)
    mapper.run
  end

  it 'can run BaseReducer in minimum' do
    model = BaseReducerModel.new('key', 'values')
    reducer = BaseReducer.new(@script, model)
    reducer.run
  end

  it 'can run BaseSetup in minimum' do
    setup = BaseSetup.new(@script, nil)
    setup.run
  end

  describe BaseMapper do
    it 'can emit as identity' do
      model = BaseMapperModel.new('key', 'value')
      mapper = BaseMapper.new(@script, model)
      model.identity

      mapper.emitted.should == [{'key' => 'value'}]
    end
  end

  describe BaseReducer do
    it 'can emit as aggregate' do
      model = BaseReducerModel.new('key', [1, 2, 3])
      reducer = BaseReducer.new(@script, model)
      model.aggregate

      reducer.emitted.should == [{'key' => 6}]
    end

    it 'can emit as identity' do
      model = BaseReducerModel.new('key', [1, 2, 3])
      reducer = BaseReducer.new(@script, model)
      model.identity

      reducer.emitted.should == [{'key' => 1}, {'key' => 2}, {'key' => 3}]
    end
  end

  describe BaseSetup do
    it 'can get paths' do
      setup = BaseSetup.new(@script, nil)
      setup.run
      setup.paths[0].should == 'test/inputs'
      setup.paths[1].should == 'test/outputs'
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'log_analysis'
require 'word_count'

include HadoopDsl::LogAnalysis
# Fix: spec description typo 'Aapach' -> 'Apache'.
describe 'Apache Log Example' do
  before(:all) do
    @script = File.join(File.dirname(__FILE__), '..', 'examples', 'apachelog-v2.rb')
    # One line in Apache common log format.
    @value = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
  end

  it 'can run example by mapper' do
    mapper = LogAnalysisMapper.new(@script, nil, @value)
    mapper.run
    mapper.emitted.first["user\tfrank"].should == 1
  end

  it 'can run example by reducer' do
    reducer = LogAnalysisReducer.new(@script, "user\tfrank", [1, 1, 1])
    reducer.run
    reducer.emitted.first["user\tfrank"].should == 3
  end
end

include HadoopDsl::WordCount
describe 'Word Count Example' do
  before(:all) do
    @script = File.join(File.dirname(__FILE__), '..', 'examples', 'word_count_test.rb')
    @value = 'Lorem ipsum ipsum Lorem sit amet,'
  end

  it 'can run example by mapper' do
    mapper = WordCountMapper.new(@script, nil, @value)
    mapper.run
    # 6 per-word emits + 3 totals (bytes, words, lines)
    mapper.emitted.size.should == 9
    mapper.emitted.each do |e|
      case e.keys.first
      when 'Lorem'
        e.values.first.should == 1
      when 'total words'
        e.values.first.should == 6
      end
    end
  end

  it 'can run example by reducer' do
    reducer = WordCountReducer.new(@script, "Lorem", [1, 1, 1])
    reducer.run
    reducer.emitted.first["Lorem"].should == 3
  end
end

include HadoopDsl::HiveLike
describe 'Hive Like Example' do
  before(:all) do
    @script = File.join(File.dirname(__FILE__), '..', 'examples', 'hive_like_test.rb')
    @value = 'apple, 3, 100'
  end

  it 'can run setup' do
    conf = mock('conf')
    conf.should_receive(:output_key_class=).once
    conf.should_receive(:output_value_class=).once

    setup = HiveLikeSetup.new(@script, conf)
    setup.run
    setup.paths[0].should == 'hive-like/items.txt'
  end

  it 'can run example by mapper' do
    mapper = HiveLikeMapper.new(@script, nil, @value)
    mapper.run
    mapper.emitted.size.should == 1
    mapper.emitted.first['items'].should == '3, 100, apple'
  end

  it 'can run example by reducer' do
    values = ['v1', 'v2', 'v3']
    reducer = HiveLikeReducer.new(@script, "items", values)
    reducer.run
    # identity reduce: first value wins for the first emit
    reducer.emitted.first["items"].should == 'v1'
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'init'
require 'core'
require 'hive_like'

include HadoopDsl::HiveLike

describe HiveLikeSetup do
  it 'should load data' do
    # load_data derives the output dir as a sibling 'outputs' directory.
    script = create_tmp_script(%Q!load_data "hive-like/inputs", items;!)
    conf = mock('conf')
    conf.should_receive(:output_key_class=).once
    conf.should_receive(:output_value_class=).once

    setup = HiveLikeSetup.new(script, conf)
    setup.run
    setup.paths[0].should == 'hive-like/inputs'
    setup.paths[1].should == 'hive-like/outputs'
  end
end

describe HiveLikeMapper do
  before do
    @value = 'apple, 3, 100'
  end

  it 'should create table' do
    mapper = HiveLikeMapper.new(nil, nil, @value)
    mapper.create_table('items', 'item', 'STRING', 'quantity', 'INT', 'price', 'INT');
    mapper.table.name.should == 'items'
    # type tokens are dropped; only column names are kept
    mapper.table.column(0).should == 'item'
    mapper.table.column(1).should == 'quantity'
  end

  it 'should select' do
    mapper = HiveLikeMapper.new(nil, nil, @value)
    mapper.create_table('items', 'item', 'STRING', 'quantity', 'INT', 'price', 'INT');
    mapper.select("item", "quantity", "price", "from", "items")
    mapper.emitted.first.should == {'items' => 'apple, 3, 100'}
  end

  it 'should pre process script body' do
    # SQL-ish statements are rewritten into quoted Ruby method calls.
    body = "select foo, bar from table;\n"
    mapper = HiveLikeMapper.new(nil, nil, @value)
    processed = mapper.pre_process(body)
    processed.should == %Q!select("foo", "bar", "from", "table")\n!
  end
end

describe HiveLikeReducer do
  it 'should select as identity' do
    key = 'Lorem'
    values = [1, 1, 1]
    reducer = HiveLikeReducer.new(nil, key, values)

    reducer.select
    reducer.emitted[0].should == {'Lorem' => 1}
  end
end
|