hadoop-rubydsl 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +53 -0
- data/Rakefile +18 -0
- data/TODO +2 -0
- data/VERSION +1 -0
- data/bin/hadoop +276 -0
- data/bin/hadoop-ruby.sh +30 -0
- data/conf/hadoop-site.xml +19 -0
- data/examples/apachelog-v2-2.rb +18 -0
- data/examples/apachelog-v2.rb +25 -0
- data/examples/apachelog.rb +15 -0
- data/examples/hive_like_test.rb +14 -0
- data/examples/word_count_test.rb +7 -0
- data/hadoop-rubydsl.gemspec +79 -0
- data/lib/core.rb +108 -0
- data/lib/hive_like.rb +122 -0
- data/lib/init.rb +60 -0
- data/lib/java/.gitignore +1 -0
- data/lib/java/hadoop-ruby.jar +0 -0
- data/lib/log_analysis.rb +165 -0
- data/lib/mapred_factory.rb +43 -0
- data/lib/util.rb +11 -0
- data/lib/word_count.rb +76 -0
- data/spec/core_spec.rb +73 -0
- data/spec/example_spec.rb +82 -0
- data/spec/hive_like_spec.rb +58 -0
- data/spec/init_spec.rb +56 -0
- data/spec/log_analysis_spec.rb +119 -0
- data/spec/mapred_factory_spec.rb +42 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/util_spec.rb +15 -0
- data/spec/word_count_spec.rb +89 -0
- metadata +100 -0
data/spec/init_spec.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'init'
|
3
|
+
|
4
|
+
import 'org.apache.hadoop.io.IntWritable'
|
5
|
+
import 'org.apache.hadoop.io.Text'
|
6
|
+
import 'org.apache.hadoop.mapred.JobConf'
|
7
|
+
|
8
|
+
describe 'mapreduce init' do
|
9
|
+
|
10
|
+
before(:all) do
|
11
|
+
@script = create_tmp_script(<<-EOF)
|
12
|
+
use 'LogAnalysis'
|
13
|
+
data 'test' do
|
14
|
+
from 'test/inputs'
|
15
|
+
to 'test/outputs'
|
16
|
+
|
17
|
+
separate(" ")
|
18
|
+
column_name 'c0', 'c1', 'c2', 'c3'
|
19
|
+
topic 't1' do
|
20
|
+
count_uniq columns(:c1)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
EOF
|
24
|
+
end
|
25
|
+
|
26
|
+
before do
|
27
|
+
@one = IntWritable.new(1)
|
28
|
+
@output = mock('output')
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'can map sucessfully' do
|
32
|
+
key, value = Text.new, Text.new
|
33
|
+
key.set("key")
|
34
|
+
value.set('it should be fine')
|
35
|
+
@output.should_receive(:collect).once #.with(@text, @one)
|
36
|
+
|
37
|
+
map(key, value, @output, nil, @script)
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'can reduce sucessfully' do
|
41
|
+
key, value = Text.new, Text.new
|
42
|
+
key.set("t1\tkey")
|
43
|
+
values = [@one, @one, @one]
|
44
|
+
@output.should_receive(:collect).once #.with(@text, @one)
|
45
|
+
|
46
|
+
reduce(key, values, @output, nil, @script)
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'can set job conf' do
|
50
|
+
conf = JobConf.new
|
51
|
+
paths = setup(conf, @script)
|
52
|
+
|
53
|
+
paths[0].should == 'test/inputs'
|
54
|
+
paths[1].should == 'test/outputs'
|
55
|
+
end
|
56
|
+
end
|
data/spec/log_analysis_spec.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'init'
|
2
|
+
require 'core'
|
3
|
+
require 'log_analysis'
|
4
|
+
|
5
|
+
include HadoopDsl::LogAnalysis
|
6
|
+
|
7
|
+
describe LogAnalysisMapper do
|
8
|
+
before do
|
9
|
+
@apache_log = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should separate data by space' do
|
13
|
+
value = 'Lorem ipsum dolor sit amet,'
|
14
|
+
mapper = LogAnalysisMapper.new(nil, nil, value)
|
15
|
+
mapper.separate(' ')
|
16
|
+
|
17
|
+
mapper.column[1].value.should == 'ipsum'
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should separate by pattern' do
|
21
|
+
mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
|
22
|
+
mapper.pattern /(.*) (.*) (.*) \[(.*)\] (".*") (\d*) (\d*)/
|
23
|
+
|
24
|
+
mapper.column[2].value.should == 'frank'
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should label column name by string' do
|
28
|
+
mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
|
29
|
+
mapper.pattern /(.*) (.*) (.*) \[(.*)\] (".*") (\d*) (\d*)/
|
30
|
+
mapper.column_name 'remote_host', PASS, 'user', 'access_date', 'request', 'status', 'bytes'
|
31
|
+
|
32
|
+
mapper.column['user'].value.should == 'frank'
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'should label column name by symbol' do
|
36
|
+
mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
|
37
|
+
mapper.pattern /(.*) (.*) (.*) \[(.*)\] (".*") (\d*) (\d*)/
|
38
|
+
mapper.column_name :remote_host, PASS, :user, :access_date, :request, :status, :bytes
|
39
|
+
|
40
|
+
mapper.column[:user].value.should == 'frank'
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'should count uniq column' do
|
44
|
+
value = 'count uniq'
|
45
|
+
mapper = LogAnalysisMapper.new(nil, nil, value)
|
46
|
+
mapper.separate(' ')
|
47
|
+
mapper.topic('t1') { mapper.count_uniq mapper.column[1] }
|
48
|
+
|
49
|
+
mapper.emitted.first["t1\tuniq"].should == 1
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'should sum column value' do
|
53
|
+
value = 'sum 123'
|
54
|
+
mapper = LogAnalysisMapper.new(nil, nil, value)
|
55
|
+
mapper.separate(' ')
|
56
|
+
mapper.topic('t1') { mapper.sum mapper.column[1] }
|
57
|
+
|
58
|
+
mapper.emitted.first["t1"].should == 123
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'has topic which returns label' do
|
62
|
+
value = 'Lorem ipsum dolor sit amet,'
|
63
|
+
mapper = LogAnalysisMapper.new(nil, nil, value)
|
64
|
+
mapper.separate(' ')
|
65
|
+
|
66
|
+
topic = mapper.topic('desc', :label => 'label')
|
67
|
+
topic.label.should == 'label'
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'has topic which returns label as desc' do
|
71
|
+
value = 'Lorem ipsum dolor sit amet,'
|
72
|
+
mapper = LogAnalysisMapper.new(nil, nil, value)
|
73
|
+
mapper.separate(' ')
|
74
|
+
|
75
|
+
topic = mapper.topic('desc')
|
76
|
+
topic.label.should == 'desc'
|
77
|
+
end
|
78
|
+
|
79
|
+
it 'has topic which returns label as desc with space' do
|
80
|
+
value = 'Lorem ipsum dolor sit amet,'
|
81
|
+
mapper = LogAnalysisMapper.new(nil, nil, value)
|
82
|
+
mapper.separate(' ')
|
83
|
+
|
84
|
+
topic = mapper.topic('desc with space')
|
85
|
+
topic.label.should == 'desc_with_space'
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
describe LogAnalysisReducer do
|
90
|
+
it 'should count uniq in the topic' do
|
91
|
+
key = "t1\tuniq"
|
92
|
+
values = [1, 1, 1]
|
93
|
+
reducer = LogAnalysisReducer.new(nil, key, values)
|
94
|
+
reducer.separate(' ')
|
95
|
+
reducer.topic('t1') { reducer.count_uniq(nil) }
|
96
|
+
|
97
|
+
reducer.emitted.first["t1\tuniq"].should == 3
|
98
|
+
end
|
99
|
+
|
100
|
+
it 'should not count uniq of other topic' do
|
101
|
+
key = "t2\tuniq"
|
102
|
+
values = [1, 1, 1]
|
103
|
+
reducer = LogAnalysisReducer.new(nil, key, values)
|
104
|
+
reducer.separate(' ')
|
105
|
+
reducer.topic('t1') { reducer.count_uniq(nil) }
|
106
|
+
|
107
|
+
reducer.emitted.first.should be_nil
|
108
|
+
end
|
109
|
+
|
110
|
+
it 'should sum column value' do
|
111
|
+
key = "t1"
|
112
|
+
values = [123, 456, 789]
|
113
|
+
reducer = LogAnalysisReducer.new(nil, key, values)
|
114
|
+
reducer.separate(' ')
|
115
|
+
reducer.topic('t1') { reducer.sum(nil) }
|
116
|
+
|
117
|
+
reducer.emitted.first["t1"].should == 123+456+789
|
118
|
+
end
|
119
|
+
end
|
data/spec/mapred_factory_spec.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__) , 'spec_helper')
|
2
|
+
|
3
|
+
require 'mapred_factory'
|
4
|
+
require 'log_analysis'
|
5
|
+
|
6
|
+
describe 'MapRed Factory' do
|
7
|
+
|
8
|
+
before(:all) do
|
9
|
+
@script = create_tmp_script("use 'LogAnalysis'")
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'can create mapper' do
|
13
|
+
mapper = MapperFactory.create(@script, nil, nil)
|
14
|
+
mapper.class.should == LogAnalysisMapper
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'can create reducer' do
|
18
|
+
reducer = ReducerFactory.create(@script, nil, nil)
|
19
|
+
reducer.class.should == LogAnalysisReducer
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'can create setup' do
|
23
|
+
s = SetupFactory.create(@script, nil)
|
24
|
+
s.class.should == LogAnalysisSetup
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'can create base if not exists in specific DSL' do
|
28
|
+
s = SetupFactory.create(create_tmp_script("use 'WordCount'"), nil)
|
29
|
+
s.class.should == BaseSetup
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'specify dsl name from script' do
|
33
|
+
dsl_name = MapRedFactory.dsl_name(@script)
|
34
|
+
dsl_name.should == 'LogAnalysis'
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'can convert dsl name to dsl lib file and require' do
|
38
|
+
dsl_name = MapRedFactory.dsl_name(@script)
|
39
|
+
MapRedFactory.require_dsl_lib(dsl_name).should_not be_nil
|
40
|
+
LogAnalysisMapper
|
41
|
+
end
|
42
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/spec/util_spec.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__) , 'spec_helper')
|
2
|
+
|
3
|
+
require 'util'
|
4
|
+
|
5
|
+
describe 'utilities' do
|
6
|
+
it 'can change camelcase str to snakecase' do
|
7
|
+
snake_case('CamelCaseStr').should == 'camel_case_str'
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'can read file and get file data to string' do
|
11
|
+
script_body = 'This is a script body.'
|
12
|
+
@script = create_tmp_script(script_body)
|
13
|
+
read_file(@script).should == script_body
|
14
|
+
end
|
15
|
+
end
|
data/spec/word_count_spec.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'init'
|
2
|
+
require 'core'
|
3
|
+
require 'word_count'
|
4
|
+
|
5
|
+
include HadoopDsl::WordCount
|
6
|
+
|
7
|
+
describe WordCountMapper do
|
8
|
+
it 'should count uniq' do
|
9
|
+
value = 'Lorem ipsum Lorem sit amet,'
|
10
|
+
mapper = WordCountMapper.new(nil, nil, value)
|
11
|
+
|
12
|
+
mapper.count_uniq
|
13
|
+
mapper.emitted[0].should == {'Lorem' => 1}
|
14
|
+
mapper.emitted[1].should == {'ipsum' => 1}
|
15
|
+
mapper.emitted[2].should == {'Lorem' => 1}
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should count total bytes' do
|
19
|
+
value = 'Lorem ipsum Lorem sit amet,'
|
20
|
+
mapper = WordCountMapper.new(nil, nil, value)
|
21
|
+
|
22
|
+
mapper.total :bytes
|
23
|
+
mapper.emitted[0].should == {"#{TOTAL_PREFIX}total bytes" => 23}
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should count total words' do
|
27
|
+
value = 'Lorem ipsum Lorem sit amet,'
|
28
|
+
mapper = WordCountMapper.new(nil, nil, value)
|
29
|
+
|
30
|
+
mapper.total :words
|
31
|
+
mapper.emitted[0].should == {"#{TOTAL_PREFIX}total words" => 5}
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should count total lines' do
|
35
|
+
value = 'Lorem ipsum Lorem sit amet,'
|
36
|
+
mapper = WordCountMapper.new(nil, nil, value)
|
37
|
+
|
38
|
+
mapper.total :lines
|
39
|
+
mapper.emitted[0].should == {"#{TOTAL_PREFIX}total lines" => 1}
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'should count total bytes, words, lines' do
|
43
|
+
value = 'Lorem ipsum Lorem sit amet,'
|
44
|
+
mapper = WordCountMapper.new(nil, nil, value)
|
45
|
+
|
46
|
+
mapper.total :bytes, :words, :lines
|
47
|
+
mapper.emitted[0].should == {"#{TOTAL_PREFIX}total bytes" => 23}
|
48
|
+
mapper.emitted[1].should == {"#{TOTAL_PREFIX}total words" => 5}
|
49
|
+
mapper.emitted[2].should == {"#{TOTAL_PREFIX}total lines" => 1}
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe WordCountReducer do
|
54
|
+
it 'should count uniq' do
|
55
|
+
key = 'Lorem'
|
56
|
+
values = [1, 1, 1]
|
57
|
+
reducer = WordCountReducer.new(nil, key, values)
|
58
|
+
|
59
|
+
reducer.count_uniq
|
60
|
+
reducer.emitted[0].should == {'Lorem' => 3}
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'should count total bytes' do
|
64
|
+
key = "#{TOTAL_PREFIX}total bytes"
|
65
|
+
values = [12, 23, 45]
|
66
|
+
reducer = WordCountReducer.new(nil, key, values)
|
67
|
+
|
68
|
+
reducer.total :bytes
|
69
|
+
reducer.emitted[0].should == {"#{TOTAL_PREFIX}total bytes" => 12 + 23 + 45}
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'should count total words' do
|
73
|
+
key = "#{TOTAL_PREFIX}total words"
|
74
|
+
values = [3, 4, 5]
|
75
|
+
reducer = WordCountReducer.new(nil, key, values)
|
76
|
+
|
77
|
+
reducer.total :words
|
78
|
+
reducer.emitted[0].should == {"#{TOTAL_PREFIX}total words" => 3 + 4 + 5}
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'should count total lines' do
|
82
|
+
key = "#{TOTAL_PREFIX}total lines"
|
83
|
+
values = [1, 2, 3]
|
84
|
+
reducer = WordCountReducer.new(nil, key, values)
|
85
|
+
|
86
|
+
reducer.total :lines
|
87
|
+
reducer.emitted[0].should == {"#{TOTAL_PREFIX}total lines" => 6}
|
88
|
+
end
|
89
|
+
end
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hadoop-rubydsl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Koichi Fujikawa
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-26 00:00:00 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: jruby-on-hadoop
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
description: Hadoop Ruby DSL
|
26
|
+
email: fujibee@gmail.com
|
27
|
+
executables:
|
28
|
+
- hadoop
|
29
|
+
- hadoop-ruby.sh
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- README
|
34
|
+
- TODO
|
35
|
+
files:
|
36
|
+
- README
|
37
|
+
- Rakefile
|
38
|
+
- TODO
|
39
|
+
- VERSION
|
40
|
+
- bin/hadoop
|
41
|
+
- bin/hadoop-ruby.sh
|
42
|
+
- conf/hadoop-site.xml
|
43
|
+
- examples/apachelog-v2-2.rb
|
44
|
+
- examples/apachelog-v2.rb
|
45
|
+
- examples/apachelog.rb
|
46
|
+
- examples/hive_like_test.rb
|
47
|
+
- examples/word_count_test.rb
|
48
|
+
- hadoop-rubydsl.gemspec
|
49
|
+
- lib/core.rb
|
50
|
+
- lib/hive_like.rb
|
51
|
+
- lib/init.rb
|
52
|
+
- lib/java/.gitignore
|
53
|
+
- lib/java/hadoop-ruby.jar
|
54
|
+
- lib/log_analysis.rb
|
55
|
+
- lib/mapred_factory.rb
|
56
|
+
- lib/util.rb
|
57
|
+
- lib/word_count.rb
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: http://github.com/fujibee/hadoop-rubydsl
|
60
|
+
licenses: []
|
61
|
+
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options:
|
64
|
+
- --charset=UTF-8
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: "0"
|
72
|
+
version:
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: "0"
|
78
|
+
version:
|
79
|
+
requirements: []
|
80
|
+
|
81
|
+
rubyforge_project:
|
82
|
+
rubygems_version: 1.3.5
|
83
|
+
signing_key:
|
84
|
+
specification_version: 3
|
85
|
+
summary: Hadoop Ruby DSL
|
86
|
+
test_files:
|
87
|
+
- spec/spec_helper.rb
|
88
|
+
- spec/core_spec.rb
|
89
|
+
- spec/util_spec.rb
|
90
|
+
- spec/mapred_factory_spec.rb
|
91
|
+
- spec/word_count_spec.rb
|
92
|
+
- spec/hive_like_spec.rb
|
93
|
+
- spec/log_analysis_spec.rb
|
94
|
+
- spec/example_spec.rb
|
95
|
+
- spec/init_spec.rb
|
96
|
+
- examples/apachelog-v2.rb
|
97
|
+
- examples/hive_like_test.rb
|
98
|
+
- examples/word_count_test.rb
|
99
|
+
- examples/apachelog-v2-2.rb
|
100
|
+
- examples/apachelog.rb
|