hadoop-rubydsl 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +53 -0
- data/Rakefile +18 -0
- data/TODO +2 -0
- data/VERSION +1 -0
- data/bin/hadoop +276 -0
- data/bin/hadoop-ruby.sh +30 -0
- data/conf/hadoop-site.xml +19 -0
- data/examples/apachelog-v2-2.rb +18 -0
- data/examples/apachelog-v2.rb +25 -0
- data/examples/apachelog.rb +15 -0
- data/examples/hive_like_test.rb +14 -0
- data/examples/word_count_test.rb +7 -0
- data/hadoop-rubydsl.gemspec +79 -0
- data/lib/core.rb +108 -0
- data/lib/hive_like.rb +122 -0
- data/lib/init.rb +60 -0
- data/lib/java/.gitignore +1 -0
- data/lib/java/hadoop-ruby.jar +0 -0
- data/lib/log_analysis.rb +165 -0
- data/lib/mapred_factory.rb +43 -0
- data/lib/util.rb +11 -0
- data/lib/word_count.rb +76 -0
- data/spec/core_spec.rb +73 -0
- data/spec/example_spec.rb +82 -0
- data/spec/hive_like_spec.rb +58 -0
- data/spec/init_spec.rb +56 -0
- data/spec/log_analysis_spec.rb +119 -0
- data/spec/mapred_factory_spec.rb +42 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/util_spec.rb +15 -0
- data/spec/word_count_spec.rb +89 -0
- metadata +100 -0
data/spec/init_spec.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'init'
|
3
|
+
|
4
|
+
import 'org.apache.hadoop.io.IntWritable'
|
5
|
+
import 'org.apache.hadoop.io.Text'
|
6
|
+
import 'org.apache.hadoop.mapred.JobConf'
|
7
|
+
|
8
|
+
# Exercises the map / reduce / setup entry points (called here without a
# receiver) against a small LogAnalysis DSL script.
#
# NOTE(review): create_tmp_script is not defined in this file, and this spec
# does not require spec_helper the way the factory and util specs do --
# presumably it depends on another spec having loaded the helper; confirm.
describe 'mapreduce init' do

  before(:all) do
    # DSL script shared by all examples: space-separated input with four
    # named columns and one topic that counts unique values of column c1.
    @script = create_tmp_script(<<-EOF)
      use 'LogAnalysis'
      data 'test' do
        from 'test/inputs'
        to 'test/outputs'

        separate(" ")
        column_name 'c0', 'c1', 'c2', 'c3'
        topic 't1' do
          count_uniq columns(:c1)
        end
      end
    EOF
  end

  before do
    @one = IntWritable.new(1)
    # Stand-in for the output collector handed to map / reduce.
    @output = mock('output')
  end

  it 'can map sucessfully' do
    key = Text.new
    key.set("key")
    value = Text.new
    value.set('it should be fine')
    # Only the number of collect calls is checked; the argument match
    # (.with(...)) is currently disabled.
    @output.should_receive(:collect).once

    map(key, value, @output, nil, @script)
  end

  it 'can reduce sucessfully' do
    # The key carries the topic name and the counted value, tab-separated.
    key = Text.new
    key.set("t1\tkey")
    values = [@one, @one, @one]
    # Only the number of collect calls is checked; the argument match
    # (.with(...)) is currently disabled.
    @output.should_receive(:collect).once

    reduce(key, values, @output, nil, @script)
  end

  it 'can set job conf' do
    conf = JobConf.new
    # setup returns the paths declared by `from` / `to` in the script.
    paths = setup(conf, @script)

    paths[0].should == 'test/inputs'
    paths[1].should == 'test/outputs'
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'init'
|
2
|
+
require 'core'
|
3
|
+
require 'log_analysis'
|
4
|
+
|
5
|
+
include HadoopDsl::LogAnalysis
|
6
|
+
|
7
|
+
# Specs for LogAnalysisMapper: splitting an input value into columns (by a
# separator string or by a regexp), naming those columns, emitting values
# inside a topic, and deriving a topic's label.
describe LogAnalysisMapper do
  before do
    @apache_log = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
    # Seven capture groups, one per field of @apache_log. Kept in one place
    # so every example matches with the same expression.
    @apache_pattern = /(.*) (.*) (.*) \[(.*)\] (".*") (\d*) (\d*)/
  end

  it 'should separate data by space' do
    mapper = LogAnalysisMapper.new(nil, nil, 'Lorem ipsum dolor sit amet,')
    mapper.separate(' ')

    mapper.column[1].value.should == 'ipsum'
  end

  it 'should separate by pattern' do
    mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
    # Parentheses are required here: a bare regexp literal as the first
    # argument makes Ruby warn about an ambiguous `/`.
    mapper.pattern(@apache_pattern)

    mapper.column[2].value.should == 'frank'
  end

  it 'should label column name by string' do
    mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
    mapper.pattern(@apache_pattern)
    # PASS occupies the slot of the second captured field, which gets no name.
    mapper.column_name('remote_host', PASS, 'user', 'access_date', 'request', 'status', 'bytes')

    mapper.column['user'].value.should == 'frank'
  end

  it 'should label column name by symbol' do
    mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
    mapper.pattern(@apache_pattern)
    mapper.column_name(:remote_host, PASS, :user, :access_date, :request, :status, :bytes)

    mapper.column[:user].value.should == 'frank'
  end

  it 'should count uniq column' do
    mapper = LogAnalysisMapper.new(nil, nil, 'count uniq')
    mapper.separate(' ')
    mapper.topic('t1') { mapper.count_uniq(mapper.column[1]) }

    # Emitted key is "<topic>\t<column value>".
    mapper.emitted.first["t1\tuniq"].should == 1
  end

  it 'should sum column value' do
    mapper = LogAnalysisMapper.new(nil, nil, 'sum 123')
    mapper.separate(' ')
    mapper.topic('t1') { mapper.sum(mapper.column[1]) }

    # Emitted key is the topic name alone; the value is numeric.
    mapper.emitted.first["t1"].should == 123
  end

  it 'has topic which returns label' do
    mapper = LogAnalysisMapper.new(nil, nil, 'Lorem ipsum dolor sit amet,')
    mapper.separate(' ')

    topic = mapper.topic('desc', :label => 'label')
    topic.label.should == 'label'
  end

  it 'has topic which returns label as desc' do
    mapper = LogAnalysisMapper.new(nil, nil, 'Lorem ipsum dolor sit amet,')
    mapper.separate(' ')

    # Without an explicit :label the description doubles as the label.
    topic = mapper.topic('desc')
    topic.label.should == 'desc'
  end

  it 'has topic which returns label as desc with space' do
    mapper = LogAnalysisMapper.new(nil, nil, 'Lorem ipsum dolor sit amet,')
    mapper.separate(' ')

    # Spaces in the description become underscores in the derived label.
    topic = mapper.topic('desc with space')
    topic.label.should == 'desc_with_space'
  end
end
|
88
|
+
|
89
|
+
# Specs for LogAnalysisReducer: aggregating the values received for a key
# inside the topic that the key belongs to.
describe LogAnalysisReducer do
  it 'should count uniq in the topic' do
    reducer = LogAnalysisReducer.new(nil, "t1\tuniq", [1, 1, 1])
    reducer.separate(' ')
    reducer.topic('t1') { reducer.count_uniq(nil) }

    first_emitted = reducer.emitted.first
    first_emitted["t1\tuniq"].should == 3
  end

  it 'should not count uniq of other topic' do
    # The key is prefixed with t2, while the topic declared below is t1.
    reducer = LogAnalysisReducer.new(nil, "t2\tuniq", [1, 1, 1])
    reducer.separate(' ')
    reducer.topic('t1') { reducer.count_uniq(nil) }

    first_emitted = reducer.emitted.first
    first_emitted.should be_nil
  end

  it 'should sum column value' do
    reducer = LogAnalysisReducer.new(nil, "t1", [123, 456, 789])
    reducer.separate(' ')
    reducer.topic('t1') { reducer.sum(nil) }

    expected_total = 123 + 456 + 789
    reducer.emitted.first["t1"].should == expected_total
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__) , 'spec_helper')
|
2
|
+
|
3
|
+
require 'mapred_factory'
|
4
|
+
require 'log_analysis'
|
5
|
+
|
6
|
+
# Specs for the factories that read a DSL script, work out which DSL it
# uses, and build the matching mapper / reducer / setup object.
describe 'MapRed Factory' do

  before(:all) do
    # Minimal script that only selects the LogAnalysis DSL.
    @script = create_tmp_script("use 'LogAnalysis'")
  end

  it 'can create mapper' do
    created = MapperFactory.create(@script, nil, nil)
    created.class.should == LogAnalysisMapper
  end

  it 'can create reducer' do
    created = ReducerFactory.create(@script, nil, nil)
    created.class.should == LogAnalysisReducer
  end

  it 'can create setup' do
    created = SetupFactory.create(@script, nil)
    created.class.should == LogAnalysisSetup
  end

  it 'can create base if not exists in specific DSL' do
    word_count_script = create_tmp_script("use 'WordCount'")
    created = SetupFactory.create(word_count_script, nil)
    created.class.should == BaseSetup
  end

  it 'specify dsl name from script' do
    MapRedFactory.dsl_name(@script).should == 'LogAnalysis'
  end

  it 'can convert dsl name to dsl lib file and require' do
    name = MapRedFactory.dsl_name(@script)
    MapRedFactory.require_dsl_lib(name).should_not be_nil
    # Bare constant reference: raises NameError (failing the example) if the
    # constant is not defined at this point.
    LogAnalysisMapper
  end
end
|
data/spec/spec_helper.rb
ADDED
data/spec/util_spec.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__) , 'spec_helper')
|
2
|
+
|
3
|
+
require 'util'
|
4
|
+
|
5
|
+
# Specs for the helper functions snake_case and read_file.
describe 'utilities' do
  it 'can change camelcase str to snakecase' do
    converted = snake_case('CamelCaseStr')
    converted.should == 'camel_case_str'
  end

  it 'can read file and get file data to string' do
    body = 'This is a script body.'
    script = create_tmp_script(body)
    # Reading the created script back must return exactly what was written.
    read_file(script).should == body
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'init'
|
2
|
+
require 'core'
|
3
|
+
require 'word_count'
|
4
|
+
|
5
|
+
include HadoopDsl::WordCount
|
6
|
+
|
7
|
+
# Specs for WordCountMapper: per-word emission and the bytes / words / lines
# totals, all computed from the same one-line input.
describe WordCountMapper do
  before do
    # Every example maps this same input value with a fresh mapper.
    @mapper = WordCountMapper.new(nil, nil, 'Lorem ipsum Lorem sit amet,')
  end

  it 'should count uniq' do
    @mapper.count_uniq

    # One record per word occurrence, in input order; only the first three
    # are checked.
    %w(Lorem ipsum Lorem).each_with_index do |word, index|
      @mapper.emitted[index].should == {word => 1}
    end
  end

  it 'should count total bytes' do
    @mapper.total :bytes

    expected = {"#{TOTAL_PREFIX}total bytes" => 23}
    @mapper.emitted[0].should == expected
  end

  it 'should count total words' do
    @mapper.total :words

    expected = {"#{TOTAL_PREFIX}total words" => 5}
    @mapper.emitted[0].should == expected
  end

  it 'should count total lines' do
    @mapper.total :lines

    expected = {"#{TOTAL_PREFIX}total lines" => 1}
    @mapper.emitted[0].should == expected
  end

  it 'should count total bytes, words, lines' do
    @mapper.total :bytes, :words, :lines

    # Totals are emitted in the order they were requested.
    expected = [
      {"#{TOTAL_PREFIX}total bytes" => 23},
      {"#{TOTAL_PREFIX}total words" => 5},
      {"#{TOTAL_PREFIX}total lines" => 1}
    ]
    expected.each_with_index do |record, index|
      @mapper.emitted[index].should == record
    end
  end
end
|
52
|
+
|
53
|
+
# Specs for WordCountReducer: summing the values collected for a word key
# or for one of the TOTAL_PREFIX-ed total keys.
describe WordCountReducer do
  it 'should count uniq' do
    reducer = WordCountReducer.new(nil, 'Lorem', [1, 1, 1])
    reducer.count_uniq

    expected = {'Lorem' => 3}
    reducer.emitted[0].should == expected
  end

  it 'should count total bytes' do
    reducer = WordCountReducer.new(nil, "#{TOTAL_PREFIX}total bytes", [12, 23, 45])
    reducer.total :bytes

    expected = {"#{TOTAL_PREFIX}total bytes" => 12 + 23 + 45}
    reducer.emitted[0].should == expected
  end

  it 'should count total words' do
    reducer = WordCountReducer.new(nil, "#{TOTAL_PREFIX}total words", [3, 4, 5])
    reducer.total :words

    expected = {"#{TOTAL_PREFIX}total words" => 3 + 4 + 5}
    reducer.emitted[0].should == expected
  end

  it 'should count total lines' do
    reducer = WordCountReducer.new(nil, "#{TOTAL_PREFIX}total lines", [1, 2, 3])
    reducer.total :lines

    expected = {"#{TOTAL_PREFIX}total lines" => 6}
    reducer.emitted[0].should == expected
  end
end
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hadoop-rubydsl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Koichi Fujikawa
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-26 00:00:00 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: jruby-on-hadoop
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
description: Hadoop Ruby DSL
|
26
|
+
email: fujibee@gmail.com
|
27
|
+
executables:
|
28
|
+
- hadoop
|
29
|
+
- hadoop-ruby.sh
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- README
|
34
|
+
- TODO
|
35
|
+
files:
|
36
|
+
- README
|
37
|
+
- Rakefile
|
38
|
+
- TODO
|
39
|
+
- VERSION
|
40
|
+
- bin/hadoop
|
41
|
+
- bin/hadoop-ruby.sh
|
42
|
+
- conf/hadoop-site.xml
|
43
|
+
- examples/apachelog-v2-2.rb
|
44
|
+
- examples/apachelog-v2.rb
|
45
|
+
- examples/apachelog.rb
|
46
|
+
- examples/hive_like_test.rb
|
47
|
+
- examples/word_count_test.rb
|
48
|
+
- hadoop-rubydsl.gemspec
|
49
|
+
- lib/core.rb
|
50
|
+
- lib/hive_like.rb
|
51
|
+
- lib/init.rb
|
52
|
+
- lib/java/.gitignore
|
53
|
+
- lib/java/hadoop-ruby.jar
|
54
|
+
- lib/log_analysis.rb
|
55
|
+
- lib/mapred_factory.rb
|
56
|
+
- lib/util.rb
|
57
|
+
- lib/word_count.rb
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: http://github.com/fujibee/hadoop-rubydsl
|
60
|
+
licenses: []
|
61
|
+
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options:
|
64
|
+
- --charset=UTF-8
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: "0"
|
72
|
+
version:
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: "0"
|
78
|
+
version:
|
79
|
+
requirements: []
|
80
|
+
|
81
|
+
rubyforge_project:
|
82
|
+
rubygems_version: 1.3.5
|
83
|
+
signing_key:
|
84
|
+
specification_version: 3
|
85
|
+
summary: Hadoop Ruby DSL
|
86
|
+
test_files:
|
87
|
+
- spec/spec_helper.rb
|
88
|
+
- spec/core_spec.rb
|
89
|
+
- spec/util_spec.rb
|
90
|
+
- spec/mapred_factory_spec.rb
|
91
|
+
- spec/word_count_spec.rb
|
92
|
+
- spec/hive_like_spec.rb
|
93
|
+
- spec/log_analysis_spec.rb
|
94
|
+
- spec/example_spec.rb
|
95
|
+
- spec/init_spec.rb
|
96
|
+
- examples/apachelog-v2.rb
|
97
|
+
- examples/hive_like_test.rb
|
98
|
+
- examples/word_count_test.rb
|
99
|
+
- examples/apachelog-v2-2.rb
|
100
|
+
- examples/apachelog.rb
|