hadoop-rubydsl 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/spec/init_spec.rb ADDED
@@ -0,0 +1,56 @@
1
+ require 'java'
2
+ require 'init'
3
+
4
+ import 'org.apache.hadoop.io.IntWritable'
5
+ import 'org.apache.hadoop.io.Text'
6
+ import 'org.apache.hadoop.mapred.JobConf'
7
+
8
+ describe 'mapreduce init' do
9
+
10
+ before(:all) do
11
+ @script = create_tmp_script(<<-EOF)
12
+ use 'LogAnalysis'
13
+ data 'test' do
14
+ from 'test/inputs'
15
+ to 'test/outputs'
16
+
17
+ separate(" ")
18
+ column_name 'c0', 'c1', 'c2', 'c3'
19
+ topic 't1' do
20
+ count_uniq columns(:c1)
21
+ end
22
+ end
23
+ EOF
24
+ end
25
+
26
+ before do
27
+ @one = IntWritable.new(1)
28
+ @output = mock('output')
29
+ end
30
+
31
+ it 'can map sucessfully' do
32
+ key, value = Text.new, Text.new
33
+ key.set("key")
34
+ value.set('it should be fine')
35
+ @output.should_receive(:collect).once #.with(@text, @one)
36
+
37
+ map(key, value, @output, nil, @script)
38
+ end
39
+
40
+ it 'can reduce sucessfully' do
41
+ key, value = Text.new, Text.new
42
+ key.set("t1\tkey")
43
+ values = [@one, @one, @one]
44
+ @output.should_receive(:collect).once #.with(@text, @one)
45
+
46
+ reduce(key, values, @output, nil, @script)
47
+ end
48
+
49
+ it 'can set job conf' do
50
+ conf = JobConf.new
51
+ paths = setup(conf, @script)
52
+
53
+ paths[0].should == 'test/inputs'
54
+ paths[1].should == 'test/outputs'
55
+ end
56
+ end
data/spec/log_analysis_spec.rb ADDED
@@ -0,0 +1,119 @@
1
+ require 'init'
2
+ require 'core'
3
+ require 'log_analysis'
4
+
5
+ include HadoopDsl::LogAnalysis
6
+
7
+ describe LogAnalysisMapper do
8
+ before do
9
+ @apache_log = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
10
+ end
11
+
12
+ it 'should separate data by space' do
13
+ value = 'Lorem ipsum dolor sit amet,'
14
+ mapper = LogAnalysisMapper.new(nil, nil, value)
15
+ mapper.separate(' ')
16
+
17
+ mapper.column[1].value.should == 'ipsum'
18
+ end
19
+
20
+ it 'should separate by pattern' do
21
+ mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
22
+ mapper.pattern /(.*) (.*) (.*) \[(.*)\] (".*") (\d*) (\d*)/
23
+
24
+ mapper.column[2].value.should == 'frank'
25
+ end
26
+
27
+ it 'should label column name by string' do
28
+ mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
29
+ mapper.pattern /(.*) (.*) (.*) \[(.*)\] (".*") (\d*) (\d*)/
30
+ mapper.column_name 'remote_host', PASS, 'user', 'access_date', 'request', 'status', 'bytes'
31
+
32
+ mapper.column['user'].value.should == 'frank'
33
+ end
34
+
35
+ it 'should label column name by symbol' do
36
+ mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
37
+ mapper.pattern /(.*) (.*) (.*) \[(.*)\] (".*") (\d*) (\d*)/
38
+ mapper.column_name :remote_host, PASS, :user, :access_date, :request, :status, :bytes
39
+
40
+ mapper.column[:user].value.should == 'frank'
41
+ end
42
+
43
+ it 'should count uniq column' do
44
+ value = 'count uniq'
45
+ mapper = LogAnalysisMapper.new(nil, nil, value)
46
+ mapper.separate(' ')
47
+ mapper.topic('t1') { mapper.count_uniq mapper.column[1] }
48
+
49
+ mapper.emitted.first["t1\tuniq"].should == 1
50
+ end
51
+
52
+ it 'should sum column value' do
53
+ value = 'sum 123'
54
+ mapper = LogAnalysisMapper.new(nil, nil, value)
55
+ mapper.separate(' ')
56
+ mapper.topic('t1') { mapper.sum mapper.column[1] }
57
+
58
+ mapper.emitted.first["t1"].should == 123
59
+ end
60
+
61
+ it 'has topic which returns label' do
62
+ value = 'Lorem ipsum dolor sit amet,'
63
+ mapper = LogAnalysisMapper.new(nil, nil, value)
64
+ mapper.separate(' ')
65
+
66
+ topic = mapper.topic('desc', :label => 'label')
67
+ topic.label.should == 'label'
68
+ end
69
+
70
+ it 'has topic which returns label as desc' do
71
+ value = 'Lorem ipsum dolor sit amet,'
72
+ mapper = LogAnalysisMapper.new(nil, nil, value)
73
+ mapper.separate(' ')
74
+
75
+ topic = mapper.topic('desc')
76
+ topic.label.should == 'desc'
77
+ end
78
+
79
+ it 'has topic which returns label as desc with space' do
80
+ value = 'Lorem ipsum dolor sit amet,'
81
+ mapper = LogAnalysisMapper.new(nil, nil, value)
82
+ mapper.separate(' ')
83
+
84
+ topic = mapper.topic('desc with space')
85
+ topic.label.should == 'desc_with_space'
86
+ end
87
+ end
88
+
89
+ describe LogAnalysisReducer do
90
+ it 'should count uniq in the topic' do
91
+ key = "t1\tuniq"
92
+ values = [1, 1, 1]
93
+ reducer = LogAnalysisReducer.new(nil, key, values)
94
+ reducer.separate(' ')
95
+ reducer.topic('t1') { reducer.count_uniq(nil) }
96
+
97
+ reducer.emitted.first["t1\tuniq"].should == 3
98
+ end
99
+
100
+ it 'should not count uniq of other topic' do
101
+ key = "t2\tuniq"
102
+ values = [1, 1, 1]
103
+ reducer = LogAnalysisReducer.new(nil, key, values)
104
+ reducer.separate(' ')
105
+ reducer.topic('t1') { reducer.count_uniq(nil) }
106
+
107
+ reducer.emitted.first.should be_nil
108
+ end
109
+
110
+ it 'should sum column value' do
111
+ key = "t1"
112
+ values = [123, 456, 789]
113
+ reducer = LogAnalysisReducer.new(nil, key, values)
114
+ reducer.separate(' ')
115
+ reducer.topic('t1') { reducer.sum(nil) }
116
+
117
+ reducer.emitted.first["t1"].should == 123+456+789
118
+ end
119
+ end
data/spec/mapred_factory_spec.rb ADDED
@@ -0,0 +1,42 @@
1
+ require File.join(File.dirname(__FILE__) , 'spec_helper')
2
+
3
+ require 'mapred_factory'
4
+ require 'log_analysis'
5
+
6
+ describe 'MapRed Factory' do
7
+
8
+ before(:all) do
9
+ @script = create_tmp_script("use 'LogAnalysis'")
10
+ end
11
+
12
+ it 'can create mapper' do
13
+ mapper = MapperFactory.create(@script, nil, nil)
14
+ mapper.class.should == LogAnalysisMapper
15
+ end
16
+
17
+ it 'can create reducer' do
18
+ reducer = ReducerFactory.create(@script, nil, nil)
19
+ reducer.class.should == LogAnalysisReducer
20
+ end
21
+
22
+ it 'can create setup' do
23
+ s = SetupFactory.create(@script, nil)
24
+ s.class.should == LogAnalysisSetup
25
+ end
26
+
27
+ it 'can create base if not exists in specific DSL' do
28
+ s = SetupFactory.create(create_tmp_script("use 'WordCount'"), nil)
29
+ s.class.should == BaseSetup
30
+ end
31
+
32
+ it 'specify dsl name from script' do
33
+ dsl_name = MapRedFactory.dsl_name(@script)
34
+ dsl_name.should == 'LogAnalysis'
35
+ end
36
+
37
+ it 'can convert dsl name to dsl lib file and require' do
38
+ dsl_name = MapRedFactory.dsl_name(@script)
39
+ MapRedFactory.require_dsl_lib(dsl_name).should_not be_nil
40
+ LogAnalysisMapper
41
+ end
42
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,11 @@
1
+ # spec helper
2
+
3
+ require 'tempfile'
4
+
5
+ def create_tmp_script(body)
6
+ tmp = Tempfile.new('test.rb')
7
+ tmp.print body
8
+ tmp.close
9
+ tmp.path
10
+ end
11
+
data/spec/util_spec.rb ADDED
@@ -0,0 +1,15 @@
1
+ require File.join(File.dirname(__FILE__) , 'spec_helper')
2
+
3
+ require 'util'
4
+
5
+ describe 'utilities' do
6
+ it 'can change camelcase str to snakecase' do
7
+ snake_case('CamelCaseStr').should == 'camel_case_str'
8
+ end
9
+
10
+ it 'can read file and get file data to string' do
11
+ script_body = 'This is a script body.'
12
+ @script = create_tmp_script(script_body)
13
+ read_file(@script).should == script_body
14
+ end
15
+ end
data/spec/word_count_spec.rb ADDED
@@ -0,0 +1,89 @@
1
+ require 'init'
2
+ require 'core'
3
+ require 'word_count'
4
+
5
+ include HadoopDsl::WordCount
6
+
7
+ describe WordCountMapper do
8
+ it 'should count uniq' do
9
+ value = 'Lorem ipsum Lorem sit amet,'
10
+ mapper = WordCountMapper.new(nil, nil, value)
11
+
12
+ mapper.count_uniq
13
+ mapper.emitted[0].should == {'Lorem' => 1}
14
+ mapper.emitted[1].should == {'ipsum' => 1}
15
+ mapper.emitted[2].should == {'Lorem' => 1}
16
+ end
17
+
18
+ it 'should count total bytes' do
19
+ value = 'Lorem ipsum Lorem sit amet,'
20
+ mapper = WordCountMapper.new(nil, nil, value)
21
+
22
+ mapper.total :bytes
23
+ mapper.emitted[0].should == {"#{TOTAL_PREFIX}total bytes" => 23}
24
+ end
25
+
26
+ it 'should count total words' do
27
+ value = 'Lorem ipsum Lorem sit amet,'
28
+ mapper = WordCountMapper.new(nil, nil, value)
29
+
30
+ mapper.total :words
31
+ mapper.emitted[0].should == {"#{TOTAL_PREFIX}total words" => 5}
32
+ end
33
+
34
+ it 'should count total lines' do
35
+ value = 'Lorem ipsum Lorem sit amet,'
36
+ mapper = WordCountMapper.new(nil, nil, value)
37
+
38
+ mapper.total :lines
39
+ mapper.emitted[0].should == {"#{TOTAL_PREFIX}total lines" => 1}
40
+ end
41
+
42
+ it 'should count total bytes, words, lines' do
43
+ value = 'Lorem ipsum Lorem sit amet,'
44
+ mapper = WordCountMapper.new(nil, nil, value)
45
+
46
+ mapper.total :bytes, :words, :lines
47
+ mapper.emitted[0].should == {"#{TOTAL_PREFIX}total bytes" => 23}
48
+ mapper.emitted[1].should == {"#{TOTAL_PREFIX}total words" => 5}
49
+ mapper.emitted[2].should == {"#{TOTAL_PREFIX}total lines" => 1}
50
+ end
51
+ end
52
+
53
+ describe WordCountReducer do
54
+ it 'should count uniq' do
55
+ key = 'Lorem'
56
+ values = [1, 1, 1]
57
+ reducer = WordCountReducer.new(nil, key, values)
58
+
59
+ reducer.count_uniq
60
+ reducer.emitted[0].should == {'Lorem' => 3}
61
+ end
62
+
63
+ it 'should count total bytes' do
64
+ key = "#{TOTAL_PREFIX}total bytes"
65
+ values = [12, 23, 45]
66
+ reducer = WordCountReducer.new(nil, key, values)
67
+
68
+ reducer.total :bytes
69
+ reducer.emitted[0].should == {"#{TOTAL_PREFIX}total bytes" => 12 + 23 + 45}
70
+ end
71
+
72
+ it 'should count total words' do
73
+ key = "#{TOTAL_PREFIX}total words"
74
+ values = [3, 4, 5]
75
+ reducer = WordCountReducer.new(nil, key, values)
76
+
77
+ reducer.total :words
78
+ reducer.emitted[0].should == {"#{TOTAL_PREFIX}total words" => 3 + 4 + 5}
79
+ end
80
+
81
+ it 'should count total lines' do
82
+ key = "#{TOTAL_PREFIX}total lines"
83
+ values = [1, 2, 3]
84
+ reducer = WordCountReducer.new(nil, key, values)
85
+
86
+ reducer.total :lines
87
+ reducer.emitted[0].should == {"#{TOTAL_PREFIX}total lines" => 6}
88
+ end
89
+ end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hadoop-rubydsl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Koichi Fujikawa
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-26 00:00:00 +09:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: jruby-on-hadoop
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Hadoop Ruby DSL
26
+ email: fujibee@gmail.com
27
+ executables:
28
+ - hadoop
29
+ - hadoop-ruby.sh
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - README
34
+ - TODO
35
+ files:
36
+ - README
37
+ - Rakefile
38
+ - TODO
39
+ - VERSION
40
+ - bin/hadoop
41
+ - bin/hadoop-ruby.sh
42
+ - conf/hadoop-site.xml
43
+ - examples/apachelog-v2-2.rb
44
+ - examples/apachelog-v2.rb
45
+ - examples/apachelog.rb
46
+ - examples/hive_like_test.rb
47
+ - examples/word_count_test.rb
48
+ - hadoop-rubydsl.gemspec
49
+ - lib/core.rb
50
+ - lib/hive_like.rb
51
+ - lib/init.rb
52
+ - lib/java/.gitignore
53
+ - lib/java/hadoop-ruby.jar
54
+ - lib/log_analysis.rb
55
+ - lib/mapred_factory.rb
56
+ - lib/util.rb
57
+ - lib/word_count.rb
58
+ has_rdoc: true
59
+ homepage: http://github.com/fujibee/hadoop-rubydsl
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options:
64
+ - --charset=UTF-8
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: "0"
72
+ version:
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: "0"
78
+ version:
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.3.5
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: Hadoop Ruby DSL
86
+ test_files:
87
+ - spec/spec_helper.rb
88
+ - spec/core_spec.rb
89
+ - spec/util_spec.rb
90
+ - spec/mapred_factory_spec.rb
91
+ - spec/word_count_spec.rb
92
+ - spec/hive_like_spec.rb
93
+ - spec/log_analysis_spec.rb
94
+ - spec/example_spec.rb
95
+ - spec/init_spec.rb
96
+ - examples/apachelog-v2.rb
97
+ - examples/hive_like_test.rb
98
+ - examples/word_count_test.rb
99
+ - examples/apachelog-v2-2.rb
100
+ - examples/apachelog.rb