hadoop-rubydsl 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/spec/init_spec.rb ADDED
@@ -0,0 +1,56 @@
1
+ require 'java'
2
+ require 'init'
3
+
4
+ import 'org.apache.hadoop.io.IntWritable'
5
+ import 'org.apache.hadoop.io.Text'
6
+ import 'org.apache.hadoop.mapred.JobConf'
7
+
8
# End-to-end checks of the top-level map / reduce / setup entry points, driven
# by a small LogAnalysis DSL script that is written to a temporary file.
describe 'mapreduce init' do

  # The DSL script is written once for the whole group; every example passes
  # its path to the entry point under test.
  # NOTE(review): create_tmp_script is not required by the requires visible at
  # the top of this file -- presumably spec_helper is loaded elsewhere; confirm.
  before(:all) do
    @script = create_tmp_script(<<-EOF)
      use 'LogAnalysis'
      data 'test' do
        from 'test/inputs'
        to 'test/outputs'

        separate(" ")
        column_name 'c0', 'c1', 'c2', 'c3'
        topic 't1' do
          count_uniq columns(:c1)
        end
      end
    EOF
  end

  # Fresh collaborators per example: a Hadoop IntWritable(1) and a mock that
  # stands in for the output collector.
  before do
    @one = IntWritable.new(1)
    @output = mock('output')
  end

  it 'can map successfully' do
    key = Text.new
    value = Text.new
    key.set("key")
    value.set('it should be fine')
    # Only the number of collect calls is verified, not their arguments.
    @output.should_receive(:collect).once

    map(key, value, @output, nil, @script)
  end

  it 'can reduce successfully' do
    key = Text.new
    key.set("t1\tkey")
    values = [@one, @one, @one]
    # Only the number of collect calls is verified, not their arguments.
    @output.should_receive(:collect).once

    reduce(key, values, @output, nil, @script)
  end

  it 'can set job conf' do
    conf = JobConf.new
    paths = setup(conf, @script)

    # setup returns the paths declared by `from` and `to` in the script.
    paths[0].should == 'test/inputs'
    paths[1].should == 'test/outputs'
  end
end
@@ -0,0 +1,119 @@
1
+ require 'init'
2
+ require 'core'
3
+ require 'log_analysis'
4
+
5
+ include HadoopDsl::LogAnalysis
6
+
7
# Specs for LogAnalysisMapper: splitting an input line into columns (by
# separator or by regexp), naming the columns, and emitting per-topic values.
describe LogAnalysisMapper do
  before do
    @apache_log = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
    # One capture group per column of @apache_log. Defined once here and always
    # passed with explicit parentheses, because `mapper.pattern /.../` triggers
    # Ruby's "ambiguous first argument" warning.
    @apache_log_pattern = /(.*) (.*) (.*) \[(.*)\] (".*") (\d*) (\d*)/
  end

  it 'should separate data by space' do
    value = 'Lorem ipsum dolor sit amet,'
    mapper = LogAnalysisMapper.new(nil, nil, value)
    mapper.separate(' ')

    mapper.column[1].value.should == 'ipsum'
  end

  it 'should separate by pattern' do
    mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
    mapper.pattern(@apache_log_pattern)

    mapper.column[2].value.should == 'frank'
  end

  it 'should label column name by string' do
    mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
    mapper.pattern(@apache_log_pattern)
    # PASS occupies the second column's slot, which is given no name here.
    mapper.column_name 'remote_host', PASS, 'user', 'access_date', 'request', 'status', 'bytes'

    mapper.column['user'].value.should == 'frank'
  end

  it 'should label column name by symbol' do
    mapper = LogAnalysisMapper.new(nil, nil, @apache_log)
    mapper.pattern(@apache_log_pattern)
    mapper.column_name :remote_host, PASS, :user, :access_date, :request, :status, :bytes

    mapper.column[:user].value.should == 'frank'
  end

  it 'should count uniq column' do
    value = 'count uniq'
    mapper = LogAnalysisMapper.new(nil, nil, value)
    mapper.separate(' ')
    mapper.topic('t1') { mapper.count_uniq mapper.column[1] }

    # The emitted key is the topic label and the column value joined by a tab.
    mapper.emitted.first["t1\tuniq"].should == 1
  end

  it 'should sum column value' do
    value = 'sum 123'
    mapper = LogAnalysisMapper.new(nil, nil, value)
    mapper.separate(' ')
    mapper.topic('t1') { mapper.sum mapper.column[1] }

    mapper.emitted.first["t1"].should == 123
  end

  it 'has topic which returns label' do
    value = 'Lorem ipsum dolor sit amet,'
    mapper = LogAnalysisMapper.new(nil, nil, value)
    mapper.separate(' ')

    topic = mapper.topic('desc', :label => 'label')
    topic.label.should == 'label'
  end

  it 'has topic which returns label as desc' do
    value = 'Lorem ipsum dolor sit amet,'
    mapper = LogAnalysisMapper.new(nil, nil, value)
    mapper.separate(' ')

    topic = mapper.topic('desc')
    topic.label.should == 'desc'
  end

  it 'has topic which returns label as desc with space' do
    value = 'Lorem ipsum dolor sit amet,'
    mapper = LogAnalysisMapper.new(nil, nil, value)
    mapper.separate(' ')

    # Spaces in the description come back as underscores in the label.
    topic = mapper.topic('desc with space')
    topic.label.should == 'desc_with_space'
  end
end
88
+
89
# Specs for LogAnalysisReducer: aggregating the values received for one key
# inside a topic block.
describe LogAnalysisReducer do
  it 'should count uniq in the topic' do
    reducer = LogAnalysisReducer.new(nil, "t1\tuniq", [1, 1, 1])
    reducer.separate(' ')
    reducer.topic('t1') do
      reducer.count_uniq(nil)
    end

    first_emit = reducer.emitted.first
    first_emit["t1\tuniq"].should == 3
  end

  it 'should not count uniq of other topic' do
    # The key belongs to topic t2, so the t1 topic block must emit nothing.
    reducer = LogAnalysisReducer.new(nil, "t2\tuniq", [1, 1, 1])
    reducer.separate(' ')
    reducer.topic('t1') do
      reducer.count_uniq(nil)
    end

    first_emit = reducer.emitted.first
    first_emit.should be_nil
  end

  it 'should sum column value' do
    reducer = LogAnalysisReducer.new(nil, "t1", [123, 456, 789])
    reducer.separate(' ')
    reducer.topic('t1') do
      reducer.sum(nil)
    end

    expected_total = 123 + 456 + 789
    reducer.emitted.first["t1"].should == expected_total
  end
end
@@ -0,0 +1,42 @@
1
+ require File.join(File.dirname(__FILE__) , 'spec_helper')
2
+
3
+ require 'mapred_factory'
4
+ require 'log_analysis'
5
+
6
# Specs for the factories that choose mapper / reducer / setup classes from the
# DSL named by the script's `use` line.
describe 'MapRed Factory' do

  # A one-line script that selects the LogAnalysis DSL, shared by all examples.
  before(:all) do
    @script = create_tmp_script("use 'LogAnalysis'")
  end

  it 'can create mapper' do
    created = MapperFactory.create(@script, nil, nil)
    created.class.should == LogAnalysisMapper
  end

  it 'can create reducer' do
    created = ReducerFactory.create(@script, nil, nil)
    created.class.should == LogAnalysisReducer
  end

  it 'can create setup' do
    created = SetupFactory.create(@script, nil)
    created.class.should == LogAnalysisSetup
  end

  it 'can create base if not exists in specific DSL' do
    word_count_script = create_tmp_script("use 'WordCount'")
    created = SetupFactory.create(word_count_script, nil)
    created.class.should == BaseSetup
  end

  it 'specify dsl name from script' do
    MapRedFactory.dsl_name(@script).should == 'LogAnalysis'
  end

  it 'can convert dsl name to dsl lib file and require' do
    name = MapRedFactory.dsl_name(@script)
    MapRedFactory.require_dsl_lib(name).should_not be_nil
    # Bare constant reference: raises NameError if the constant is not defined.
    LogAnalysisMapper
  end
end
@@ -0,0 +1,11 @@
1
+ # spec helper
2
+
3
+ require 'tempfile'
4
+
5
# Holds every Tempfile handed out by create_tmp_script. A Tempfile deletes its
# file when the object is garbage-collected, so returning only the path would
# let the script vanish while a spec still uses it. The guard keeps a second
# load of this helper from redefining the constant.
CREATED_TMP_SCRIPTS = [] unless defined?(CREATED_TMP_SCRIPTS)

# Writes +body+ to a new temporary file and returns the file's path.
#
# body - String content of the script.
#
# Returns the String path of the closed temporary file. The file stays on disk
# for as long as the process keeps the Tempfile referenced.
def create_tmp_script(body)
  tmp = Tempfile.new('test.rb')
  tmp.print body
  tmp.close
  CREATED_TMP_SCRIPTS << tmp
  tmp.path
end
11
+
data/spec/util_spec.rb ADDED
@@ -0,0 +1,15 @@
1
+ require File.join(File.dirname(__FILE__) , 'spec_helper')
2
+
3
+ require 'util'
4
+
5
# Specs for the free-standing helper functions loaded from 'util'.
describe 'utilities' do
  it 'can change camelcase str to snakecase' do
    converted = snake_case('CamelCaseStr')
    converted.should == 'camel_case_str'
  end

  it 'can read file and get file data to string' do
    # Round trip: write a body to a temp script, then read it back by path.
    expected_body = 'This is a script body.'
    script_path = create_tmp_script(expected_body)
    read_file(script_path).should == expected_body
  end
end
@@ -0,0 +1,89 @@
1
+ require 'init'
2
+ require 'core'
3
+ require 'word_count'
4
+
5
+ include HadoopDsl::WordCount
6
+
7
# Specs for WordCountMapper: per-word emits and the bytes / words / lines
# totals for a single input line.
describe WordCountMapper do
  # Every example maps the same five-word line, so build the mapper up front.
  before do
    @mapper = WordCountMapper.new(nil, nil, 'Lorem ipsum Lorem sit amet,')
  end

  it 'should count uniq' do
    @mapper.count_uniq

    # One emit per word occurrence; the repeated word is emitted again.
    emits = @mapper.emitted
    emits[0].should == {'Lorem' => 1}
    emits[1].should == {'ipsum' => 1}
    emits[2].should == {'Lorem' => 1}
  end

  it 'should count total bytes' do
    @mapper.total :bytes

    @mapper.emitted[0].should == {"#{TOTAL_PREFIX}total bytes" => 23}
  end

  it 'should count total words' do
    @mapper.total :words

    @mapper.emitted[0].should == {"#{TOTAL_PREFIX}total words" => 5}
  end

  it 'should count total lines' do
    @mapper.total :lines

    @mapper.emitted[0].should == {"#{TOTAL_PREFIX}total lines" => 1}
  end

  it 'should count total bytes, words, lines' do
    @mapper.total :bytes, :words, :lines

    # Totals are emitted in the order they were requested.
    emits = @mapper.emitted
    emits[0].should == {"#{TOTAL_PREFIX}total bytes" => 23}
    emits[1].should == {"#{TOTAL_PREFIX}total words" => 5}
    emits[2].should == {"#{TOTAL_PREFIX}total lines" => 1}
  end
end
52
+
53
# Specs for WordCountReducer: summing the values received for a word key or for
# one of the TOTAL_PREFIX-ed total keys.
describe WordCountReducer do
  it 'should count uniq' do
    reducer = WordCountReducer.new(nil, 'Lorem', [1, 1, 1])
    reducer.count_uniq

    reducer.emitted[0].should == {'Lorem' => 3}
  end

  it 'should count total bytes' do
    bytes_key = "#{TOTAL_PREFIX}total bytes"
    reducer = WordCountReducer.new(nil, bytes_key, [12, 23, 45])
    reducer.total :bytes

    reducer.emitted[0].should == {"#{TOTAL_PREFIX}total bytes" => 12 + 23 + 45}
  end

  it 'should count total words' do
    words_key = "#{TOTAL_PREFIX}total words"
    reducer = WordCountReducer.new(nil, words_key, [3, 4, 5])
    reducer.total :words

    reducer.emitted[0].should == {"#{TOTAL_PREFIX}total words" => 3 + 4 + 5}
  end

  it 'should count total lines' do
    lines_key = "#{TOTAL_PREFIX}total lines"
    reducer = WordCountReducer.new(nil, lines_key, [1, 2, 3])
    reducer.total :lines

    reducer.emitted[0].should == {"#{TOTAL_PREFIX}total lines" => 6}
  end
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hadoop-rubydsl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Koichi Fujikawa
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-26 00:00:00 +09:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: jruby-on-hadoop
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Hadoop Ruby DSL
26
+ email: fujibee@gmail.com
27
+ executables:
28
+ - hadoop
29
+ - hadoop-ruby.sh
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - README
34
+ - TODO
35
+ files:
36
+ - README
37
+ - Rakefile
38
+ - TODO
39
+ - VERSION
40
+ - bin/hadoop
41
+ - bin/hadoop-ruby.sh
42
+ - conf/hadoop-site.xml
43
+ - examples/apachelog-v2-2.rb
44
+ - examples/apachelog-v2.rb
45
+ - examples/apachelog.rb
46
+ - examples/hive_like_test.rb
47
+ - examples/word_count_test.rb
48
+ - hadoop-rubydsl.gemspec
49
+ - lib/core.rb
50
+ - lib/hive_like.rb
51
+ - lib/init.rb
52
+ - lib/java/.gitignore
53
+ - lib/java/hadoop-ruby.jar
54
+ - lib/log_analysis.rb
55
+ - lib/mapred_factory.rb
56
+ - lib/util.rb
57
+ - lib/word_count.rb
58
+ has_rdoc: true
59
+ homepage: http://github.com/fujibee/hadoop-rubydsl
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options:
64
+ - --charset=UTF-8
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: "0"
72
+ version:
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: "0"
78
+ version:
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.3.5
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: Hadoop Ruby DSL
86
+ test_files:
87
+ - spec/spec_helper.rb
88
+ - spec/core_spec.rb
89
+ - spec/util_spec.rb
90
+ - spec/mapred_factory_spec.rb
91
+ - spec/word_count_spec.rb
92
+ - spec/hive_like_spec.rb
93
+ - spec/log_analysis_spec.rb
94
+ - spec/example_spec.rb
95
+ - spec/init_spec.rb
96
+ - examples/apachelog-v2.rb
97
+ - examples/hive_like_test.rb
98
+ - examples/word_count_test.rb
99
+ - examples/apachelog-v2-2.rb
100
+ - examples/apachelog.rb