hadoop-jruby-connector 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/hjc/hadoop_streaming.rb +8 -1
- data/spec/hjc/hadoop_streaming_spec.rb +10 -2
- metadata +3 -3
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0.3
|
|
1
|
+
0.0.4
|
data/lib/hjc/hadoop_streaming.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
module Hjc
|
|
2
2
|
class HadoopStreaming
|
|
3
|
-
attr_accessor :input_path, :output_path, :mapper_path, :reducer_path, :jobconf
|
|
3
|
+
attr_accessor :input_path, :output_path, :mapper_path, :reducer_path, :jobconf, :language
|
|
4
4
|
attr_accessor :local, :debug
|
|
5
5
|
|
|
6
6
|
def initialize
|
|
@@ -30,11 +30,13 @@ module Hjc
|
|
|
30
30
|
end
|
|
31
31
|
|
|
32
32
|
def mapper=(mapper)
|
|
33
|
+
mapper = with_shebang(mapper, @language)
|
|
33
34
|
@files['mapper'] = file = Util.to_temp_file('mapper', mapper, :mod => 0700)
|
|
34
35
|
@mapper_path = File.basename(file.path)
|
|
35
36
|
end
|
|
36
37
|
|
|
37
38
|
def reducer=(reducer)
|
|
39
|
+
reducer = with_shebang(reducer, @language)
|
|
38
40
|
@files['reducer'] = file = Util.to_temp_file('reducer', reducer, :mod => 0700)
|
|
39
41
|
@reducer_path = File.basename(file.path)
|
|
40
42
|
end
|
|
@@ -60,5 +62,10 @@ module Hjc
|
|
|
60
62
|
puts "args: #{concated_args.join(' ')}" if @debug
|
|
61
63
|
concated_args
|
|
62
64
|
end
|
|
65
|
+
|
|
66
|
+
private
|
|
67
|
+
def with_shebang(body, language)
|
|
68
|
+
"#!/usr/bin/env #{language}\n#{body}"
|
|
69
|
+
end
|
|
63
70
|
end
|
|
64
71
|
end
|
|
@@ -34,6 +34,16 @@ module Hjc
|
|
|
34
34
|
job.args.join(" ").should match(/reducer/)
|
|
35
35
|
end
|
|
36
36
|
|
|
37
|
+
it 'adds shebang if language exists' do
|
|
38
|
+
job = HadoopStreaming.new
|
|
39
|
+
job.language = 'ruby'
|
|
40
|
+
job.mapper = 'mapper'
|
|
41
|
+
job.reducer = 'reducer'
|
|
42
|
+
|
|
43
|
+
File.read(File.join(TMP_DIR, job.mapper_path)).should match(%Q:#!/usr/bin/env ruby:)
|
|
44
|
+
File.read(File.join(TMP_DIR, job.reducer_path)).should match(%Q:#!/usr/bin/env ruby:)
|
|
45
|
+
end
|
|
46
|
+
|
|
37
47
|
it 'can run Hadoop streaming job with path' do
|
|
38
48
|
pending 'path does not work'
|
|
39
49
|
job = HadoopStreaming.new
|
|
@@ -91,7 +101,6 @@ module Hjc
|
|
|
91
101
|
end
|
|
92
102
|
|
|
93
103
|
MAPPER = <<-'EOF'
|
|
94
|
-
#!/usr/bin/env ruby
|
|
95
104
|
ARGF.each do |line|
|
|
96
105
|
line.chomp!
|
|
97
106
|
line.split.each do |word|
|
|
@@ -101,7 +110,6 @@ end
|
|
|
101
110
|
EOF
|
|
102
111
|
|
|
103
112
|
REDUCER = <<-'EOF'
|
|
104
|
-
#!/usr/bin/env ruby
|
|
105
113
|
count = Hash.new {|h,k| h[k] = 0}
|
|
106
114
|
ARGF.each do |line|
|
|
107
115
|
line.chomp!
|
metadata
CHANGED
|
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
|
5
5
|
segments:
|
|
6
6
|
- 0
|
|
7
7
|
- 0
|
|
8
|
-
- 3
|
|
9
|
-
version: 0.0.3
|
|
8
|
+
- 4
|
|
9
|
+
version: 0.0.4
|
|
10
10
|
platform: ruby
|
|
11
11
|
authors:
|
|
12
12
|
- Koichi Fujikawa
|
|
@@ -14,7 +14,7 @@ autorequire:
|
|
|
14
14
|
bindir: bin
|
|
15
15
|
cert_chain: []
|
|
16
16
|
|
|
17
|
-
date: 2011-04-
|
|
17
|
+
date: 2011-04-16 00:00:00 +09:00
|
|
18
18
|
default_executable:
|
|
19
19
|
dependencies: []
|
|
20
20
|
|