trafficbroker-mandy 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +6 -0
- data/bin/mandy-hadoop +19 -13
- data/bin/mandy-local +9 -2
- data/bin/mandy-map +2 -1
- data/bin/mandy-reduce +2 -1
- data/lib/dsl.rb +5 -19
- data/lib/job.rb +15 -1
- data/lib/test_runner.rb +1 -1
- metadata +1 -1
data/Rakefile
CHANGED
@@ -8,4 +8,10 @@ task :default => :spec
|
|
8
8
|
Spec::Rake::SpecTask.new(:spec) do |t|
|
9
9
|
t.spec_files = FileList['spec/lib/**/*_spec.rb']
|
10
10
|
t.spec_opts = %w{-f s -c -L mtime}
|
11
|
+
end
|
12
|
+
|
13
|
+
task :gem do
|
14
|
+
`sudo gem build mandy.gemspec`
|
15
|
+
`mkdir pkg; mv mandy-*.gem pkg/`
|
16
|
+
`sudo gem install pkg/mandy-*.gem`
|
11
17
|
end
|
data/bin/mandy-hadoop
CHANGED
@@ -7,22 +7,28 @@ end
|
|
7
7
|
file = ARGV[0]
|
8
8
|
filename = File.basename(file)
|
9
9
|
input = ARGV[1]
|
10
|
-
|
10
|
+
output_folder = ARGV[2]
|
11
11
|
config = ARGV[3]
|
12
12
|
|
13
13
|
require absolute_path(file)
|
14
14
|
|
15
|
-
|
15
|
+
Mandy::Job.jobs.each_with_index do |job, i|
|
16
|
+
|
17
|
+
jobconf = job.settings.map { |key, value| %(-jobconf #{key}="#{value}") }.join(' ')
|
18
|
+
|
19
|
+
output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
|
20
|
+
|
21
|
+
command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar \
|
22
|
+
-additionalconfspec "#{config}" \
|
23
|
+
-input "#{input}" \
|
24
|
+
-mapper "mandy-map #{filename} '#{job.name}'" \
|
25
|
+
-reducer "mandy-reduce #{filename} '#{job.name}'" \
|
26
|
+
-file "#{file}" \
|
27
|
+
-output "#{output}" \
|
28
|
+
#{jobconf})
|
16
29
|
|
17
|
-
command
|
18
|
-
-additionalconfspec "#{config}" \
|
19
|
-
-input "#{input}" \
|
20
|
-
-mapper "mandy-map #{filename}" \
|
21
|
-
-reducer "mandy-reduce #{filename}" \
|
22
|
-
-file "#{file}" \
|
23
|
-
-output "#{output}" \
|
24
|
-
#{jobconf})
|
30
|
+
`#{command}`
|
25
31
|
|
26
|
-
|
27
|
-
|
28
|
-
|
32
|
+
# puts "#{command}"
|
33
|
+
input = output
|
34
|
+
end
|
data/bin/mandy-local
CHANGED
@@ -1,11 +1,18 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require "fileutils"
|
4
|
+
|
3
5
|
def absolute_path(path)
|
4
6
|
path =~ /^\// ? path : File.join(Dir.pwd, path)
|
5
7
|
end
|
6
8
|
|
7
9
|
file = absolute_path(ARGV[0])
|
8
10
|
input = absolute_path(ARGV[1])
|
9
|
-
|
11
|
+
output_folder = FileUtils.mkdir_p(absolute_path(ARGV[2]))
|
12
|
+
require file
|
10
13
|
|
11
|
-
|
14
|
+
Mandy::Job.jobs.each_with_index do |job, i|
|
15
|
+
out = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
|
16
|
+
`cat #{input} | mandy-map #{file} "#{job.name}" | sort | mandy-reduce #{file} "#{job.name}" > #{out}`
|
17
|
+
input = out
|
18
|
+
end
|
data/bin/mandy-map
CHANGED
data/bin/mandy-reduce
CHANGED
data/lib/dsl.rb
CHANGED
@@ -1,24 +1,10 @@
|
|
1
1
|
module Mandy
|
2
2
|
module DSL
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def set(key, value)
|
9
|
-
Mandy::Job.default.settings[key.to_s] = value
|
10
|
-
end
|
11
|
-
|
12
|
-
def job_name(name)
|
13
|
-
set "mapred.job.name", name.to_s
|
14
|
-
end
|
15
|
-
|
16
|
-
def map(&blk)
|
17
|
-
Mandy::Job.default.map(&blk)
|
18
|
-
end
|
19
|
-
|
20
|
-
def reduce(&blk)
|
21
|
-
Mandy::Job.default.reduce(&blk)
|
3
|
+
def job(name, &blk)
|
4
|
+
job = Mandy::Job.new(name)
|
5
|
+
job.instance_eval(&blk) unless blk.nil?
|
6
|
+
Mandy::Job.jobs << job
|
7
|
+
job
|
22
8
|
end
|
23
9
|
end
|
24
10
|
end
|
data/lib/job.rb
CHANGED
@@ -1,17 +1,31 @@
|
|
1
1
|
module Mandy
|
2
2
|
class Job
|
3
3
|
class << self
|
4
|
-
|
4
|
+
def jobs
|
5
|
+
@jobs ||= []
|
6
|
+
end
|
7
|
+
|
8
|
+
def find_by_name(name)
|
9
|
+
jobs.find {|job| job.name == name }
|
10
|
+
end
|
5
11
|
end
|
6
12
|
|
7
13
|
attr_reader :settings
|
14
|
+
attr_reader :name
|
8
15
|
|
9
16
|
def initialize(name, &blk)
|
10
17
|
@name = name
|
11
18
|
@settings = {}
|
19
|
+
@mapper_class = Mandy::Mapper
|
20
|
+
@reducer_class = Mandy::Reducer
|
21
|
+
set('mapred.job.name', name)
|
12
22
|
instance_eval(&blk) if blk
|
13
23
|
end
|
14
24
|
|
25
|
+
def set(key, value)
|
26
|
+
@settings[key.to_s] = value.to_s
|
27
|
+
end
|
28
|
+
|
15
29
|
def map(&blk)
|
16
30
|
@mapper_class = Mandy::Mapper.compile(&blk)
|
17
31
|
end
|
data/lib/test_runner.rb
CHANGED