mandy 0.4.83 → 0.4.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -1
- data/bin/mandy +0 -1
- data/lib/mandy.rb +1 -0
- metadata +1 -5
- data/bin/mandy-run +0 -58
- data/lib/wrappers/mandy_local_wrapper.rb +0 -58
- data/lib/wrappers/mandy_wrapper.rb +0 -94
data/Gemfile
CHANGED
data/bin/mandy
CHANGED
|
@@ -35,7 +35,6 @@ puts '------------------------'
|
|
|
35
35
|
'mandy-install' => 'Installs the Mandy Rubygem on several hosts via ssh.',
|
|
36
36
|
'mandy-local' => 'Run a Map/Reduce task locally without requiring hadoop',
|
|
37
37
|
'mandy-hadoop' => 'Run a Map/Reduce task on hadoop using the provided cluster config',
|
|
38
|
-
'mandy-run' => 'Run an entire Map/Reduce workflow with one command',
|
|
39
38
|
'mandy-rm' => 'remove a file or directory from HDFS',
|
|
40
39
|
'mandy-put' => 'upload a file into HDFS',
|
|
41
40
|
'mandy-map' => 'Run a map task reading on STDIN and writing to STDOUT',
|
data/lib/mandy.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mandy
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.83
|
|
4
|
+
version: 0.4.86
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andy Kent
|
|
@@ -49,7 +49,6 @@ executables:
|
|
|
49
49
|
- mandy-mkdir
|
|
50
50
|
- mandy-exists
|
|
51
51
|
- mandy-install
|
|
52
|
-
- mandy-run
|
|
53
52
|
extensions: []
|
|
54
53
|
|
|
55
54
|
extra_rdoc_files: []
|
|
@@ -61,7 +60,6 @@ files:
|
|
|
61
60
|
- bin/mandy-get
|
|
62
61
|
- bin/mandy-put
|
|
63
62
|
- bin/mandy-reduce
|
|
64
|
-
- bin/mandy-run
|
|
65
63
|
- readme.md
|
|
66
64
|
- Rakefile
|
|
67
65
|
- bootstrap.rb
|
|
@@ -93,8 +91,6 @@ files:
|
|
|
93
91
|
- lib/ruby-hbase/version.rb
|
|
94
92
|
- lib/ruby-hbase/xml_decoder.rb
|
|
95
93
|
- lib/test_runner.rb
|
|
96
|
-
- lib/wrappers/mandy_wrapper.rb
|
|
97
|
-
- lib/wrappers/mandy_local_wrapper.rb
|
|
98
94
|
has_rdoc: true
|
|
99
95
|
homepage: http://github.com/trafficbroker/mandy
|
|
100
96
|
licenses: []
|
data/bin/mandy-run
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
# mandy-run: run an entire Map/Reduce workflow with one command.
#
# Usage: mandy-run script input [options]
#
# Parses the command-line options, loads either the local or the hadoop
# wrapper (both define `run_mandy`), runs the given script against the given
# input file and prints where the results were stored.
require "rubygems"
require "mandy"
require 'optparse'
require 'ostruct'
require 'json'

options = OpenStruct.new

parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-run script input [options]"

  opts.on("-p", "--payload PAYLOAD", "Add a working directory to be sent to the cluster.") do |payload|
    options.payload = payload
  end

  # Stored under options.output, which is what params[:output_file] reads
  # below (previously this overwrote options.payload by mistake).
  opts.on("-o", "--output OUTPUT", "Specify output path for your results.") do |output|
    options.output = output
  end

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on("-j", '--json "{\"key\":\"1 value\"}"', "Pass JSON encoded parameters to jobs") do |json|
    options.json = json
  end

  opts.on("-l", "--local", "Run script using mandy-local.") do
    options.local = true
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
parser.parse!

# Both the script and the input are required; show this command's own usage
# (not mandy-local's) when they are missing.
abort(parser.help) unless ARGV.size >= 2

file = ARGV[0]
input = File.new(ARGV[1])

# Only pass along the options the user actually supplied.
params = {}
params[:output_file] = options.output if options.output
params[:lib] = options.payload if options.payload
params[:parameters] = JSON.parse(options.json) if options.json

# Each wrapper mixes its own `run_mandy` into Object, so exactly one is loaded.
if options.local
  require 'wrappers/mandy_local_wrapper'
else
  require 'wrappers/mandy_wrapper'
  set_mandy_config options.config || 'cluster.xml'
end

output_file = run_mandy(file, input, params)

puts "Results stored in: #{output_file}"
|
|
data/lib/wrappers/mandy_local_wrapper.rb
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
require 'fileutils'
require 'json'

module Mandy
  module Local
    # Convenience helpers for running a Mandy script through the `mandy-local`
    # command. The module is mixed into Object at the bottom of this file, so
    # `run_mandy` is callable from anywhere once the file is required.
    module Wrapper
      # Used to name the temporary combined input file for this process.
      SESSION_ID = Process.pid

      # Runs +script+ locally over +input_files+ and returns the output path.
      #
      # script      - path of the Mandy script; it is `require`d first so its
      #               jobs are loaded (generate_output_path reads
      #               Mandy::Job.jobs).
      # input_files - a File or an Array of Files (anything responding to #path).
      # options     - Hash; recognised keys are :parameters (job parameters,
      #               passed as JSON), :lib (directory the script is copied
      #               into for the run) and :output_file (explicit output path).
      #
      # Without a block the output path is returned and the file is kept.
      # With a block the path is yielded and the output file is deleted
      # afterwards. The temporary combined input file is always removed.
      def run_mandy(script, input_files, options = {})
        input_file = nil
        output_file_path = nil

        require script

        input_file = concat_input_files(input_files)
        output_file_path = run_mandy_local(script, input_file, options)
        return output_file_path unless block_given?

        yield output_file_path
      ensure
        # Both paths may still be nil when an earlier step raised; guarding
        # here keeps the original exception from being masked.
        File.delete(input_file) if input_file && File.exist?(input_file)
        File.delete(output_file_path) if block_given? && output_file_path && File.exist?(output_file_path)
      end

      private

      # Concatenates all inputs into "<dir of first input>/<SESSION_ID>.csv"
      # and returns that path.
      def concat_input_files(inputs)
        # Not Array(inputs): File is Enumerable, so Array() would read its lines.
        inputs = [inputs] unless inputs.is_a?(Array)
        base_dir = File.dirname(inputs.first.path)
        input_file = "#{base_dir}/#{SESSION_ID}.csv"
        # Copy in Ruby rather than via `cat` so paths containing spaces or
        # shell metacharacters are handled correctly.
        File.open(input_file, 'wb') do |combined|
          inputs.each { |f| IO.copy_stream(f.path, combined) }
        end
        input_file
      end

      # Invokes `mandy-local script input output_path` with the job parameters
      # exported as JSON in the `json` environment variable. Returns the last
      # line the command printed (nil when it printed nothing).
      def run_mandy_local(script, input, options)
        job_params = options.fetch(:parameters, {})
        copied_script = nil

        if options.include?(:lib)
          FileUtils.cp(script, options[:lib])
          copied_script = File.join(options[:lib], File.basename(script))
          script = copied_script
        end

        output_path = options[:output_file] || generate_output_path
        # Argument-array form: no shell is involved, so nothing needs quoting.
        command = [{ 'json' => job_params.to_json }, 'mandy-local', script.to_s, input.to_s, output_path.to_s]
        IO.popen(command) { |io| io.read }.split("\n").last
      ensure
        # Only remove the copy made above. If the copy itself failed, `script`
        # still refers to the caller's original file and must not be deleted.
        File.delete(copied_script) if copied_script && File.exist?(copied_script)
      end

      # Builds a timestamped path under /tmp/mandy_local_output named after
      # the last job in Mandy::Job.jobs, creating the directory if needed.
      def generate_output_path
        output_dir = "/tmp/mandy_local_output"
        FileUtils.mkdir_p(output_dir)
        file_name = Mandy::Job.jobs.last.name.downcase.gsub(/\W/, '-')
        "#{output_dir}/#{file_name}_#{Time.now.strftime('%Y%m%d%H%M%S')}"
      end
    end
  end
end

Object.send(:include, Mandy::Local::Wrapper)
|
|
data/lib/wrappers/mandy_wrapper.rb
DELETED
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
require 'fileutils'
require 'json'
require 'shellwords'

module Mandy
  # Convenience helpers for running a Mandy script on a hadoop cluster by
  # shelling out to the mandy-* commands. The module is mixed into Object at
  # the bottom of this file, so `set_mandy_config` and `run_mandy` are callable
  # from anywhere once the file is required.
  module Wrapper
    # Used to build a per-process working path on HDFS.
    SESSION_ID = Process.pid

    # Cluster config passed to every command via -c; nil until
    # set_mandy_config is called.
    @@config_path = nil

    # Sets the cluster config file that run_command appends as `-c <file>`.
    def set_mandy_config(file_path)
      @@config_path = file_path
    end

    # Runs +script+ on hadoop over +inputs+ and returns the local output path.
    #
    # script  - path of the Mandy script; it is `require`d first so its jobs
    #           are loaded (generate_output_path reads Mandy::Job.jobs).
    # inputs  - a single input or an Array. When every input is a File they
    #           are uploaded with mandy-put; otherwise they are treated as
    #           locations already on HDFS.
    # options - Hash; recognised keys are :parameters (job parameters, passed
    #           as JSON via -j), :lib (passed via -p) and :output_file
    #           (explicit local output path).
    #
    # Without a block the local output path is returned and the file is kept.
    # With a block the path is yielded and the file is deleted afterwards.
    def run_mandy(script, inputs, options = {})
      output_file_path = nil

      require script
      # Not Array(inputs): File is Enumerable, so Array() would read its lines.
      inputs = [inputs] unless inputs.is_a?(Array)

      hdfs_input = inputs.all? { |i| i.is_a?(File) } ? process_files(inputs) : process_locations(inputs)

      run_mandy_hadoop(hdfs_input, script, options)

      output_file_path = get_file_from_hdfs(hdfs_input, options)
      return output_file_path unless block_given?

      yield output_file_path
    ensure
      File.delete(output_file_path) if block_given? && output_file_path && File.exist?(output_file_path)
    end

    private

    # Uploads local files below "input/<class name>/<SESSION_ID>" and returns
    # the path relative to "input/".
    def process_files(input_files)
      hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
      put_files_on_hdfs(hdfs_path, input_files)
      hdfs_path
    end

    # Returns the HDFS path to use as job input for already-remote locations.
    # A single location is used as-is; several are copied with mandy-cp.
    #
    # NOTE(review): callers prefix the returned value with "input/" and later
    # run `mandy-rm input/<value>`. For a single location that removes
    # "input/<location>", and the copies made here are not placed under
    # "input/" - confirm both are intended.
    def process_locations(input_locations)
      return input_locations.first if input_locations.size == 1

      hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
      input_locations.each_with_index do |location, index|
        run_command "mandy-cp #{Shellwords.escape(location.to_s)} #{Shellwords.escape("#{hdfs_path}/input#{index}")}"
      end
      hdfs_path
    end

    # Uploads each input (a File or a path String) to
    # "input/<hdfs_path>/<basename>" using mandy-put.
    def put_files_on_hdfs(hdfs_path, input_files)
      input_files.each do |input_file|
        input_file_path = input_file.is_a?(File) ? File.expand_path(input_file.path) : input_file
        dest_file = "input/#{hdfs_path}/#{File.basename(input_file_path)}"
        run_command "mandy-put #{Shellwords.escape(input_file_path)} #{Shellwords.escape(dest_file)}"
      end
    end

    # Clears any previous output for this path and starts the job with
    # mandy-hadoop, reading "input/<hdfs_path>" and writing "output/<hdfs_path>".
    def run_mandy_hadoop(hdfs_path, script, options)
      job_params = options.fetch(:parameters, {})
      # Escaped so JSON containing quotes or spaces reaches the command intact.
      param_args = "-j #{Shellwords.escape(job_params.to_json)}"
      param_args += " -p #{Shellwords.escape(options[:lib].to_s)}" if options.include?(:lib)

      hdfs_input_path = Shellwords.escape("input/#{hdfs_path}")
      hdfs_output_path = Shellwords.escape("output/#{hdfs_path}")
      run_command "mandy-rm #{hdfs_output_path}"
      run_command "mandy-hadoop #{Shellwords.escape(script.to_s)} #{hdfs_input_path} #{hdfs_output_path} #{param_args}"
    end

    # Downloads the job output to a local file, removes the job's input and
    # output paths from HDFS and returns the local path.
    def get_file_from_hdfs(hdfs_path, options)
      output_file_path = options[:output_file] || generate_output_path
      hdfs_output_path = "output/#{hdfs_path}"
      # Resolve the remote path before issuing another command: it is read
      # from @output, which every run_command call overwrites.
      remote_path = get_hdfs_output(hdfs_output_path)
      run_command "mandy-get #{Shellwords.escape(remote_path)} #{Shellwords.escape(output_file_path.to_s)}"
      run_command "mandy-rm #{Shellwords.escape("input/#{hdfs_path}")}"
      run_command "mandy-rm #{Shellwords.escape(hdfs_output_path)}"
      output_file_path
    end

    # Runs +command+ (with `-c <config>` appended when a config is set), logs
    # it via `logger` when available or `p` otherwise, stores the command's
    # standard output in @output and returns it.
    def run_command(command)
      command = "#{command} -c #{Shellwords.escape(@@config_path.to_s)}" if @@config_path
      respond_to?(:logger) ? logger.info(command) : p(command)
      @output = `#{command}`
    end

    # Builds a timestamped path under /tmp/mandy_output named after the last
    # job in Mandy::Job.jobs, creating the directory if needed.
    def generate_output_path
      output_dir = "/tmp/mandy_output"
      FileUtils.mkdir_p(output_dir)
      file_name = Mandy::Job.jobs.last.name.downcase.gsub(/\W/, '-')
      "#{output_dir}/#{file_name}_#{Time.now.strftime('%Y%m%d%H%M%S')}"
    end

    # Returns the first line of the last command's output that mentions
    # +hdfs_output_path+, stripped. Falls back to +hdfs_output_path+ itself
    # when no line matches (previously the whole output text was returned).
    def get_hdfs_output(hdfs_output_path)
      match = @output.to_s.each_line.find { |line| line.include?(hdfs_output_path) }
      match ? match.strip : hdfs_output_path
    end
  end
end

Object.send(:include, Mandy::Wrapper)
|