mandy 0.4.8 → 0.4.10
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/mandy-cp +27 -0
- data/lib/support/hdfs_location.rb +13 -0
- data/lib/wrappers/mandy_wrapper.rb +22 -5
- metadata +3 -1
data/bin/mandy-cp
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
exec('mandy-cp -h') unless ARGV.size >= 2
|
6
|
+
|
7
|
+
|
8
|
+
options = OpenStruct.new
|
9
|
+
|
10
|
+
OptionParser.new do |opts|
|
11
|
+
opts.banner = "USAGE: mandy-cp source destination [options]"
|
12
|
+
|
13
|
+
opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
|
14
|
+
options.config = config
|
15
|
+
end
|
16
|
+
|
17
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
18
|
+
puts opts
|
19
|
+
exit
|
20
|
+
end
|
21
|
+
end.parse!
|
22
|
+
|
23
|
+
source = ARGV[0]
|
24
|
+
destination = ARGV[1]
|
25
|
+
config = options.config || 'cluster.xml'
|
26
|
+
|
27
|
+
`$HADOOP_HOME/bin/hadoop fs -conf #{config} -cp #{source} #{destination}`
|
data/lib/wrappers/mandy_wrapper.rb
CHANGED
@@ -6,14 +6,16 @@ module Mandy
|
|
6
6
|
@@config_path = file_path
|
7
7
|
end
|
8
8
|
|
9
|
-
def run_mandy(script, input_files, options = {})
|
9
|
+
def run_mandy(script, inputs, options = {})
|
10
10
|
begin
|
11
11
|
#doing this will load all the mandy jobs in memory which will be useful later on
|
12
12
|
require script
|
13
|
+
inputs = [inputs] unless inputs.is_a?(Array)
|
14
|
+
|
15
|
+
hdfs_input = inputs.all? {|i| i.is_a?(File)} ? process_files(inputs) : process_hdfs_locations(inputs)
|
16
|
+
|
17
|
+
run_mandy_hadoop(hdfs_input, script, options)
|
13
18
|
|
14
|
-
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
15
|
-
put_files_on_hdfs(hdfs_path, input_files)
|
16
|
-
run_mandy_hadoop(hdfs_path, script, options)
|
17
19
|
output_file_path = get_file_from_hdfs(hdfs_path, options)
|
18
20
|
return output_file_path unless block_given?
|
19
21
|
#if a block is given then yield the output file path and then delete this file before returning
|
@@ -24,8 +26,23 @@ module Mandy
|
|
24
26
|
end
|
25
27
|
|
26
28
|
private
|
29
|
+
def process_files(input_files)
|
30
|
+
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
31
|
+
put_files_on_hdfs(hdfs_path, input_files)
|
32
|
+
hdfs_path
|
33
|
+
end
|
34
|
+
|
35
|
+
def process_locations(input_locations)
|
36
|
+
return input_locations.first if input_locations.size == 1
|
37
|
+
|
38
|
+
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
39
|
+
input_locations.each_with_index do |location, index|
|
40
|
+
run_command "mandy-cp #{location} #{hdfs_path}/input#{index}"
|
41
|
+
end
|
42
|
+
hdfs_path
|
43
|
+
end
|
44
|
+
|
27
45
|
def put_files_on_hdfs(hdfs_path, input_files)
|
28
|
-
input_files = [input_files] unless input_files.is_a?(Array)
|
29
46
|
input_files.each do |input_file|
|
30
47
|
input_file_path = input_file.is_a?(File) ? File.expand_path(input_file.path) : input_file
|
31
48
|
base_filename = input_file_path.split("/").last
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.8
|
4
|
+
version: 0.4.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -45,6 +45,7 @@ executables:
|
|
45
45
|
- mandy-reduce
|
46
46
|
- mandy-rm
|
47
47
|
- mandy-mv
|
48
|
+
- mandy-cp
|
48
49
|
- mandy-mkdir
|
49
50
|
- mandy-exists
|
50
51
|
- mandy-install
|
@@ -69,6 +70,7 @@ files:
|
|
69
70
|
- lib/support/tuple.rb
|
70
71
|
- lib/support/formatting.rb
|
71
72
|
- lib/support/array_serializer.rb
|
73
|
+
- lib/support/hdfs_location.rb
|
72
74
|
- lib/task.rb
|
73
75
|
- lib/dsl.rb
|
74
76
|
- lib/job.rb
|