mandy 0.4.8 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/mandy-cp +27 -0
- data/lib/support/hdfs_location.rb +13 -0
- data/lib/wrappers/mandy_wrapper.rb +22 -5
- metadata +3 -1
data/bin/mandy-cp
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'ostruct'
|
4
|
+
|
5
|
+
exec('mandy-cp -h') unless ARGV.size >= 2
|
6
|
+
|
7
|
+
|
8
|
+
options = OpenStruct.new
|
9
|
+
|
10
|
+
OptionParser.new do |opts|
|
11
|
+
opts.banner = "USAGE: mandy-cp source destination [options]"
|
12
|
+
|
13
|
+
opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
|
14
|
+
options.config = config
|
15
|
+
end
|
16
|
+
|
17
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
18
|
+
puts opts
|
19
|
+
exit
|
20
|
+
end
|
21
|
+
end.parse!
|
22
|
+
|
23
|
+
source = ARGV[0]
|
24
|
+
destination = ARGV[1]
|
25
|
+
config = options.config || 'cluster.xml'
|
26
|
+
|
27
|
+
`$HADOOP_HOME/bin/hadoop fs -conf #{config} -cp #{source} #{destination}`
|
data/lib/wrappers/mandy_wrapper.rb
CHANGED
@@ -6,14 +6,16 @@ module Mandy
|
|
6
6
|
@@config_path = file_path
|
7
7
|
end
|
8
8
|
|
9
|
-
def run_mandy(script, input_files, options = {})
|
9
|
+
def run_mandy(script, inputs, options = {})
|
10
10
|
begin
|
11
11
|
#doing this will load all the mandy jobs in memory which will be useful later on
|
12
12
|
require script
|
13
|
+
inputs = [inputs] unless inputs.is_a?(Array)
|
14
|
+
|
15
|
+
hdfs_input = inputs.all? {|i| i.is_a?(File)} ? process_files(inputs) : process_hdfs_locations(inputs)
|
16
|
+
|
17
|
+
run_mandy_hadoop(hdfs_input, script, options)
|
13
18
|
|
14
|
-
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
15
|
-
put_files_on_hdfs(hdfs_path, input_files)
|
16
|
-
run_mandy_hadoop(hdfs_path, script, options)
|
17
19
|
output_file_path = get_file_from_hdfs(hdfs_path, options)
|
18
20
|
return output_file_path unless block_given?
|
19
21
|
#if a block is given then yield the output file path and then delete this file before returning
|
@@ -24,8 +26,23 @@ module Mandy
|
|
24
26
|
end
|
25
27
|
|
26
28
|
private
|
29
|
+
def process_files(input_files)
|
30
|
+
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
31
|
+
put_files_on_hdfs(hdfs_path, input_files)
|
32
|
+
hdfs_path
|
33
|
+
end
|
34
|
+
|
35
|
+
def process_locations(input_locations)
|
36
|
+
return input_locations.first if input_locations.size == 1
|
37
|
+
|
38
|
+
hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
|
39
|
+
input_locations.each_with_index do |location, index|
|
40
|
+
run_command "mandy-cp #{location} #{hdfs_path}/input#{index}"
|
41
|
+
end
|
42
|
+
hdfs_path
|
43
|
+
end
|
44
|
+
|
27
45
|
def put_files_on_hdfs(hdfs_path, input_files)
|
28
|
-
input_files = [input_files] unless input_files.is_a?(Array)
|
29
46
|
input_files.each do |input_file|
|
30
47
|
input_file_path = input_file.is_a?(File) ? File.expand_path(input_file.path) : input_file
|
31
48
|
base_filename = input_file_path.split("/").last
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mandy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.8
|
4
|
+
version: 0.4.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Kent
|
@@ -45,6 +45,7 @@ executables:
|
|
45
45
|
- mandy-reduce
|
46
46
|
- mandy-rm
|
47
47
|
- mandy-mv
|
48
|
+
- mandy-cp
|
48
49
|
- mandy-mkdir
|
49
50
|
- mandy-exists
|
50
51
|
- mandy-install
|
@@ -69,6 +70,7 @@ files:
|
|
69
70
|
- lib/support/tuple.rb
|
70
71
|
- lib/support/formatting.rb
|
71
72
|
- lib/support/array_serializer.rb
|
73
|
+
- lib/support/hdfs_location.rb
|
72
74
|
- lib/task.rb
|
73
75
|
- lib/dsl.rb
|
74
76
|
- lib/job.rb
|