mandy 0.4.8 → 0.4.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,27 @@
1
#!/usr/bin/env ruby
# mandy-cp: runs `hadoop fs -cp` for a source and destination path.
#
# USAGE: mandy-cp source destination [options]
#   -c, --conf HADOOP_CONF   cluster xml config file (defaults to cluster.xml)
#   -h, --help               show usage
#
# Exits with status 1 when fewer than two operands are given or when the
# hadoop command does not succeed.
require 'optparse'
require 'ostruct'

options = OpenStruct.new

parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-cp source destination [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

# Parse before counting arguments so that option flags and their values are
# not mistaken for the source/destination operands.
parser.parse!

# Print usage in-process. Re-executing `mandy-cp -h` here would loop forever,
# because the child would itself be started with fewer than two arguments.
unless ARGV.size >= 2
  puts parser
  exit 1
end

source, destination = ARGV
config = options.config || 'cluster.xml'

# File.join with an empty HADOOP_HOME yields "/bin/hadoop", which is what a
# shell expands "$HADOOP_HOME/bin/hadoop" to when the variable is unset.
hadoop = File.join(ENV['HADOOP_HOME'].to_s, 'bin', 'hadoop')

# Passing the arguments as a list bypasses the shell, so paths containing
# spaces or shell metacharacters reach hadoop unmodified.
copied = system(hadoop, 'fs', '-conf', config, '-cp', source, destination)
exit 1 unless copied
@@ -0,0 +1,13 @@
1
module Mandy
  # Thin wrapper around a path value.
  # NOTE(review): going by the class name this marks an input that already
  # lives on HDFS rather than a local file -- confirm against the callers.
  class HdfsLocation
    # The value handed to the constructor, unchanged.
    attr_reader :path

    # path - the location to wrap (typically a String).
    def initialize(path)
      @path = path
    end

    # Returns the path as a String. Coercing here matters: when #to_s returns
    # a non-String, Ruby discards it during interpolation and falls back to
    # the default "#<Mandy::HdfsLocation>" representation.
    def to_s
      @path.to_s
    end
  end
end
@@ -6,14 +6,16 @@ module Mandy
6
6
  @@config_path = file_path
7
7
  end
8
8
 
9
- def run_mandy(script, input_files, options = {})
9
+ def run_mandy(script, inputs, options = {})
10
10
  begin
11
11
  #doing this will load all the mandy jobs in memory which will be useful later on
12
12
  require script
13
+ inputs = [inputs] unless inputs.is_a?(Array)
14
+
15
+ hdfs_input = inputs.all? {|i| i.is_a?(File)} ? process_files(inputs) : process_hdfs_locations(inputs)
16
+
17
+ run_mandy_hadoop(hdfs_input, script, options)
13
18
 
14
- hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
15
- put_files_on_hdfs(hdfs_path, input_files)
16
- run_mandy_hadoop(hdfs_path, script, options)
17
19
  output_file_path = get_file_from_hdfs(hdfs_path, options)
18
20
  return output_file_path unless block_given?
19
21
  #if a block is given then yield the output file path and then delete this file before returning
@@ -24,8 +26,23 @@ module Mandy
24
26
  end
25
27
 
26
28
  private
29
# Hands the given input files to put_files_on_hdfs, targeting a directory
# named "<downcased class name, '::' replaced by '-'>/<SESSION_ID>".
#
# input_files - the collection passed straight through to put_files_on_hdfs.
#
# Returns the target directory path as a String.
def process_files(input_files)
  job_namespace = self.class.to_s.split('::').join('-').downcase
  "#{job_namespace}/#{SESSION_ID}".tap do |session_dir|
    put_files_on_hdfs(session_dir, input_files)
  end
end
34
+
35
# Resolves non-File inputs to a single path that can be used as job input.
#
# A lone location is returned untouched. Several locations are each copied
# with the mandy-cp command into "<class-derived name>/<SESSION_ID>/inputN",
# and that directory is returned instead.
#
# input_locations - Array of locations; each is interpolated into the
#                   mandy-cp command line, so its #to_s must yield the path.
#
# Returns the single location as given, or the directory path as a String.
def process_hdfs_locations(input_locations)
  return input_locations.first if input_locations.size == 1

  hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
  input_locations.each_with_index do |location, index|
    run_command "mandy-cp #{location} #{hdfs_path}/input#{index}"
  end
  hdfs_path
end

# run_mandy calls this method as process_hdfs_locations; the original name
# is kept as an alias so that either spelling resolves.
alias process_locations process_hdfs_locations
44
+
27
45
  def put_files_on_hdfs(hdfs_path, input_files)
28
- input_files = [input_files] unless input_files.is_a?(Array)
29
46
  input_files.each do |input_file|
30
47
  input_file_path = input_file.is_a?(File) ? File.expand_path(input_file.path) : input_file
31
48
  base_filename = input_file_path.split("/").last
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.8
4
+ version: 0.4.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -45,6 +45,7 @@ executables:
45
45
  - mandy-reduce
46
46
  - mandy-rm
47
47
  - mandy-mv
48
+ - mandy-cp
48
49
  - mandy-mkdir
49
50
  - mandy-exists
50
51
  - mandy-install
@@ -69,6 +70,7 @@ files:
69
70
  - lib/support/tuple.rb
70
71
  - lib/support/formatting.rb
71
72
  - lib/support/array_serializer.rb
73
+ - lib/support/hdfs_location.rb
72
74
  - lib/task.rb
73
75
  - lib/dsl.rb
74
76
  - lib/job.rb