mandy 0.4.8 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
#!/usr/bin/env ruby
# mandy-cp: copies SOURCE to DESTINATION by shelling out to `hadoop fs -cp`.
#
# Usage: mandy-cp source destination [options]
#   -c, --conf HADOOP_CONF   cluster XML config handed to hadoop via -conf
#                            (defaults to 'cluster.xml')
#   -h, --help               print the usage text and exit
require 'optparse'
require 'ostruct'
require 'shellwords'

options = OpenStruct.new

parser = OptionParser.new do |opts|
  opts.banner = "USAGE: mandy-cp source destination [options]"

  opts.on("-c", "--conf HADOOP_CONF", "Use this cluster xml config file.") do |config|
    options.config = config
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

# Options are parsed BEFORE the positional arguments are counted. Counting
# first meant `mandy-cp -h` (a single argument) re-exec'd `mandy-cp -h`
# forever, and option tokens such as `-c conf.xml` were miscounted as the
# source/destination pair.
parser.parse!

# After parse! ARGV holds only the positional arguments.
unless ARGV.size >= 2
  puts parser
  exit 1
end

source, destination = ARGV
config = options.config || 'cluster.xml'

# Every user-supplied value is shell-escaped so paths containing spaces or
# shell metacharacters reach hadoop as single, literal arguments.
# $HADOOP_HOME is intentionally left for the shell to expand.
`$HADOOP_HOME/bin/hadoop fs -conf #{Shellwords.escape(config)} -cp #{Shellwords.escape(source)} #{Shellwords.escape(destination)}`
@@ -0,0 +1,13 @@
1
module Mandy
  # Thin wrapper around a path, exposing it through #path and #to_s.
  class HdfsLocation
    # The path exactly as it was given to the constructor.
    attr_reader :path

    # @param path the location to wrap; stored unchanged.
    def initialize(path)
      @path = path
    end

    # @return [String] the wrapped path as a String.
    #
    # Coerced with #to_s because Ruby ignores a non-String result from #to_s
    # during interpolation and falls back to "#<Mandy::HdfsLocation>", which
    # would silently corrupt any command string built from this object.
    def to_s
      @path.to_s
    end
  end
end
@@ -6,14 +6,16 @@ module Mandy
6
6
  @@config_path = file_path
7
7
  end
8
8
 
9
- def run_mandy(script, input_files, options = {})
9
+ def run_mandy(script, inputs, options = {})
10
10
  begin
11
11
  #doing this will load all the mandy jobs in memory which will be useful later on
12
12
  require script
13
+ inputs = [inputs] unless inputs.is_a?(Array)
14
+
15
+ hdfs_input = inputs.all? {|i| i.is_a?(File)} ? process_files(inputs) : process_hdfs_locations(inputs)
16
+
17
+ run_mandy_hadoop(hdfs_input, script, options)
13
18
 
14
- hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
15
- put_files_on_hdfs(hdfs_path, input_files)
16
- run_mandy_hadoop(hdfs_path, script, options)
17
19
  output_file_path = get_file_from_hdfs(hdfs_path, options)
18
20
  return output_file_path unless block_given?
19
21
  #if a block is given then yield the output file path and then delete this file before returning
@@ -24,8 +26,23 @@ module Mandy
24
26
  end
25
27
 
26
28
  private
29
# Uploads the given input files into a per-session HDFS directory and
# returns that directory's path.
#
# The directory name is the receiver's class name (namespace separators
# replaced by dashes, lower-cased) followed by "/" and SESSION_ID.
def process_files(input_files)
  namespace = self.class.to_s.split('::').join('-').downcase
  "#{namespace}/#{SESSION_ID}".tap do |target_dir|
    put_files_on_hdfs(target_dir, input_files)
  end
end
34
+
35
# Resolves a list of input locations to the single path handed on as job input.
#
# * One location: it is returned as-is and nothing is copied.
# * Otherwise: each location is copied with `mandy-cp` to
#   "<class-name>/<SESSION_ID>/input<index>" and that directory path is
#   returned.
#
# Defined under the name run_mandy actually calls (process_hdfs_locations);
# previously only process_locations existed, so passing non-File inputs to
# run_mandy raised NoMethodError.
def process_hdfs_locations(input_locations)
  return input_locations.first if input_locations.size == 1

  hdfs_path = "#{self.class.to_s.split('::').join('-').downcase}/#{SESSION_ID}"
  input_locations.each_with_index do |location, index|
    run_command "mandy-cp #{location} #{hdfs_path}/input#{index}"
  end
  hdfs_path
end

# The original method name is kept as an alias so existing callers keep working.
alias process_locations process_hdfs_locations
44
+
27
45
  def put_files_on_hdfs(hdfs_path, input_files)
28
- input_files = [input_files] unless input_files.is_a?(Array)
29
46
  input_files.each do |input_file|
30
47
  input_file_path = input_file.is_a?(File) ? File.expand_path(input_file.path) : input_file
31
48
  base_filename = input_file_path.split("/").last
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.8
4
+ version: 0.4.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent
@@ -45,6 +45,7 @@ executables:
45
45
  - mandy-reduce
46
46
  - mandy-rm
47
47
  - mandy-mv
48
+ - mandy-cp
48
49
  - mandy-mkdir
49
50
  - mandy-exists
50
51
  - mandy-install
@@ -69,6 +70,7 @@ files:
69
70
  - lib/support/tuple.rb
70
71
  - lib/support/formatting.rb
71
72
  - lib/support/array_serializer.rb
73
+ - lib/support/hdfs_location.rb
72
74
  - lib/task.rb
73
75
  - lib/dsl.rb
74
76
  - lib/job.rb