mandy 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
data/bin/mandy-hadoop CHANGED
@@ -75,14 +75,15 @@ Mandy::Job.jobs.each_with_index do |job, i|
75
75
  output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
76
76
 
77
77
  bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
78
+ inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
78
79
 
79
80
  command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
81
+ -files "#{payload}","#{bootstrap_file}" \
80
82
  -conf '#{config}' \
81
83
  -input "#{input}" \
84
+ #{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
82
85
  -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
83
86
  -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
84
- -file "#{payload}" \
85
- -file "#{bootstrap_file}" \
86
87
  #{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
87
88
  -output "#{output}")
88
89
 
data/lib/job.rb CHANGED
@@ -12,7 +12,8 @@ module Mandy
12
12
 
13
13
  attr_reader :settings
14
14
  attr_reader :name
15
-
15
+ attr_reader :input_format_options
16
+
16
17
  def initialize(name, &blk)
17
18
  @name = name
18
19
  @settings = {}
@@ -25,11 +26,14 @@ module Mandy
25
26
  modules.each {|m| @modules << m}
26
27
  end
27
28
  alias_method :serialize, :mixin
28
-
29
- def input_format(format)
29
+
30
+ def input_format(format=nil, options={})
31
+ return @input_format if format.nil?
32
+
30
33
  @input_format = format
34
+ @input_format_options = options
31
35
  end
32
-
36
+
33
37
  def output_format(format)
34
38
  @output_format = format
35
39
  end
@@ -27,7 +27,7 @@ module Mandy
27
27
  def put_files_on_hdfs(hdfs_path, input_files)
28
28
  input_files = [input_files] unless input_files.is_a?(Array)
29
29
  input_files.each do |input_file|
30
- input_file_path = File.expand_path(input_file.path)
30
+ input_file_path = input_file.is_a?(File) ? File.expand_path(input_file.path) : input_file
31
31
  base_filename = input_file_path.split("/").last
32
32
  dest_file = ["input/#{hdfs_path}", base_filename].join("/")
33
33
  run_command "mandy-put #{input_file_path} #{dest_file}"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent