mandy 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/mandy-hadoop CHANGED
@@ -75,14 +75,15 @@ Mandy::Job.jobs.each_with_index do |job, i|
75
75
  output = File.join(output_folder, "#{i+1}-#{job.name.downcase.gsub(/\W/, '-')}")
76
76
 
77
77
  bootstrap_file = File.expand_path(File.join(File.dirname(__FILE__), '..', 'bootstrap.rb'))
78
+ inputreader = job.input_format == :xml ? "StreamXmlRecordReader,begin=<#{job.input_format_options[:xml_tag]} ,end=</#{job.input_format_options[:xml_tag]}>" : nil
78
79
 
79
80
  command = %($HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-*-streaming.jar #{jobconf}\
81
+ -files "#{payload}","#{bootstrap_file}" \
80
82
  -conf '#{config}' \
81
83
  -input "#{input}" \
84
+ #{ inputreader.nil? ? '' : "-inputreader \"#{inputreader}\"" } \
82
85
  -mapper "ruby bootstrap.rb #{File.basename(payload)} map #{filename} '#{job.name}'" \
83
86
  -reducer "ruby bootstrap.rb #{File.basename(payload)} reduce #{filename} '#{job.name}'" \
84
- -file "#{payload}" \
85
- -file "#{bootstrap_file}" \
86
87
  #{ cmdenv.nil? ? '' : "-cmdenv #{cmdenv}" }\
87
88
  -output "#{output}")
88
89
 
data/lib/job.rb CHANGED
@@ -12,7 +12,8 @@ module Mandy
12
12
 
13
13
  attr_reader :settings
14
14
  attr_reader :name
15
-
15
+ attr_reader :input_format_options
16
+
16
17
  def initialize(name, &blk)
17
18
  @name = name
18
19
  @settings = {}
@@ -25,11 +26,14 @@ module Mandy
25
26
  modules.each {|m| @modules << m}
26
27
  end
27
28
  alias_method :serialize, :mixin
28
-
29
- def input_format(format)
29
+
30
+ def input_format(format=nil, options={})
31
+ return @input_format if format.nil?
32
+
30
33
  @input_format = format
34
+ @input_format_options = options
31
35
  end
32
-
36
+
33
37
  def output_format(format)
34
38
  @output_format = format
35
39
  end
@@ -27,7 +27,7 @@ module Mandy
27
27
  def put_files_on_hdfs(hdfs_path, input_files)
28
28
  input_files = [input_files] unless input_files.is_a?(Array)
29
29
  input_files.each do |input_file|
30
- input_file_path = File.expand_path(input_file.path)
30
+ input_file_path = input_file.is_a?(File) ? File.expand_path(input_file.path) : input_file
31
31
  base_filename = input_file_path.split("/").last
32
32
  dest_file = ["input/#{hdfs_path}", base_filename].join("/")
33
33
  run_command "mandy-put #{input_file_path} #{dest_file}"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mandy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andy Kent