wukong 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +34 -7
- data/bin/cutc +1 -1
- data/bin/cuttab +1 -1
- data/bin/greptrue +1 -3
- data/bin/hdp-cat +1 -1
- data/bin/hdp-catd +1 -1
- data/bin/hdp-du +11 -6
- data/bin/hdp-get +1 -1
- data/bin/hdp-kill +1 -1
- data/bin/hdp-ls +1 -1
- data/bin/hdp-mkdir +1 -1
- data/bin/hdp-mv +1 -1
- data/bin/hdp-ps +1 -1
- data/bin/hdp-put +1 -1
- data/bin/hdp-rm +1 -1
- data/bin/hdp-sort +39 -19
- data/bin/hdp-stream +39 -19
- data/bin/hdp-stream-flat +9 -5
- data/bin/hdp-stream2 +39 -0
- data/bin/tabchar +1 -1
- data/bin/wu-date +13 -0
- data/bin/wu-datetime +13 -0
- data/bin/wu-plus +9 -0
- data/docpages/INSTALL.textile +0 -2
- data/docpages/index.textile +4 -2
- data/examples/apache_log_parser.rb +26 -14
- data/examples/graph/gen_symmetric_links.rb +10 -0
- data/examples/sample_records.rb +6 -8
- data/lib/wukong/datatypes/enum.rb +2 -2
- data/lib/wukong/dfs.rb +10 -9
- data/lib/wukong/encoding.rb +22 -4
- data/lib/wukong/extensions/emittable.rb +1 -1
- data/lib/wukong/extensions/hash_keys.rb +16 -0
- data/lib/wukong/extensions/hash_like.rb +17 -0
- data/lib/wukong/models/graph.rb +18 -20
- data/lib/wukong/schema.rb +13 -11
- data/lib/wukong/script.rb +26 -8
- data/lib/wukong/script/hadoop_command.rb +108 -2
- data/lib/wukong/streamer.rb +2 -0
- data/lib/wukong/streamer/base.rb +1 -0
- data/lib/wukong/streamer/record_streamer.rb +14 -0
- data/lib/wukong/streamer/struct_streamer.rb +2 -2
- data/spec/data/a_atsigns_b.tsv +64 -0
- data/spec/data/a_follows_b.tsv +53 -0
- data/spec/data/tweet.tsv +167 -0
- data/spec/data/twitter_user.tsv +55 -0
- data/wukong.gemspec +13 -3
- metadata +13 -23
data/docpages/INSTALL.textile
CHANGED
@@ -27,8 +27,6 @@ pre. $ sudo gem install {{ site.gemname }} --source=http://gemcutter.org
|
|
27
27
|
|
28
28
|
You can instead download this project in either "zip":http://github.com/mrflip/{{ site.gemname }}/zipball/master or "tar":http://github.com/mrflip/{{ site.gemname }}/tarball/master formats.
|
29
29
|
|
30
|
-
<notextile></div><div class="toggle"></notextile>
|
31
|
-
|
32
30
|
h3. Get the Dependencies
|
33
31
|
|
34
32
|
* Hadoop
|
data/docpages/index.textile
CHANGED
@@ -71,7 +71,7 @@ h2. Documentation index
|
|
71
71
|
|
72
72
|
h2. Credits
|
73
73
|
|
74
|
-
Wukong was written by "Philip (flip) Kromer":http://mrflip.com (flip@infochimps.org) for the "infochimps project":http://infochimps.org
|
74
|
+
Wukong was written by "Philip (flip) Kromer":http://mrflip.com (flip@infochimps.org / "@mrflip":http://twitter.com/mrflip) for the "infochimps project":http://infochimps.org
|
75
75
|
|
76
76
|
Patches submitted by:
|
77
77
|
* gemified by Ben Woosley (ben.woosley@gmail.com)
|
@@ -81,10 +81,12 @@ Thanks to:
|
|
81
81
|
* "Brad Heintz":http://www.bradheintz.com/no1thing/talks/ for his early feedback
|
82
82
|
* "Phil Ripperger":http://blog.pdatasolutions.com for his "wukong in the Amazon AWS cloud":http://blog.pdatasolutions.com/post/191978092/ruby-on-hadoop-quickstart tutorial.
|
83
83
|
|
84
|
-
<notextile><div class="toggle"></notextile>
|
84
|
+
<notextile></div><div class="toggle"></notextile>
|
85
85
|
|
86
86
|
h2. Help!
|
87
87
|
|
88
88
|
Send Wukong questions to the "Infinite Monkeywrench mailing list":http://groups.google.com/group/infochimps-code
|
89
89
|
|
90
90
|
<notextile></div></notextile>
|
91
|
+
|
92
|
+
{% include news.html %}
|
@@ -5,21 +5,21 @@ require 'wukong'
|
|
5
5
|
module ApacheLogParser
|
6
6
|
class Mapper < Wukong::Streamer::LineStreamer
|
7
7
|
|
8
|
+
# regular expression for apache-style log lines
|
9
|
+
# note that we strip out the google analytics listener.
|
10
|
+
LOG_RE = %r{\A
|
11
|
+
(\d+\.\d+\.\d+\.\d+) # IP addr
|
12
|
+
\s([^\s]+)\s # -
|
13
|
+
\s([^\s]+) # -
|
14
|
+
\s\[(\d\d/\w+/\d+):(\d\d:\d\d:\d\d)([^\]]*)\] # [07/Jun/2008:20:37:11 +0000]
|
15
|
+
\s(\d+) # 400
|
16
|
+
\s"([^\"]*(?:\" \+ gaJsHost \+ \"[^\"]*)?)" # "GET /faq" + gaJsHost + "google-analytics.com/ga.js HTTP/1.1"
|
17
|
+
\s(\d+) # 173
|
18
|
+
\s"([^\"]*)" "([^\"]*)" "([^\"]*)" # "-" "-" "-"
|
19
|
+
\z}x
|
8
20
|
|
9
|
-
|
10
|
-
|
11
|
-
if m
|
12
|
-
[''] + m.captures
|
13
|
-
else
|
14
|
-
[req, '', '', '']
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
|
19
|
-
# regular expression to match on apache-style log lines
|
20
|
-
# IP addr - - [07/Jun/2008:20:37:11 +0000] 400 "GET /faq" + gaJsHost + "google-analytics.com/ga.js HTTP/1.1" 173 "-" "-" "-"
|
21
|
-
LOG_RE = %r{\A(\d+\.\d+\.\d+\.\d+) ([^\s]+) ([^\s]+) \[(\d\d/\w+/\d+):(\d\d:\d\d:\d\d)([^\]]*)\] (\d+) "([^\"]*(?:\" \+ gaJsHost \+ \"[^\"]*)?)" (\d+) "([^\"]*)" "([^\"]*)" "([^\"]*)"\z}
|
22
|
-
|
21
|
+
# Use the regex to break line into fields
|
22
|
+
# Emit each record as flat line
|
23
23
|
def process line
|
24
24
|
line.chomp
|
25
25
|
m = LOG_RE.match(line)
|
@@ -32,8 +32,20 @@ module ApacheLogParser
|
|
32
32
|
yield [:unparseable, line]
|
33
33
|
end
|
34
34
|
end
|
35
|
+
|
36
|
+
|
37
|
+
def parse_request req
|
38
|
+
m = %r{\A(\w+) (.*) (\w+/[\w\.]+)\z}.match(req)
|
39
|
+
if m
|
40
|
+
[''] + m.captures
|
41
|
+
else
|
42
|
+
[req, '', '', '']
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
35
46
|
end
|
36
47
|
|
48
|
+
|
37
49
|
class Reducer < Wukong::Streamer::LineStreamer
|
38
50
|
end
|
39
51
|
|
@@ -17,6 +17,16 @@ module Wukong::Streamer
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
+
#
|
21
|
+
# Find symmetric links
|
22
|
+
#
|
23
|
+
# Takes adjacency list for a directed graph and emits only edges where
|
24
|
+
# A->B and B->A
|
25
|
+
#
|
26
|
+
# The output will list each such symmetric edge exactly once as
|
27
|
+
# a_symmetric_b node1 node2
|
28
|
+
# where node1 is lexicographically less than node2.
|
29
|
+
#
|
20
30
|
module FindSymmetricLinks
|
21
31
|
|
22
32
|
class Mapper < Wukong::Streamer::EdgeStreamer
|
data/examples/sample_records.rb
CHANGED
@@ -8,7 +8,7 @@ require 'wukong'
|
|
8
8
|
# Set the sampling fraction at the command line using the
|
9
9
|
# --sampling_fraction=
|
10
10
|
# option: for example, to take a random 1/1000th of the lines in huge_files,
|
11
|
-
# ./examples/sample_records.rb --sampling_fraction=0.001 --
|
11
|
+
# ./examples/sample_records.rb --sampling_fraction=0.001 --run huge_files sampled_files
|
12
12
|
#
|
13
13
|
class Mapper < Wukong::Streamer::LineStreamer
|
14
14
|
include Wukong::Streamer::Filter
|
@@ -32,13 +32,11 @@ class Mapper < Wukong::Streamer::LineStreamer
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
class Script < Wukong::Script
|
36
|
-
def default_options
|
37
|
-
super.merge :reduce_tasks => 0
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
35
|
#
|
42
36
|
# Executes the script
|
43
37
|
#
|
44
|
-
Script.new( Mapper,
|
38
|
+
Wukong::Script.new( Mapper,
|
39
|
+
nil,
|
40
|
+
:reduce_tasks => 0,
|
41
|
+
:reuse_jvms => true
|
42
|
+
).run
|
@@ -76,7 +76,7 @@ module Wukong
|
|
76
76
|
# Note that bin 0 is
|
77
77
|
#
|
78
78
|
class Binned < Enum
|
79
|
-
|
79
|
+
class_inheritable_accessor :bins, :empty_bin_name
|
80
80
|
@@empty_bin_name = '(none)'
|
81
81
|
|
82
82
|
def bins
|
@@ -95,7 +95,7 @@ module Wukong
|
|
95
95
|
|
96
96
|
def self.enumerates *bins
|
97
97
|
options = bins.extract_options!
|
98
|
-
|
98
|
+
self.bins = bins
|
99
99
|
last_top = bins.shift
|
100
100
|
# bins.unshift bins.first if last_top == -Infinity
|
101
101
|
names = bins.map do |bin_top|
|
data/lib/wukong/dfs.rb
CHANGED
@@ -22,15 +22,16 @@ module Wukong
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
25
|
+
HFile = TypedStruct.new(
|
26
|
+
[:mode_str, String],
|
27
|
+
[:i_count, String],
|
28
|
+
[:owner, String],
|
29
|
+
[:group, String],
|
30
|
+
[:size, Integer],
|
31
|
+
[:date, Bignum],
|
32
|
+
[:path, String]
|
33
|
+
)
|
34
|
+
HFile.class_eval do
|
34
35
|
def self.new_from_ls ls_line
|
35
36
|
mode, ic, o, g, sz, dt, tm, path = ls_line.chomp.split(/\s+/)
|
36
37
|
date = Time.parse("#{dt} #{tm}").utc.to_flat
|
data/lib/wukong/encoding.rb
CHANGED
@@ -1,6 +1,23 @@
|
|
1
1
|
require 'htmlentities'
|
2
2
|
require 'addressable/uri'
|
3
3
|
|
4
|
+
# Fix a bug (?) in the HTMLEntities encoder class with $KCODE='NONE'
|
5
|
+
HTMLEntities::Encoder.class_eval do
|
6
|
+
private
|
7
|
+
def extended_entity_regexp
|
8
|
+
@extended_entity_regexp ||= (
|
9
|
+
if encoding_aware?
|
10
|
+
regexp = '[^\u{20}-\u{7E}]'
|
11
|
+
else
|
12
|
+
# regexp = '[^\x20-\x7E]'
|
13
|
+
regexp = '[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+'
|
14
|
+
end
|
15
|
+
regexp += "|'" if @flavor == 'html4'
|
16
|
+
Regexp.new(regexp)
|
17
|
+
)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
4
21
|
module Wukong
|
5
22
|
#
|
6
23
|
# By default (or explicitly with the :xml strategy), convert string to
|
@@ -24,7 +41,9 @@ module Wukong
|
|
24
41
|
#
|
25
42
|
# Wukong.decode_str(Wukong.encode_str(str)) returns the original str
|
26
43
|
#
|
27
|
-
#
|
44
|
+
# If you're seeing bad_encoding errors, try
|
45
|
+
# $KCODE='u' unless "1.9".respond_to?(:encoding)
|
46
|
+
# at the start of your script.
|
28
47
|
#
|
29
48
|
def self.encode_str str, strategy=:xml
|
30
49
|
begin
|
@@ -34,8 +53,7 @@ module Wukong
|
|
34
53
|
else raise "Don't know how to encode with strategy #{strategy}"
|
35
54
|
end
|
36
55
|
rescue ArgumentError => e
|
37
|
-
str.gsub
|
38
|
-
'!!bad_encoding!! ' + str
|
56
|
+
'!bad_encoding!! ' + str.gsub(/[^\w\s\.\-@#%]+/, '')
|
39
57
|
end
|
40
58
|
end
|
41
59
|
# HTMLEntities encoder instance
|
@@ -89,7 +107,7 @@ String.class_eval do
|
|
89
107
|
replace self.wukong_decode(*args)
|
90
108
|
end
|
91
109
|
|
92
|
-
def wukong_decode
|
110
|
+
def wukong_decode(*args)
|
93
111
|
Wukong.decode_str(self, *args)
|
94
112
|
end
|
95
113
|
|
@@ -18,7 +18,7 @@ Struct.class_eval do
|
|
18
18
|
#
|
19
19
|
# Flatten for packing as resource name followed by all fields
|
20
20
|
#
|
21
|
-
def to_flat include_key=
|
21
|
+
def to_flat include_key=false
|
22
22
|
if include_key.is_a? Proc
|
23
23
|
sort_key = include_key.call(self)
|
24
24
|
elsif (! include_key.blank?) && respond_to?(:key)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
class Hash
|
2
|
+
|
3
|
+
# Return a new hash with all keys converted to symbols.
|
4
|
+
def symbolize_keys
|
5
|
+
inject({}) do |options, (key, value)|
|
6
|
+
options[(key.to_sym rescue key) || key] = value
|
7
|
+
options
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
# Destructively convert all keys to symbols.
|
12
|
+
def symbolize_keys!
|
13
|
+
self.replace(self.symbolize_keys)
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
@@ -114,6 +114,23 @@ module Wukong
|
|
114
114
|
extend ClassMethods
|
115
115
|
end
|
116
116
|
end
|
117
|
+
|
118
|
+
def coerce_attr attr, coerce_blank_to_nil=false, &block
|
119
|
+
orig_val = self.send(attr)
|
120
|
+
new_val = (coerce_blank_to_nil && orig_val.blank?) ? nil : block.call(orig_val)
|
121
|
+
self.send("#{attr}=", new_val)
|
122
|
+
end
|
123
|
+
|
124
|
+
def coerce_to_int! attr, *args
|
125
|
+
coerce_attr(attr, *args) do |val|
|
126
|
+
val.to_i
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def coerce_to_date! attr, *args
|
131
|
+
coerce_attr(attr, *args){|val| val.is_a?(DateTime) ? val : DateTime.parse(val) rescue nil }
|
132
|
+
end
|
133
|
+
|
117
134
|
end
|
118
135
|
|
119
136
|
end
|
data/lib/wukong/models/graph.rb
CHANGED
@@ -1,27 +1,25 @@
|
|
1
1
|
|
2
2
|
module Wukong
|
3
3
|
module Models
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
end
|
4
|
+
Edge = TypedStruct.new(
|
5
|
+
[:src, Integer],
|
6
|
+
[:dest, Integer]
|
7
|
+
)
|
9
8
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
end
|
9
|
+
MultiEdge = TypedStruct.new(
|
10
|
+
[:src, Integer],
|
11
|
+
[:dest, Integer],
|
12
|
+
[:a_follows_b, Integer],
|
13
|
+
[:b_follows_a, Integer],
|
14
|
+
[:a_replies_b, Integer],
|
15
|
+
[:b_replies_a, Integer],
|
16
|
+
[:a_atsigns_b, Integer],
|
17
|
+
[:b_atsigns_a, Integer],
|
18
|
+
[:a_retweets_b, Integer],
|
19
|
+
[:b_retweets_a, Integer],
|
20
|
+
[:a_favorites_b, Integer],
|
21
|
+
[:b_favorites_a, Integer]
|
22
|
+
)
|
25
23
|
|
26
24
|
end
|
27
25
|
end
|
data/lib/wukong/schema.rb
CHANGED
@@ -106,11 +106,11 @@ module Wukong
|
|
106
106
|
# should, follow with an immediate GENERATE to ditch that field.)
|
107
107
|
#
|
108
108
|
def pig_load filename=nil
|
109
|
-
filename ||=
|
109
|
+
filename ||= resource_name.to_s+'.tsv'
|
110
110
|
cmd = [
|
111
|
-
"%-23s" %
|
112
|
-
"= LOAD
|
113
|
-
"AS ( rsrc:chararray,", self.to_pig, ')',
|
111
|
+
"%-23s" % self.to_s.gsub(/^.*\W/, ""),
|
112
|
+
"= LOAD '#{filename}'",
|
113
|
+
"AS ( rsrc:chararray,", self.to_pig, ') ;',
|
114
114
|
].join(" ")
|
115
115
|
end
|
116
116
|
|
@@ -125,7 +125,7 @@ module Wukong
|
|
125
125
|
sql_str = []
|
126
126
|
members.zip(mtypes).each do |attr, type|
|
127
127
|
type_str = type.respond_to?(:to_sql) ? type.to_sql : type.to_s.upcase
|
128
|
-
sql_str << " %-
|
128
|
+
sql_str << " %-29s\t%s" %["`#{attr}`", type_str]
|
129
129
|
end
|
130
130
|
sql_str.join(",\n")
|
131
131
|
end
|
@@ -184,13 +184,15 @@ module Wukong
|
|
184
184
|
# different objects jumbled together, you can just dump in the whole file,
|
185
185
|
# landing each object in its correct table.
|
186
186
|
#
|
187
|
-
def sql_load_mysql
|
187
|
+
def sql_load_mysql(filename=nil)
|
188
|
+
filename ||= ":resource_name.tsv"
|
189
|
+
filename.gsub!(/:resource_name/, self.table_name)
|
188
190
|
str = []
|
189
191
|
# disable indexing during bulk load
|
190
|
-
str << %Q{ALTER TABLE `#{self.
|
192
|
+
str << %Q{ALTER TABLE `#{self.table_name}` DISABLE KEYS; }
|
191
193
|
# Bulk load the tab-separated-values file.
|
192
|
-
str << %Q{LOAD DATA LOCAL INFILE '#{
|
193
|
-
str << %Q{ REPLACE INTO TABLE `#{self.
|
194
|
+
str << %Q{LOAD DATA LOCAL INFILE '#{filename}'}
|
195
|
+
str << %Q{ REPLACE INTO TABLE `#{self.table_name}` }
|
194
196
|
str << %Q{ COLUMNS }
|
195
197
|
str << %Q{ TERMINATED BY '\\t' }
|
196
198
|
str << %Q{ OPTIONALLY ENCLOSED BY '' }
|
@@ -200,9 +202,9 @@ module Wukong
|
|
200
202
|
str << ' '+self.sql_members
|
201
203
|
str << %Q{\n ); }
|
202
204
|
# Re-enable indexing
|
203
|
-
str << %Q{ALTER TABLE `#{self.
|
205
|
+
str << %Q{ALTER TABLE `#{self.table_name}` ENABLE KEYS ; }
|
204
206
|
# Show it loaded correctly
|
205
|
-
str << %Q{SELECT
|
207
|
+
str << %Q{SELECT NOW(), COUNT(*), '#{self.table_name}' FROM `#{self.table_name}`; }
|
206
208
|
str.join("\n")
|
207
209
|
end
|
208
210
|
|
data/lib/wukong/script.rb
CHANGED
@@ -124,12 +124,13 @@ module Wukong
|
|
124
124
|
#
|
125
125
|
def process_argv!
|
126
126
|
options[:all_args] = []
|
127
|
-
|
128
|
-
|
127
|
+
options[:rest] = []
|
128
|
+
args = ARGV.dup
|
129
|
+
while (! args.blank?) do
|
129
130
|
arg = args.shift
|
130
131
|
case
|
131
132
|
when arg == '--'
|
132
|
-
|
133
|
+
options[:rest] += args
|
133
134
|
when arg =~ /\A--(\w+)(?:=(.+))?\z/
|
134
135
|
opt, val = [$1, $2]
|
135
136
|
opt = opt.to_sym
|
@@ -137,11 +138,12 @@ module Wukong
|
|
137
138
|
self.options[opt] = val
|
138
139
|
options[:all_args] << arg unless std_options.include?(opt)
|
139
140
|
else
|
140
|
-
|
141
|
+
options[:all_args] << arg
|
142
|
+
options[:rest] << arg
|
141
143
|
end
|
144
|
+
# p [options, arg, args]
|
142
145
|
end
|
143
146
|
options[:all_args] = options[:all_args].join(" ")
|
144
|
-
options[:rest] = args
|
145
147
|
end
|
146
148
|
|
147
149
|
def this_script_filename
|
@@ -203,7 +205,7 @@ module Wukong
|
|
203
205
|
def input_output_paths
|
204
206
|
# input / output paths
|
205
207
|
input_path, output_path = options[:rest][0..1]
|
206
|
-
raise "You need to specify a parsed input directory and a directory for output. Got #{ARGV.inspect}" if (! options[:
|
208
|
+
raise "You need to specify a parsed input directory and a directory for output. Got #{ARGV.inspect}" if (! options[:dry_run]) && (input_path.blank? || output_path.blank?)
|
207
209
|
[input_path, output_path]
|
208
210
|
end
|
209
211
|
|
@@ -223,7 +225,7 @@ module Wukong
|
|
223
225
|
maybe_overwrite_output_paths! output_path
|
224
226
|
command = runner_command(input_path, output_path)
|
225
227
|
$stderr.puts command
|
226
|
-
|
228
|
+
unless options[:dry_run]
|
227
229
|
$stdout.puts `#{command}`
|
228
230
|
end
|
229
231
|
end
|
@@ -255,8 +257,24 @@ module Wukong
|
|
255
257
|
#{$0} --run=local input_hdfs_path output_hdfs_dir # run the script on local filesystem using unix pipes
|
256
258
|
#{$0} --run input_hdfs_path output_hdfs_dir # run the script with the mode given in config/wukong*.yaml
|
257
259
|
#{$0} --map
|
258
|
-
#{$0} --reduce
|
260
|
+
#{$0} --reduce # dispatch to the mapper or reducer
|
259
261
|
|
262
|
+
All flags must precede the input and output paths.
|
263
|
+
Additional flags:
|
264
|
+
--dry_run
|
265
|
+
Hadoop Options (see hadoop documentation)
|
266
|
+
--max_node_map_tasks => 'mapred.tasktracker.map.tasks.maximum',
|
267
|
+
--max_node_reduce_tasks => 'mapred.tasktracker.reduce.tasks.maximum',
|
268
|
+
--map_tasks => 'mapred.map.tasks',
|
269
|
+
--reduce_tasks => 'mapred.reduce.tasks',
|
270
|
+
--sort_fields => 'stream.num.map.output.key.fields',
|
271
|
+
--key_field_separator => 'map.output.key.field.separator',
|
272
|
+
--partition_fields => 'num.key.fields.for.partition',
|
273
|
+
--output_field_separator => 'stream.map.output.field.separator',
|
274
|
+
--map_speculative => 'mapred.map.tasks.speculative.execution',
|
275
|
+
--timeout => 'mapred.task.timeout',
|
276
|
+
--reuse_jvms => 'mapred.job.reuse.jvm.num.tasks',
|
277
|
+
--ignore_exit_status => 'stream.non.zero.exit.status.is.failure',
|
260
278
|
You can specify as well arbitrary script-specific command line flags; they are added to your options[] hash.
|
261
279
|
}
|
262
280
|
end
|