wukong 1.4.6 → 1.4.7

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.textile CHANGED
@@ -1,3 +1,15 @@
1
+ h2. Wukong v1.4.7 2010-03-05
2
+
3
+ Lots more examples:
4
+ * examples/stats/avg_value_frequency.rb does an Average Value Frequency histogram
5
+ * examples/server_logs has a quite useful apache log file parser
6
+ * Made the base streamer use each_record, opening the door for alternative record injection (eg Datamapper!)
7
+ * wukong/streamer/counting_reducer.rb is an um reducer and it counts things.
8
+
9
+ h2. Wukong v1.4.6 2010-01-26
10
+
11
+ * A HELLA AWESOME working example from retail web analytics by @lenbust
12
+
1
13
  h2. Wukong v1.4.5 2010-01-18
2
14
 
3
15
  * In @--run=local@ mode, you can use '-' alone as a filename to indicate STDIN / STDOUT as input/output respectively.
data/README.textile CHANGED
@@ -220,6 +220,7 @@ Patches submitted by:
220
220
  * ruby interpreter path fix by "Yuichiro MASUI":http://github.com/masuidrive - masui at masuidrive.jp - http://blog.masuidrive.jp/
221
221
 
222
222
  Thanks to:
223
+ * "Fredrik Möllerstrand (@lenbust)":http://twitter.com/lenbust for the examples/contrib/jeans working example
223
224
  * "Brad Heintz":http://www.bradheintz.com/no1thing/talks/ for his early feedback
224
225
  * "Phil Ripperger":http://blog.pdatasolutions.com for his "wukong in the Amazon AWS cloud":http://blog.pdatasolutions.com/post/191978092/ruby-on-hadoop-quickstart tutorial.
225
226
 
@@ -1,22 +1,45 @@
1
1
  #!/usr/bin/env ruby
2
2
  $: << File.dirname(__FILE__)+'/../lib'
3
+ require 'rubygems'
3
4
  require 'wukong'
4
5
 
6
+ MONTHS = {
7
+ 'Jan' => '01',
8
+ 'Feb' => '02',
9
+ 'Mar' => '03',
10
+ 'Apr' => '04',
11
+ 'May' => '05',
12
+ 'Jun' => '06',
13
+ 'Jul' => '07',
14
+ 'Aug' => '08',
15
+ 'Sep' => '09',
16
+ 'Oct' => '10',
17
+ 'Nov' => '11',
18
+ 'Dec' => '12',
19
+ }
5
20
  module ApacheLogParser
6
21
  class Mapper < Wukong::Streamer::LineStreamer
7
22
 
8
- # regular expression for apache-style log lines
9
- # note that we strip out the google analytics listener.
10
- LOG_RE = %r{\A
11
- (\d+\.\d+\.\d+\.\d+) # IP addr
12
- \s([^\s]+)\s # -
13
- \s([^\s]+) # -
14
- \s\[(\d\d/\w+/\d+):(\d\d:\d\d:\d\d)([^\]]*)\] # [07/Jun/2008:20:37:11 +0000]
15
- \s(\d+) # 400
16
- \s"([^\"]*(?:\" \+ gaJsHost \+ \"[^\"]*)?)" # "GET /faq" + gaJsHost + "google-analytics.com/ga.js HTTP/1.1"
17
- \s(\d+) # 173
18
- \s"([^\"]*)" "([^\"]*)" "([^\"]*)" # "-" "-" "-"
19
- \z}x
23
+ #
24
+ # Regular expression to parse an apache log line.
25
+ #
26
+ # 83.240.154.3 - - [07/Jun/2008:20:37:11 +0000] "GET /faq HTTP/1.1" 200 569 "http://infochimps.org/search?query=CAC" "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.9.0.16) Gecko/2009120208 Firefox/3.0.16"
27
+ #
28
+ LOG_RE = Regexp.compile(%r{\A
29
+ (\S+) # ip 83.240.154.3
30
+ \s(\S+) # j1 -
31
+ \s(\S+) # j2 -
32
+ \s\[(\d+)/(\w+)/(\d+) # date part [07/Jun/2008
33
+ :(\d+):(\d+):(\d+) # time part :20:37:11
34
+ \s(\+.*)\] # timezone +0000]
35
+ \s\"(?:(\S+) # http_method "GET
36
+ \s(\S+) # path /faq
37
+ \s(\S+)|-)" # protocol HTTP/1.1"
38
+ \s(\d+) # response_code 200
39
+ \s(\d+) # duration 569
40
+ \s\"([^\"]*)\" # referer "http://infochimps.org/search?query=CAC"
41
+ \s\"([^\"]*)\" # ua "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.9.0.16) Gecko/2009120208 Firefox/3.0.16"
42
+ \z}x)
20
43
 
21
44
  # Use the regex to break line into fields
22
45
  # Emit each record as flat line
@@ -24,42 +47,28 @@ module ApacheLogParser
24
47
  line.chomp
25
48
  m = LOG_RE.match(line)
26
49
  if m
27
- ip, j1, j2, datepart, timepart, tzpart, resp, req, j3, ref, ua, j4 = m.captures
28
- req_date = DateTime.parse("#{datepart} #{timepart} #{tzpart}").to_flat
29
- req, method, path, protocol = parse_request(req)
30
- yield [:logline, method, path, protocol, ip, j1, j2, req_date, resp, req, j3, ref, ua, j4]
50
+ (ip, j1, j2,
51
+ ts_day, ts_mo, ts_year,
52
+ ts_hour, ts_min, ts_sec, req_tz,
53
+ http_method, path, protocol,
54
+ response_code, duration,
55
+ referer, ua, *cruft) = m.captures
56
+ # DateTime.parse("#{datepart} #{timepart}").to_flat # this takes way too long
57
+ req_date = [ts_year, MONTHS[ts_mo], ts_day].join("")
58
+ req_time = [ts_hour, ts_min, ts_sec].join("")
59
+ yield [:logline, ip, req_date, req_time, http_method, protocol, path, response_code, duration, referer, ua, req_tz]
31
60
  else
32
61
  yield [:unparseable, line]
33
62
  end
34
63
  end
35
64
 
36
-
37
- def parse_request req
38
- m = %r{\A(\w+) (.*) (\w+/[\w\.]+)\z}.match(req)
39
- if m
40
- [''] + m.captures
41
- else
42
- [req, '', '', '']
43
- end
44
- end
45
-
46
65
  end
66
+ end
47
67
 
68
+ Wukong::Script.new(ApacheLogParser::Mapper, nil, :sort_fields => 7).run
69
+
70
+ # 55.55.155.55 - - [04/Feb/2008:11:37:52 +0000] 301 "GET /robots.txt HTTP/1.1" 185 "-" "WebAlta Crawler/2.0 (http://www.webalta.net/ru/about_webmaster.html) (Windows; U; Windows NT 5.1; ru-RU)" "-"
48
71
 
49
- class Reducer < Wukong::Streamer::LineStreamer
50
- end
51
72
 
52
- # Execute the script
53
- class Script < Wukong::Script
54
- def reduce_command
55
- "/usr/bin/uniq"
56
- end
57
- def default_options
58
- super.merge :sort_fields => 8 # , :reduce_tasks => 0
59
- end
60
- end
61
73
 
62
- Script.new(Mapper,nil).run
63
- end
64
74
 
65
- # 55.55.155.55 - - [04/Feb/2008:11:37:52 +0000] 301 "GET /robots.txt HTTP/1.1" 185 "-" "WebAlta Crawler/2.0 (http://www.webalta.net/ru/about_webmaster.html) (Windows; U; Windows NT 5.1; ru-RU)" "-"
@@ -0,0 +1,36 @@
1
+ #
2
+ # Group all visitors, and then troll through all the pages they've visited
3
+ # breaking each into distinct visits (where more than an [hour|day|whatever]
4
+ # separates subsequent pageviews)
5
+ #
6
+
7
+ #
8
+ # Mapper parses log files and creates a visitor_id from the visitor's user_id,
9
+ # cookie or ip. It emits
10
+ #
11
+ # <visitor_id> <datetime> <url_path>
12
+ #
13
+ # where the partition key is visitor_id, and we sort by visitor_id and datetime.
14
+ #
15
+
16
+ #
17
+ # Reducer:
18
+ #
19
+ # The reducer is given all page requests for the given visitor id, sorted by
20
+ # timestamp.
21
+ #
22
+ # It groups them by visit (a new visit starts when pageviews are separated by more than DISTINCT_VISIT_TIMEGAP)
23
+ # and emits
24
+ #
25
+ # trail <visitor_id> <n_pages_in_visit> <duration> <timestamp> < page1,page2,... >
26
+ #
27
+ # where the last is a comma-separated string of URL encoded paths (any internal comma is converted to %2C).
28
+ #
29
+ # You can instead emit
30
+ #
31
+ # page_trails <page1> <n_pages_in_visit> <duration> <timestamp> < page1,page2,... >
32
+ # page_trails <page2> <n_pages_in_visit> <duration> <timestamp> < page1,page2,... >
33
+ # ....
34
+ # page_trails <pagen> <n_pages_in_visit> <duration> <timestamp> < page1,page2,... >
35
+ #
36
+ # to discover all trails passing through a given page.
@@ -0,0 +1,40 @@
1
+
2
+
3
+ # For later, if we want to parse user agents:
4
+ # http://code.google.com/p/browserscope/source/browse/trunk/models/user_agent.py
5
+ # http://www.useragentstring.com/pages/All/
6
+ # http://github.com/jaxn/parse-user-agent
7
+ # http://code.google.com/p/browserscope/wiki/UserAgentParsing
8
+ # http://code.google.com/p/ua-parser/source/browse/
9
+ # http://github.com/shenoudab/active_device/tree/master/lib/active_device/
10
+
11
+
12
+ #
13
+ # * Mozilla based
14
+ # * Mozilla version
15
+ # * X11 based
16
+ # * Security
17
+ # * OS
18
+ # * CPU family
19
+ # * Language Tag
20
+ # * Renderer (i.e. Webkit, Trident, Presto)
21
+ # * Renderer Version
22
+ # * I don't see a utility for the "KHTML" and "like Gecko" bits, but whatever.
23
+ # * Based on
24
+ # * Browser Build (not really sure about this either)
25
+
26
+ # * Browser Family (i.e. Firefox, IE, Chrome, etc..)
27
+ # * Project Name (optional, i.e. Namoroka, Shiretoko)
28
+ # * Major Version
29
+ # * Minor Version
30
+ # * Version Third Bit
31
+ # * Version Fourth Bit
32
+ # * Open Question: How should we handle the "alpha/beta" bit, like apre1? I'm inclined to say we put it in its own datapoint and let people group together however they want, but not leave it attached to any of the version bits.
33
+
34
+ # Bot
35
+ # Brand
36
+ # Browser
37
+ # Engine
38
+ # Handset
39
+ # Model
40
+ # OS
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env ruby
2
+ # run like so:
3
+ # $> ruby avg_value_frequency.rb --run=local data/stats.tsv data/avf_out.tsv
4
+ require 'rubygems'
5
+ require 'wukong'
6
+
7
+ #
8
+ # Calculate the average value frequency (AVF) for each data row. AVF for a data
9
+ # point with m attributes is defined as:
10
+ #
11
+ # avf = (1/m)* sum (frequencies of attributes 1..m)
12
+ #
13
+ # so with the data
14
+ #
15
+ # 1 15 30 25
16
+ # 2 10 10 20
17
+ # 3 50 30 30
18
+ #
19
+ # for the first row, avf = (1/3)*(1+2+1) ~= 1.33. An outlier is identified by
20
+ # a low AVF.
21
+ #
22
+ module AverageValueFrequency
23
+ # Names for each column's attribute, in order
24
+ ATTR_NAMES = %w[length width height]
25
+
26
+ class HistogramMapper < Wukong::Streamer::RecordStreamer
27
+ # unroll each row from
28
+ # [id, val1, val2, ....]
29
+ # into
30
+ # [attr1, val1]
31
+ # [attr2, val2]
32
+ # ...
33
+ def process id, *values
34
+ ATTR_NAMES.zip(values).each do |attr, val|
35
+ yield [attr, val]
36
+ end
37
+ end
38
+ end
39
+
40
+ #
41
+ # Build a histogram of values
42
+ #
43
+ class HistogramReducer < Wukong::Streamer::CountingReducer
44
+ # use the attr and val as the key
45
+ def get_key attr, val=nil, *_
46
+ [attr, val]
47
+ end
48
+ end
49
+
50
+ class AvfRecordMapper < Wukong::Streamer::RecordStreamer
51
+ # average the frequency of each value
52
+ def process id, *values
53
+ sum = 0.0
54
+ ATTR_NAMES.zip(values).each do |attr, val|
55
+ sum += histogram[ [attr, val] ].to_i
56
+ end
57
+ avf = sum / ATTR_NAMES.length.to_f
58
+ yield [id, avf, *values]
59
+ end
60
+
61
+ # Load the histogram from a tab-separated file with
62
+ # attr val freq
63
+ def histogram
64
+ return @histogram if @histogram
65
+ @histogram = { }
66
+ File.open(options[:histogram_file]).each do |line|
67
+ attr, val, freq = line.chomp.split("\t")
68
+ @histogram[ [attr, val] ] = freq
69
+ end
70
+ @histogram
71
+ end
72
+ end
73
+ end
74
+
75
+ Settings.use :commandline, :define
76
+ Settings.define :histogram, :description => "Run the first pass to calculate a histogram"
77
+ Settings.define :avf, :description => "Run the second pass, to run back over the records with the histogram and find the AVF for each row."
78
+ Settings.define :histogram_file, :description => "File to load the histogram from (supply name of the output file from first pass)"
79
+ Settings.resolve!
80
+ if Settings[:histogram]
81
+ Wukong::Script.new(AverageValueFrequency::HistogramMapper, AverageValueFrequency::HistogramReducer).run
82
+ elsif Settings[:avf]
83
+ Wukong::Script.new(AverageValueFrequency::AvfRecordMapper, nil).run
84
+ else
85
+ raise "Please specify either --histogram (for first round) or --avf (second round)"
86
+ end
@@ -0,0 +1,3 @@
1
+ 1 15 30 25
2
+ 2 10 10 20
3
+ 3 50 30 30
data/lib/wukong/script.rb CHANGED
@@ -121,6 +121,7 @@ module Wukong
121
121
  def initialize mapper_klass, reducer_klass, extra_options={}
122
122
  self.options = Settings.dup
123
123
  options.resolve!
124
+ options.merge! self.default_options
124
125
  options.merge! extra_options
125
126
  self.mapper_klass = mapper_klass
126
127
  self.reducer_klass = reducer_klass
@@ -27,6 +27,8 @@ module Wukong
27
27
  Settings.define :timeout, :jobconf => true, :description => 'mapred.task.timeout', :wukong => true
28
28
  Settings.define :reuse_jvms, :jobconf => true, :description => 'mapred.job.reuse.jvm.num.tasks', :wukong => true
29
29
  Settings.define :respect_exit_status, :jobconf => true, :description => 'stream.non.zero.exit.is.failure', :wukong => true
30
+ Settings.define :noempty, :description => "don't create zero-byte reduce files (hadoop mode only)", :wukong => true
31
+ # mapred.linerecordreader.maxlength :description => "Safeguards against corrupted data: lines longer than this (in bytes) are treated as bad records."
30
32
 
31
33
  # emit a -jobconf hadoop option if the simplified command line arg is present
32
34
  # if not, the resulting nil will be elided later
@@ -66,7 +68,8 @@ module Wukong
66
68
  end
67
69
 
68
70
  def hadoop_other_args
69
- extra_str_args = [ options[:extra_args] ]
71
+ extra_str_args = [ options[:extra_args] ]
72
+ extra_str_args += ' -lazyOutput' if options[:noempty] # don't create reduce file if no records
70
73
  options[:reuse_jvms] = '-1' if (options[:reuse_jvms] == true)
71
74
  options[:respect_exit_status] = 'false' if (options[:ignore_exit_status] == true)
72
75
  extra_hsh_args = [:map_speculative, :timeout, :reuse_jvms, :respect_exit_status].map{|opt| jobconf(opt) }
@@ -11,5 +11,6 @@ module Wukong
11
11
  autoload :AccumulatingReducer, 'wukong/streamer/accumulating_reducer'
12
12
  autoload :ListReducer, 'wukong/streamer/list_reducer'
13
13
  autoload :UniqByLastReducer, 'wukong/streamer/uniq_by_last_reducer'
14
+ autoload :CountingReducer, 'wukong/streamer/counting_reducer'
14
15
  end
15
16
  end
@@ -19,9 +19,8 @@ module Wukong
19
19
  def stream
20
20
  Log.info("Streaming on:\t%s" % [Script.input_file]) unless Script.input_file.blank?
21
21
  before_stream
22
- $stdin.each do |line|
23
- record = recordize(line.chomp)
24
- next unless record
22
+ each_record do |line|
23
+ record = recordize(line.chomp) or next
25
24
  process(*record) do |output_record|
26
25
  emit output_record
27
26
  end
@@ -29,6 +28,10 @@ module Wukong
29
28
  after_stream
30
29
  end
31
30
 
31
+ def each_record &block
32
+ $stdin.each(&block)
33
+ end
34
+
32
35
  # Called exactly once, before streaming begins
33
36
  def before_stream
34
37
  end
@@ -0,0 +1,25 @@
1
+ module Wukong
2
+ module Streamer
3
+
4
+ #
5
+ # Count the number of records for each key.
6
+ #
7
+ class CountingReducer < AccumulatingReducer
8
+ attr_accessor :count
9
+
10
+ # start the count at 0 for each key
11
+ def start! *_
12
+ self.count = 0
13
+ end
14
+ # ... and count the number of records for this key
15
+ def accumulate *_
16
+ self.count += 1
17
+ end
18
+ # emit [key, count]
19
+ def finalize
20
+ yield [key, count].flatten
21
+ end
22
+ end
23
+
24
+ end
25
+ end
data/wukong.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{wukong}
8
- s.version = "1.4.6"
8
+ s.version = "1.4.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Philip (flip) Kromer"]
12
- s.date = %q{2010-01-26}
12
+ s.date = %q{2010-03-04}
13
13
  s.description = %q{ Treat your dataset like a:
14
14
 
15
15
  * stream of lines when it’s efficient to process by lines
@@ -109,7 +109,11 @@ Gem::Specification.new do |s|
109
109
  "examples/rank_and_bin.rb",
110
110
  "examples/run_all.sh",
111
111
  "examples/sample_records.rb",
112
+ "examples/server_logs/breadcrumbs.rb",
113
+ "examples/server_logs/user_agent.rb",
112
114
  "examples/size.rb",
115
+ "examples/stats/avg_value_frequency.rb",
116
+ "examples/stats/data/avg_value_frequency.tsv",
113
117
  "examples/word_count.rb",
114
118
  "lib/wukong.rb",
115
119
  "lib/wukong/bad_record.rb",
@@ -145,6 +149,7 @@ Gem::Specification.new do |s|
145
149
  "lib/wukong/streamer/base.rb",
146
150
  "lib/wukong/streamer/count_keys.rb",
147
151
  "lib/wukong/streamer/count_lines.rb",
152
+ "lib/wukong/streamer/counting_reducer.rb",
148
153
  "lib/wukong/streamer/filter.rb",
149
154
  "lib/wukong/streamer/line_streamer.rb",
150
155
  "lib/wukong/streamer/list_reducer.rb",
@@ -170,7 +175,7 @@ Gem::Specification.new do |s|
170
175
  s.homepage = %q{http://mrflip.github.com/wukong}
171
176
  s.rdoc_options = ["--charset=UTF-8"]
172
177
  s.require_paths = ["lib"]
173
- s.rubygems_version = %q{1.3.5}
178
+ s.rubygems_version = %q{1.3.6}
174
179
  s.summary = %q{Wukong makes Hadoop so easy a chimpanzee can use it.}
175
180
  s.test_files = [
176
181
  "spec/spec_helper.rb",
@@ -193,7 +198,10 @@ Gem::Specification.new do |s|
193
198
  "examples/pagerank/pagerank_initialize.rb",
194
199
  "examples/rank_and_bin.rb",
195
200
  "examples/sample_records.rb",
201
+ "examples/server_logs/breadcrumbs.rb",
202
+ "examples/server_logs/user_agent.rb",
196
203
  "examples/size.rb",
204
+ "examples/stats/avg_value_frequency.rb",
197
205
  "examples/word_count.rb"
198
206
  ]
199
207
 
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.6
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 4
8
+ - 7
9
+ version: 1.4.7
5
10
  platform: ruby
6
11
  authors:
7
12
  - Philip (flip) Kromer
@@ -9,39 +14,45 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-01-26 00:00:00 -06:00
17
+ date: 2010-03-04 00:00:00 -06:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: addressable
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
20
24
  requirements:
21
25
  - - ">="
22
26
  - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
23
29
  version: "0"
24
- version:
30
+ type: :runtime
31
+ version_requirements: *id001
25
32
  - !ruby/object:Gem::Dependency
26
33
  name: extlib
27
- type: :runtime
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
34
+ prerelease: false
35
+ requirement: &id002 !ruby/object:Gem::Requirement
30
36
  requirements:
31
37
  - - ">="
32
38
  - !ruby/object:Gem::Version
39
+ segments:
40
+ - 0
33
41
  version: "0"
34
- version:
42
+ type: :runtime
43
+ version_requirements: *id002
35
44
  - !ruby/object:Gem::Dependency
36
45
  name: htmlentities
37
- type: :runtime
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
46
+ prerelease: false
47
+ requirement: &id003 !ruby/object:Gem::Requirement
40
48
  requirements:
41
49
  - - ">="
42
50
  - !ruby/object:Gem::Version
51
+ segments:
52
+ - 0
43
53
  version: "0"
44
- version:
54
+ type: :runtime
55
+ version_requirements: *id003
45
56
  description: " Treat your dataset like a:\n\n * stream of lines when it\xE2\x80\x99s efficient to process by lines\n * stream of field arrays when it\xE2\x80\x99s efficient to deal directly with fields\n * stream of lightweight objects when it\xE2\x80\x99s efficient to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.\n"
46
57
  email: flip@infochimps.org
47
58
  executables:
@@ -141,7 +152,11 @@ files:
141
152
  - examples/rank_and_bin.rb
142
153
  - examples/run_all.sh
143
154
  - examples/sample_records.rb
155
+ - examples/server_logs/breadcrumbs.rb
156
+ - examples/server_logs/user_agent.rb
144
157
  - examples/size.rb
158
+ - examples/stats/avg_value_frequency.rb
159
+ - examples/stats/data/avg_value_frequency.tsv
145
160
  - examples/word_count.rb
146
161
  - lib/wukong.rb
147
162
  - lib/wukong/bad_record.rb
@@ -177,6 +192,7 @@ files:
177
192
  - lib/wukong/streamer/base.rb
178
193
  - lib/wukong/streamer/count_keys.rb
179
194
  - lib/wukong/streamer/count_lines.rb
195
+ - lib/wukong/streamer/counting_reducer.rb
180
196
  - lib/wukong/streamer/filter.rb
181
197
  - lib/wukong/streamer/line_streamer.rb
182
198
  - lib/wukong/streamer/list_reducer.rb
@@ -211,18 +227,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
211
227
  requirements:
212
228
  - - ">="
213
229
  - !ruby/object:Gem::Version
230
+ segments:
231
+ - 0
214
232
  version: "0"
215
- version:
216
233
  required_rubygems_version: !ruby/object:Gem::Requirement
217
234
  requirements:
218
235
  - - ">="
219
236
  - !ruby/object:Gem::Version
237
+ segments:
238
+ - 0
220
239
  version: "0"
221
- version:
222
240
  requirements: []
223
241
 
224
242
  rubyforge_project:
225
- rubygems_version: 1.3.5
243
+ rubygems_version: 1.3.6
226
244
  signing_key:
227
245
  specification_version: 3
228
246
  summary: Wukong makes Hadoop so easy a chimpanzee can use it.
@@ -247,5 +265,8 @@ test_files:
247
265
  - examples/pagerank/pagerank_initialize.rb
248
266
  - examples/rank_and_bin.rb
249
267
  - examples/sample_records.rb
268
+ - examples/server_logs/breadcrumbs.rb
269
+ - examples/server_logs/user_agent.rb
250
270
  - examples/size.rb
271
+ - examples/stats/avg_value_frequency.rb
251
272
  - examples/word_count.rb