wukong 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.textile +107 -0
- data/README.textile +166 -0
- data/bin/cutc +30 -0
- data/bin/cuttab +5 -0
- data/bin/greptrue +8 -0
- data/bin/hdp-cat +3 -0
- data/bin/hdp-catd +3 -0
- data/bin/hdp-du +81 -0
- data/bin/hdp-get +3 -0
- data/bin/hdp-kill +3 -0
- data/bin/hdp-ls +10 -0
- data/bin/hdp-mkdir +3 -0
- data/bin/hdp-mv +3 -0
- data/bin/hdp-parts_to_keys.rb +77 -0
- data/bin/hdp-ps +3 -0
- data/bin/hdp-put +3 -0
- data/bin/hdp-rm +11 -0
- data/bin/hdp-sort +29 -0
- data/bin/hdp-stream +29 -0
- data/bin/hdp-stream-flat +18 -0
- data/bin/hdp-sync +17 -0
- data/bin/hdp-wc +67 -0
- data/bin/md5sort +20 -0
- data/bin/tabchar +5 -0
- data/bin/uniqc +3 -0
- data/bin/wu-hist +3 -0
- data/bin/wu-lign +177 -0
- data/bin/wu-sum +30 -0
- data/doc/INSTALL.textile +41 -0
- data/doc/LICENSE.textile +107 -0
- data/doc/README-tutorial.textile +163 -0
- data/doc/README-wulign.textile +59 -0
- data/doc/README-wutils.textile +128 -0
- data/doc/TODO.textile +61 -0
- data/doc/UsingWukong-part1-setup.textile +2 -0
- data/doc/UsingWukong-part2-scraping.textile +2 -0
- data/doc/UsingWukong-part3-parsing.textile +132 -0
- data/doc/code/api_response_example.txt +20 -0
- data/doc/code/parser_skeleton.rb +38 -0
- data/doc/hadoop-nfs.textile +51 -0
- data/doc/hadoop-setup.textile +29 -0
- data/doc/index.textile +124 -0
- data/doc/intro_to_map_reduce/MapReduceDiagram.graffle +0 -0
- data/doc/links.textile +42 -0
- data/doc/overview.textile +91 -0
- data/doc/pig/PigLatinExpressionsList.txt +122 -0
- data/doc/pig/PigLatinReferenceManual.html +19134 -0
- data/doc/pig/PigLatinReferenceManual.txt +1640 -0
- data/doc/tips.textile +116 -0
- data/doc/usage.textile +102 -0
- data/doc/utils.textile +48 -0
- data/examples/README.txt +17 -0
- data/examples/and_pig/sample_queries.rb +128 -0
- data/examples/apache_log_parser.rb +53 -0
- data/examples/count_keys.rb +56 -0
- data/examples/count_keys_at_mapper.rb +57 -0
- data/examples/graph/adjacency_list.rb +74 -0
- data/examples/graph/breadth_first_search.rb +79 -0
- data/examples/graph/gen_2paths.rb +68 -0
- data/examples/graph/gen_multi_edge.rb +103 -0
- data/examples/graph/gen_symmetric_links.rb +53 -0
- data/examples/package-local.rb +100 -0
- data/examples/package.rb +96 -0
- data/examples/pagerank/README.textile +6 -0
- data/examples/pagerank/gen_initial_pagerank_graph.pig +57 -0
- data/examples/pagerank/pagerank.rb +88 -0
- data/examples/pagerank/pagerank_initialize.rb +46 -0
- data/examples/pagerank/run_pagerank.sh +19 -0
- data/examples/rank_and_bin.rb +173 -0
- data/examples/run_all.sh +47 -0
- data/examples/sample_records.rb +44 -0
- data/examples/size.rb +60 -0
- data/examples/word_count.rb +95 -0
- data/lib/wukong.rb +11 -0
- data/lib/wukong/and_pig.rb +62 -0
- data/lib/wukong/and_pig/README.textile +12 -0
- data/lib/wukong/and_pig/as.rb +37 -0
- data/lib/wukong/and_pig/data_types.rb +30 -0
- data/lib/wukong/and_pig/functions.rb +50 -0
- data/lib/wukong/and_pig/generate.rb +85 -0
- data/lib/wukong/and_pig/generate/variable_inflections.rb +82 -0
- data/lib/wukong/and_pig/junk.rb +51 -0
- data/lib/wukong/and_pig/operators.rb +8 -0
- data/lib/wukong/and_pig/operators/compound.rb +29 -0
- data/lib/wukong/and_pig/operators/evaluators.rb +7 -0
- data/lib/wukong/and_pig/operators/execution.rb +15 -0
- data/lib/wukong/and_pig/operators/file_methods.rb +29 -0
- data/lib/wukong/and_pig/operators/foreach.rb +98 -0
- data/lib/wukong/and_pig/operators/groupies.rb +212 -0
- data/lib/wukong/and_pig/operators/load_store.rb +65 -0
- data/lib/wukong/and_pig/operators/meta.rb +42 -0
- data/lib/wukong/and_pig/operators/relational.rb +129 -0
- data/lib/wukong/and_pig/pig_struct.rb +48 -0
- data/lib/wukong/and_pig/pig_var.rb +95 -0
- data/lib/wukong/and_pig/symbol.rb +29 -0
- data/lib/wukong/and_pig/utils.rb +0 -0
- data/lib/wukong/bad_record.rb +18 -0
- data/lib/wukong/boot.rb +47 -0
- data/lib/wukong/datatypes.rb +24 -0
- data/lib/wukong/datatypes/enum.rb +123 -0
- data/lib/wukong/dfs.rb +80 -0
- data/lib/wukong/encoding.rb +111 -0
- data/lib/wukong/extensions.rb +15 -0
- data/lib/wukong/extensions/array.rb +18 -0
- data/lib/wukong/extensions/blank.rb +93 -0
- data/lib/wukong/extensions/class.rb +189 -0
- data/lib/wukong/extensions/date_time.rb +24 -0
- data/lib/wukong/extensions/emittable.rb +82 -0
- data/lib/wukong/extensions/hash.rb +120 -0
- data/lib/wukong/extensions/hash_like.rb +119 -0
- data/lib/wukong/extensions/hashlike_class.rb +47 -0
- data/lib/wukong/extensions/module.rb +2 -0
- data/lib/wukong/extensions/pathname.rb +27 -0
- data/lib/wukong/extensions/string.rb +65 -0
- data/lib/wukong/extensions/struct.rb +17 -0
- data/lib/wukong/extensions/symbol.rb +11 -0
- data/lib/wukong/logger.rb +53 -0
- data/lib/wukong/models/graph.rb +27 -0
- data/lib/wukong/rdf.rb +104 -0
- data/lib/wukong/schema.rb +37 -0
- data/lib/wukong/script.rb +265 -0
- data/lib/wukong/script/hadoop_command.rb +111 -0
- data/lib/wukong/script/local_command.rb +14 -0
- data/lib/wukong/streamer.rb +13 -0
- data/lib/wukong/streamer/accumulating_reducer.rb +89 -0
- data/lib/wukong/streamer/base.rb +76 -0
- data/lib/wukong/streamer/count_keys.rb +30 -0
- data/lib/wukong/streamer/count_lines.rb +26 -0
- data/lib/wukong/streamer/filter.rb +20 -0
- data/lib/wukong/streamer/line_streamer.rb +12 -0
- data/lib/wukong/streamer/list_reducer.rb +20 -0
- data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +22 -0
- data/lib/wukong/streamer/rank_and_bin_reducer.rb +145 -0
- data/lib/wukong/streamer/set_reducer.rb +14 -0
- data/lib/wukong/streamer/struct_streamer.rb +48 -0
- data/lib/wukong/streamer/summing_reducer.rb +29 -0
- data/lib/wukong/streamer/uniq_by_last_reducer.rb +44 -0
- data/lib/wukong/typed_struct.rb +12 -0
- data/lib/wukong/wukong_class.rb +21 -0
- data/spec/bin/hdp-wc_spec.rb +4 -0
- data/spec/spec_helper.rb +0 -0
- data/wukong.gemspec +179 -0
- metadata +214 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
module Wukong
  module Streamer

    #
    # Bin and order a partitioned subset of keys
    #
    # For each record, appends three values -- numbering, rank and bin:
    #
    # * numbering, from 0..(n-1). Each element gets a distinct numbering based on
    #   the order seen at the reducer; elements with identical keys might have
    #   different numbering on different runs.
    #
    # * rank, a number within 1..n giving the "place" of each value. Each element
    #   receives a successive (and thus unique) numbering, but all elements with
    #   the same key share the same rank. The first element for a given rank has
    #
    #     (rank == numbering + 1)
    #
    # * bin, a number assigning keys by rank into a smaller number of groups. You
    #   must supply command line arguments
    #
    #     --bins=[number] --total_count=[number]
    #
    #   giving the number of groups and predicting in advance the total number of
    #   records. (Or override the bin assignment method to use your own damn
    #   strategy).
    #
    # If your data looked (in order) as follows, and 4 bins were requested:
    #
    #   data:       1   1   1   2.3  7   69  79  79  80  81  81
    #   numbering:  0   1   2   3    4   5   6   7   8   9   10
    #   rank:       1   1   1   4    5   6   7   7   9   10  10
    #   4-bin:      1   1   1   2    2   3   3   3   4   4   4
    #
    # If instead 100 bins were requested,
    #
    #   data:       1   1   1   2.3  7   69  79  79  80  81  81
    #   numbering:  0   1   2   3    4   5   6   7   8   9   10
    #   rank:       1   1   1   4    5   6   7   7   9   10  10
    #   100-bin:    1   1   1   28   37  46  55  55  73  82  91
    #
    # Note that most of the bins are empty.
    #
    # NOTE(review): the 100-bin row does not appear to match #get_bin as
    # written (with bin_size 0.11, rank 1 lands in bin 5, and the two rank-10
    # records must share a bin) -- confirm and regenerate the example.
    #
    # --------------------------------------------------------------------------
    #
    # Note that in this implementation each reducer numbers its own subset of
    # elements from 1..total_count. If you want to number your whole dataset,
    # you'll have to set @:reduce_tasks => 1@ in your Script's
    # Script#default_options.
    #
    # You might feel a bit better about yourself if you can bin several fields
    # (or subsets) at once. The :partition_fields option to Wukong::Script
    # (which requests a KeyFieldBasedPartitioner) can be used to route different
    # subsets to (possibly) distinct reducers.
    #
    # See the [examples/rank_and_bin.rb] example script for an implementation
    # of this. (And note the thing you have to do in case one reducer sees
    # multiple partitions).
    #
    # It would surely be best to use a total sort and supply each reducer with the
    # initial rank of its run.
    #
    class RankAndBinReducer < Wukong::Streamer::Base
      # Number of ranks covered by each bin (a Float: total_count / bins)
      attr_accessor :bin_size

      #
      # options must carry :bins and :total_count (see #configure_bins!);
      # raises RuntimeError otherwise.
      #
      def initialize(options)
        super options
        configure_bins! options
        reset_order_params!
      end

      # ===========================================================================
      #
      # Order parameters (numbering, bin and rank)
      #

      #
      # Key used to assign ranking -- elements with identical keys have identical
      # rank. By default this is the first field of the record.
      #
      def get_key(*args)
        args.first
      end

      #
      # Start over: no key seen yet, next record gets numbering 0 and rank 1.
      #
      def reset_order_params!
        @last_key  = nil
        @numbering = 0
        @rank      = 1
      end

      #
      # The ranking is the "place" of each value: each element receives a
      # successive (and thus unique) numbering, but all elements with the same key
      # share the same rank. The first element for a given rank has
      #
      #   (rank == numbering + 1)
      #
      # Must be called before @numbering is advanced for the current record.
      #
      def get_rank(key)
        if @last_key != key
          # new key: its rank is the 1-based position of its first record
          @rank     = @numbering + 1
          @last_key = key
        end
        @rank
      end

      #
      # Set the bin from the current rank: elements with identical keys (and so
      # identical rank) land in identical bins.
      #
      # The half-step offset keeps rank == total_count inside the last bin
      # rather than spilling into bin (bins + 1).
      #
      def get_bin(rank)
        ((rank - 0.5) / bin_size).floor + 1
      end

      #
      # Return the numbering, rank and bin for the given key, then advance the
      # numbering for the next record.
      #
      def get_order_params(key)
        numbering = @numbering # use un-incremented value
        rank      = get_rank key
        bin       = get_bin rank
        @numbering += 1
        [numbering, rank, bin]
      end

      #
      # Derive bin_size from options[:bins] and options[:total_count].
      #
      # Raises RuntimeError if :bins is missing, if :total_count is missing or
      # not positive, or if :bins is not a positive integer.
      #
      def configure_bins!(options)
        unless options[:bins]
          raise "Please specify a number of --bins= and a --total_count= or your own strategy to bin the ranked items."
        end
        total_count = options[:total_count].to_f
        bins        = options[:bins].to_i
        # nil.to_f and unparseable strings both give 0.0, so this also catches a missing option
        unless total_count > 0
          raise "To set the bin (%ile) size using --bins, we need to know the total count in advance. Please supply the total_count option."
        end
        # bins <= 0 would give an infinite or negative bin_size and meaningless bins
        unless bins > 0
          raise "The --bins option must be a positive integer (got #{options[:bins].inspect})."
        end
        self.bin_size = (total_count / bins)
        # $stderr.puts "Splitting %s records into %s bins of size %f. First element gets bin %d, last gets bin %d, median gets bin %d/%d" %
        #   [total_count, bins, bin_size, get_bin(1), get_bin(total_count), get_bin(((total_count+1)/2.0).floor), get_bin(((total_count+1)/2.0).ceil)]
      end

      #
      # Emit the record's fields followed by its numbering, rank and bin.
      #
      def process(*fields)
        numbering, rank, bin = get_order_params(get_key(*fields))
        yield fields.to_flat + [numbering, rank, bin]
      end
    end

  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'set'
require 'wukong/streamer/list_reducer'
|
2
|
+
module Wukong
  module Streamer
    #
    # A ListReducer whose accumulator is a Set instead of a list, so a value
    # that is added more than once is kept only once.
    #
    # NOTE(review): how values are added and emitted is inherited from
    # ListReducer, which is not visible here -- confirm it only relies on
    # methods Set provides.
    #
    class SetReducer < Wukong::Streamer::ListReducer
      # Begin with an empty set
      def start!(*args)
        self.values = Set.new
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Wukong
  module Streamer
    #
    # Mix StructRecordizer into any streamer to make it accept a stream of
    # objects -- the first field in each line is turned into a class and used to
    # instantiate an object using the remaining fields on that line.
    #
    module StructRecordizer

      #
      # Turn the first field into a class name, then use the remaining fields
      # on that line to instantiate the object to process.
      #
      # rsrc has the form "klass_name" or "klass_name-suffix"; only the part
      # before the first '-' is used to look up the class.
      #
      # Returns [object, suffix], or nil if no class is found for the resource
      # or the class rejects the fields with an ArgumentError (a warning is
      # printed in that case). Any other StandardError is re-raised as a
      # RuntimeError whose message includes the offending record.
      #
      def self.recordize(rsrc, *fields)
        klass_name, suffix = rsrc.split('-', 2)
        klass = Wukong.class_from_resource(klass_name)
        return unless klass
        # instantiate the class using the remaining fields on that line
        begin
          [ klass.new(*fields), suffix ]
        rescue ArgumentError => e
          warn "Couldn't instantiate: #{e} (#{[rsrc, fields].inspect})"
          return
        rescue StandardError => e
          # Only StandardError: Interrupt, SystemExit and friends must pass
          # through untouched instead of being rewrapped as a RuntimeError.
          raise [e, rsrc, fields].inspect
        end
      end

      #
      # Split a tab-separated line and recordize it: first field is the
      # resource name, the rest are passed to the class constructor.
      #
      # Returns nil for an empty line (there is no resource name to look up).
      #
      def recordize(line)
        fields = line.split("\t")
        return if fields.empty?
        StructRecordizer.recordize(*fields)
      end
    end

    #
    # Processes file as a stream of objects -- the first field in each line is
    # turned into a class and used to instantiate an object using the remaining
    # fields on that line.
    #
    # See [StructRecordizer] for more.
    #
    class StructStreamer < Wukong::Streamer::Base
      include StructRecordizer
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Wukong
  module Streamer
    #
    # For each key, add up selected fields across all of that key's records
    # and emit the key followed by the sums.
    #
    # summing_elements holds the positions (within each record's fields) of
    # the values to add up. It must be set before start! is called; nothing in
    # this class assigns it.
    #
    # Values are converted with to_i, so the sums are integers and any
    # fractional part of a field is discarded.
    #
    class SummingReducer < Wukong::Streamer::AccumulatingReducer
      # field indexes to sum
      attr_accessor :summing_elements
      # running totals, one per entry in summing_elements
      attr_accessor :sums

      # reset every running total to zero
      def start!(*args)
        self.sums = summing_elements.map { 0 }
      end

      # add this record's selected fields onto the running totals
      def accumulate(*fields)
        fields.values_at(*summing_elements).each_with_index do |val, idx|
          sums[idx] += val.to_i
        end
      end

      # emit the key field(s) followed by each sum, as one flat record.
      # NOTE(review): `key` is not defined in this class; presumably it is
      # supplied by AccumulatingReducer -- confirm.
      def finalize
        yield [key, sums].flatten
      end
    end

  end
end
|
29
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Wukong
  module Streamer
    #
    # Accumulate acts like an insecure high-school kid, for each key adopting in
    # turn the latest value seen. It then emits the last (in sort order) value
    # for that key.
    #
    # For example, to extract the *latest* value for each property, set hadoop
    # to use <resource, item_id, timestamp> as sort fields and <resource,
    # item_id> as key fields.
    #
    class UniqByLastReducer < Wukong::Streamer::AccumulatingReducer
      # the most recently accumulated record (an array of fields), or nil
      attr_accessor :final_value

      #
      # Use first two fields as keys by default
      #
      def get_key(*vals)
        vals[0..1]
      end

      #
      # Adopt each value in turn: the last one's the one you want.
      #
      def accumulate(*vals)
        self.final_value = vals
      end

      #
      # Emit the last-seen value; emits nothing if no record was accumulated.
      #
      def finalize
        yield final_value if final_value
      end

      #
      # Clear state on reset
      #
      def start!(*args)
        self.final_value = nil
      end
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#
# Build a Struct class whose members each have an associated type.
#
#   TypedStruct.new([:name, String], [:age, Integer])
#
# Each argument is a [member, type] pair. The result is a plain Struct
# subclass (not a subclass of TypedStruct) with two class-level attributes:
#
# * mtypes        -- the types, in member order
# * members_types -- a hash from each member (as given) to its type
#
# Raises ArgumentError if no pairs are given.
#
# NOTE(review): cattr_accessor and Hash.zip are not Ruby core methods; they
# must be loaded by the caller before this is used.
#
class TypedStruct < Struct
  def self.new(*members_types)
    if members_types.empty?
      raise ArgumentError, "TypedStruct.new needs at least one [member, type] pair"
    end
    # [[m1, t1], [m2, t2]] => [m1, m2], [t1, t2]
    members, mtypes = members_types.transpose
    klass = Struct.new(*members.map(&:to_sym))
    klass.class_eval do
      cattr_accessor :mtypes, :members_types
      self.mtypes        = mtypes
      self.members_types = Hash.zip(members, mtypes)
    end
    klass
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# require 'active_support/core_ext/class/inheritable_attributes.rb'
|
2
|
+
require 'extlib/class'
|
3
|
+
|
4
|
+
module Wukong
  #
  # Use to instrument an actual class to behave like a hash for attribute
  # access: obj[:attr] calls obj.attr, and obj[:attr] = val calls
  # obj.attr = val.
  #
  module WukongClass

    # Read the attribute named attr by calling the method of that name.
    def [](attr)
      self.send attr
    end

    # Assign val to the attribute named attr by calling its "attr=" writer.
    def []=(attr, val)
      self.send("#{attr}=", val)
    end

  end
end
|
data/spec/spec_helper.rb
ADDED
File without changes
|
data/wukong.gemspec
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
  s.name = %q{wukong}
  s.version = "0.1.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Philip (flip) Kromer"]
  s.date = %q{2009-09-28}
  s.description = %q{ Treat your dataset like a:

* stream of lines when it’s efficient to process by lines
* stream of field arrays when it’s efficient to deal directly with fields
* stream of lightweight objects when it’s efficient to deal with objects

Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
}
  s.email = %q{flip@infochimps.org}
  s.executables = ["cutc", "cuttab", "greptrue", "hdp-cat", "hdp-catd", "hdp-du", "hdp-get", "hdp-kill", "hdp-ls", "hdp-mkdir", "hdp-mv", "hdp-parts_to_keys.rb", "hdp-ps", "hdp-put", "hdp-rm", "hdp-sort", "hdp-stream", "hdp-stream-flat", "hdp-sync", "hdp-wc", "md5sort", "tabchar", "uniqc", "wu-hist", "wu-lign", "wu-sum"]
  s.extra_rdoc_files = [
    "LICENSE.textile",
    "README.textile"
  ]
  s.files = [
    "doc/INSTALL.textile",
    "doc/LICENSE.textile",
    "doc/README-tutorial.textile",
    "doc/README-wulign.textile",
    "doc/README-wutils.textile",
    "doc/TODO.textile",
    "doc/UsingWukong-part1-setup.textile",
    "doc/UsingWukong-part2-scraping.textile",
    "doc/UsingWukong-part3-parsing.textile",
    "doc/code/api_response_example.txt",
    "doc/code/parser_skeleton.rb",
    "doc/hadoop-nfs.textile",
    "doc/hadoop-setup.textile",
    "doc/index.textile",
    "doc/intro_to_map_reduce/MapReduceDiagram.graffle",
    "doc/links.textile",
    "doc/overview.textile",
    "doc/pig/PigLatinExpressionsList.txt",
    "doc/pig/PigLatinReferenceManual.html",
    "doc/pig/PigLatinReferenceManual.txt",
    "doc/tips.textile",
    "doc/usage.textile",
    "doc/utils.textile",
    "examples/README.txt",
    "examples/and_pig/sample_queries.rb",
    "examples/apache_log_parser.rb",
    "examples/count_keys.rb",
    "examples/count_keys_at_mapper.rb",
    "examples/graph/adjacency_list.rb",
    "examples/graph/breadth_first_search.rb",
    "examples/graph/gen_2paths.rb",
    "examples/graph/gen_multi_edge.rb",
    "examples/graph/gen_symmetric_links.rb",
    "examples/package-local.rb",
    "examples/package.rb",
    "examples/pagerank/README.textile",
    "examples/pagerank/gen_initial_pagerank_graph.pig",
    "examples/pagerank/pagerank.rb",
    "examples/pagerank/pagerank_initialize.rb",
    "examples/pagerank/run_pagerank.sh",
    "examples/rank_and_bin.rb",
    "examples/run_all.sh",
    "examples/sample_records.rb",
    "examples/size.rb",
    "examples/word_count.rb",
    "lib/wukong.rb",
    "lib/wukong/and_pig.rb",
    "lib/wukong/and_pig/README.textile",
    "lib/wukong/and_pig/as.rb",
    "lib/wukong/and_pig/data_types.rb",
    "lib/wukong/and_pig/functions.rb",
    "lib/wukong/and_pig/generate.rb",
    "lib/wukong/and_pig/generate/variable_inflections.rb",
    "lib/wukong/and_pig/junk.rb",
    "lib/wukong/and_pig/operators.rb",
    "lib/wukong/and_pig/operators/compound.rb",
    "lib/wukong/and_pig/operators/evaluators.rb",
    "lib/wukong/and_pig/operators/execution.rb",
    "lib/wukong/and_pig/operators/file_methods.rb",
    "lib/wukong/and_pig/operators/foreach.rb",
    "lib/wukong/and_pig/operators/groupies.rb",
    "lib/wukong/and_pig/operators/load_store.rb",
    "lib/wukong/and_pig/operators/meta.rb",
    "lib/wukong/and_pig/operators/relational.rb",
    "lib/wukong/and_pig/pig_struct.rb",
    "lib/wukong/and_pig/pig_var.rb",
    "lib/wukong/and_pig/symbol.rb",
    "lib/wukong/and_pig/utils.rb",
    "lib/wukong/bad_record.rb",
    "lib/wukong/boot.rb",
    "lib/wukong/datatypes.rb",
    "lib/wukong/datatypes/enum.rb",
    "lib/wukong/dfs.rb",
    "lib/wukong/encoding.rb",
    "lib/wukong/extensions.rb",
    "lib/wukong/extensions/array.rb",
    "lib/wukong/extensions/blank.rb",
    "lib/wukong/extensions/class.rb",
    "lib/wukong/extensions/date_time.rb",
    "lib/wukong/extensions/emittable.rb",
    "lib/wukong/extensions/hash.rb",
    "lib/wukong/extensions/hash_like.rb",
    "lib/wukong/extensions/hashlike_class.rb",
    "lib/wukong/extensions/module.rb",
    "lib/wukong/extensions/pathname.rb",
    "lib/wukong/extensions/string.rb",
    "lib/wukong/extensions/struct.rb",
    "lib/wukong/extensions/symbol.rb",
    "lib/wukong/logger.rb",
    "lib/wukong/models/graph.rb",
    "lib/wukong/rdf.rb",
    "lib/wukong/schema.rb",
    "lib/wukong/script.rb",
    "lib/wukong/script/hadoop_command.rb",
    "lib/wukong/script/local_command.rb",
    "lib/wukong/streamer.rb",
    "lib/wukong/streamer/accumulating_reducer.rb",
    "lib/wukong/streamer/base.rb",
    "lib/wukong/streamer/count_keys.rb",
    "lib/wukong/streamer/count_lines.rb",
    "lib/wukong/streamer/filter.rb",
    "lib/wukong/streamer/line_streamer.rb",
    "lib/wukong/streamer/list_reducer.rb",
    "lib/wukong/streamer/preprocess_with_pipe_streamer.rb",
    "lib/wukong/streamer/rank_and_bin_reducer.rb",
    "lib/wukong/streamer/set_reducer.rb",
    "lib/wukong/streamer/struct_streamer.rb",
    "lib/wukong/streamer/summing_reducer.rb",
    "lib/wukong/streamer/uniq_by_last_reducer.rb",
    "lib/wukong/typed_struct.rb",
    "lib/wukong/wukong_class.rb",
    "spec/bin/hdp-wc_spec.rb",
    "spec/spec_helper.rb",
    "wukong.gemspec"
  ]
  s.homepage = %q{http://github.com/mrflip/wukong}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Wukong makes Hadoop so easy a chimpanzee can use it.}
  s.test_files = [
    "spec/bin/hdp-wc_spec.rb",
    "spec/spec_helper.rb",
    "examples/and_pig/sample_queries.rb",
    "examples/apache_log_parser.rb",
    "examples/count_keys.rb",
    "examples/count_keys_at_mapper.rb",
    "examples/graph/adjacency_list.rb",
    "examples/graph/breadth_first_search.rb",
    "examples/graph/gen_2paths.rb",
    "examples/graph/gen_multi_edge.rb",
    "examples/graph/gen_symmetric_links.rb",
    "examples/package-local.rb",
    "examples/package.rb",
    "examples/pagerank/pagerank.rb",
    "examples/pagerank/pagerank_initialize.rb",
    "examples/rank_and_bin.rb",
    "examples/sample_records.rb",
    "examples/size.rb",
    "examples/word_count.rb"
  ]

  # The generated RubyGems-version branches that followed here were both
  # empty (this gem declares no dependencies), so only the specification
  # version assignment is kept.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|