wukong 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.textile +107 -0
- data/README.textile +166 -0
- data/bin/cutc +30 -0
- data/bin/cuttab +5 -0
- data/bin/greptrue +8 -0
- data/bin/hdp-cat +3 -0
- data/bin/hdp-catd +3 -0
- data/bin/hdp-du +81 -0
- data/bin/hdp-get +3 -0
- data/bin/hdp-kill +3 -0
- data/bin/hdp-ls +10 -0
- data/bin/hdp-mkdir +3 -0
- data/bin/hdp-mv +3 -0
- data/bin/hdp-parts_to_keys.rb +77 -0
- data/bin/hdp-ps +3 -0
- data/bin/hdp-put +3 -0
- data/bin/hdp-rm +11 -0
- data/bin/hdp-sort +29 -0
- data/bin/hdp-stream +29 -0
- data/bin/hdp-stream-flat +18 -0
- data/bin/hdp-sync +17 -0
- data/bin/hdp-wc +67 -0
- data/bin/md5sort +20 -0
- data/bin/tabchar +5 -0
- data/bin/uniqc +3 -0
- data/bin/wu-hist +3 -0
- data/bin/wu-lign +177 -0
- data/bin/wu-sum +30 -0
- data/doc/INSTALL.textile +41 -0
- data/doc/LICENSE.textile +107 -0
- data/doc/README-tutorial.textile +163 -0
- data/doc/README-wulign.textile +59 -0
- data/doc/README-wutils.textile +128 -0
- data/doc/TODO.textile +61 -0
- data/doc/UsingWukong-part1-setup.textile +2 -0
- data/doc/UsingWukong-part2-scraping.textile +2 -0
- data/doc/UsingWukong-part3-parsing.textile +132 -0
- data/doc/code/api_response_example.txt +20 -0
- data/doc/code/parser_skeleton.rb +38 -0
- data/doc/hadoop-nfs.textile +51 -0
- data/doc/hadoop-setup.textile +29 -0
- data/doc/index.textile +124 -0
- data/doc/intro_to_map_reduce/MapReduceDiagram.graffle +0 -0
- data/doc/links.textile +42 -0
- data/doc/overview.textile +91 -0
- data/doc/pig/PigLatinExpressionsList.txt +122 -0
- data/doc/pig/PigLatinReferenceManual.html +19134 -0
- data/doc/pig/PigLatinReferenceManual.txt +1640 -0
- data/doc/tips.textile +116 -0
- data/doc/usage.textile +102 -0
- data/doc/utils.textile +48 -0
- data/examples/README.txt +17 -0
- data/examples/and_pig/sample_queries.rb +128 -0
- data/examples/apache_log_parser.rb +53 -0
- data/examples/count_keys.rb +56 -0
- data/examples/count_keys_at_mapper.rb +57 -0
- data/examples/graph/adjacency_list.rb +74 -0
- data/examples/graph/breadth_first_search.rb +79 -0
- data/examples/graph/gen_2paths.rb +68 -0
- data/examples/graph/gen_multi_edge.rb +103 -0
- data/examples/graph/gen_symmetric_links.rb +53 -0
- data/examples/package-local.rb +100 -0
- data/examples/package.rb +96 -0
- data/examples/pagerank/README.textile +6 -0
- data/examples/pagerank/gen_initial_pagerank_graph.pig +57 -0
- data/examples/pagerank/pagerank.rb +88 -0
- data/examples/pagerank/pagerank_initialize.rb +46 -0
- data/examples/pagerank/run_pagerank.sh +19 -0
- data/examples/rank_and_bin.rb +173 -0
- data/examples/run_all.sh +47 -0
- data/examples/sample_records.rb +44 -0
- data/examples/size.rb +60 -0
- data/examples/word_count.rb +95 -0
- data/lib/wukong.rb +11 -0
- data/lib/wukong/and_pig.rb +62 -0
- data/lib/wukong/and_pig/README.textile +12 -0
- data/lib/wukong/and_pig/as.rb +37 -0
- data/lib/wukong/and_pig/data_types.rb +30 -0
- data/lib/wukong/and_pig/functions.rb +50 -0
- data/lib/wukong/and_pig/generate.rb +85 -0
- data/lib/wukong/and_pig/generate/variable_inflections.rb +82 -0
- data/lib/wukong/and_pig/junk.rb +51 -0
- data/lib/wukong/and_pig/operators.rb +8 -0
- data/lib/wukong/and_pig/operators/compound.rb +29 -0
- data/lib/wukong/and_pig/operators/evaluators.rb +7 -0
- data/lib/wukong/and_pig/operators/execution.rb +15 -0
- data/lib/wukong/and_pig/operators/file_methods.rb +29 -0
- data/lib/wukong/and_pig/operators/foreach.rb +98 -0
- data/lib/wukong/and_pig/operators/groupies.rb +212 -0
- data/lib/wukong/and_pig/operators/load_store.rb +65 -0
- data/lib/wukong/and_pig/operators/meta.rb +42 -0
- data/lib/wukong/and_pig/operators/relational.rb +129 -0
- data/lib/wukong/and_pig/pig_struct.rb +48 -0
- data/lib/wukong/and_pig/pig_var.rb +95 -0
- data/lib/wukong/and_pig/symbol.rb +29 -0
- data/lib/wukong/and_pig/utils.rb +0 -0
- data/lib/wukong/bad_record.rb +18 -0
- data/lib/wukong/boot.rb +47 -0
- data/lib/wukong/datatypes.rb +24 -0
- data/lib/wukong/datatypes/enum.rb +123 -0
- data/lib/wukong/dfs.rb +80 -0
- data/lib/wukong/encoding.rb +111 -0
- data/lib/wukong/extensions.rb +15 -0
- data/lib/wukong/extensions/array.rb +18 -0
- data/lib/wukong/extensions/blank.rb +93 -0
- data/lib/wukong/extensions/class.rb +189 -0
- data/lib/wukong/extensions/date_time.rb +24 -0
- data/lib/wukong/extensions/emittable.rb +82 -0
- data/lib/wukong/extensions/hash.rb +120 -0
- data/lib/wukong/extensions/hash_like.rb +119 -0
- data/lib/wukong/extensions/hashlike_class.rb +47 -0
- data/lib/wukong/extensions/module.rb +2 -0
- data/lib/wukong/extensions/pathname.rb +27 -0
- data/lib/wukong/extensions/string.rb +65 -0
- data/lib/wukong/extensions/struct.rb +17 -0
- data/lib/wukong/extensions/symbol.rb +11 -0
- data/lib/wukong/logger.rb +53 -0
- data/lib/wukong/models/graph.rb +27 -0
- data/lib/wukong/rdf.rb +104 -0
- data/lib/wukong/schema.rb +37 -0
- data/lib/wukong/script.rb +265 -0
- data/lib/wukong/script/hadoop_command.rb +111 -0
- data/lib/wukong/script/local_command.rb +14 -0
- data/lib/wukong/streamer.rb +13 -0
- data/lib/wukong/streamer/accumulating_reducer.rb +89 -0
- data/lib/wukong/streamer/base.rb +76 -0
- data/lib/wukong/streamer/count_keys.rb +30 -0
- data/lib/wukong/streamer/count_lines.rb +26 -0
- data/lib/wukong/streamer/filter.rb +20 -0
- data/lib/wukong/streamer/line_streamer.rb +12 -0
- data/lib/wukong/streamer/list_reducer.rb +20 -0
- data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +22 -0
- data/lib/wukong/streamer/rank_and_bin_reducer.rb +145 -0
- data/lib/wukong/streamer/set_reducer.rb +14 -0
- data/lib/wukong/streamer/struct_streamer.rb +48 -0
- data/lib/wukong/streamer/summing_reducer.rb +29 -0
- data/lib/wukong/streamer/uniq_by_last_reducer.rb +44 -0
- data/lib/wukong/typed_struct.rb +12 -0
- data/lib/wukong/wukong_class.rb +21 -0
- data/spec/bin/hdp-wc_spec.rb +4 -0
- data/spec/spec_helper.rb +0 -0
- data/wukong.gemspec +179 -0
- metadata +214 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Streamer
|
3
|
+
|
4
|
+
#
|
5
|
+
# Bin and order a partitioned subset of keys
|
6
|
+
#
|
7
|
+
# For each record, appends a numbering, a rank and a bin:
|
8
|
+
#
|
9
|
+
# * numbering, from 0..(n-1). Each element gets a distinct numbering based on
|
10
|
+
# the order seen at the reducer; elements with identical keys might have
|
11
|
+
# different numbering on different runs.
|
12
|
+
#
|
13
|
+
# * rank, a number within 1..n giving the "place" of each value. Each element
|
14
|
+
# receives a successive (and thus unique) numbering, but all elements with
|
15
|
+
# the same key share the same rank. The first element for a given rank has
|
16
|
+
#
|
17
|
+
# (rank == numbering + 1)
|
18
|
+
#
|
19
|
+
# * bin, a number assigning keys by rank into a smaller number of groups. You
|
20
|
+
# must supply command line arguments
|
21
|
+
#
|
22
|
+
# --bins=[number] --total_count=[number]
|
23
|
+
#
|
24
|
+
# giving the number of groups and predicting in advance the total number of
|
25
|
+
# records. (Or override the bin assignment method to use your own damn
|
26
|
+
# strategy).
|
27
|
+
#
|
28
|
+
# If your data looked (in order) as follows, and 4 bins were requested:
|
29
|
+
#
|
30
|
+
# data: 1 1 1 2.3 7 69 79 79 80 81 81
|
31
|
+
# numbering: 0 1 2 3 4 5 6 7 8 9 10
|
32
|
+
# rank: 1 1 1 4 5 6 7 7 9 10 10
|
33
|
+
# 4-bin: 1 1 1 2 2 3 3 3 4 4 4
|
34
|
+
#
|
35
|
+
# If instead 100 bins were requested,
|
36
|
+
#
|
37
|
+
# data: 1 1 1 2.3 7 69 79 79 80 81 81
|
38
|
+
# numbering: 0 1 2 3 4 5 6 7 8 9 10
|
39
|
+
# rank: 1 1 1 4 5 6 7 7 9 10 10
|
40
|
+
# 100-bin: 1 1 1 28 37 46 55 55 73 82 91
|
41
|
+
#
|
42
|
+
# Note that most of the bins are empty.
|
43
|
+
#
|
44
|
+
# --------------------------------------------------------------------------
|
45
|
+
#
|
46
|
+
# Note that in this implementation each reducer numbers its own subset of
|
47
|
+
# elements from 1..total_count. If you want to number your whole dataset,
|
48
|
+
# you'll have to set @:reduce_tasks => 1@ in your Script's
|
49
|
+
# Script#default_options.
|
50
|
+
#
|
51
|
+
# You might feel a bit better about yourself if you can bin several fields
|
52
|
+
# (or subsets) at once. The :partition_fields option to Wukong::Script
|
53
|
+
# (which requests a KeyFieldBasedPartitioner) can be used to route different
|
54
|
+
# subsets to (possibly) distinct reducers.
|
55
|
+
#
|
56
|
+
# See the [examples/rank_and_bin_fields.rb] example script for an
|
57
|
+
# implementation of this. (And note the thing you have to do in case one
|
58
|
+
# reducer sees multiple partitions).
|
59
|
+
#
|
60
|
+
# It would surely be best to use a total sort and supply each reducer with the
|
61
|
+
# initial rank of its run.
|
62
|
+
#
|
63
|
+
class RankAndBinReducer < Wukong::Streamer::Base
  # Number of ranks spanned by one bin: total_count / bins, held as a Float.
  attr_accessor :bin_size

  #
  # Hands +options+ to the superclass, derives the bin size from
  # options[:bins] and options[:total_count], and zeroes the counters.
  #
  # Raises RuntimeError (from configure_bins!) when the binning options are
  # missing or not positive.
  #
  def initialize options
    super options
    configure_bins! options
    reset_order_params!
  end

  # ===========================================================================
  #
  # Order parameters (numbering, bin and rank)
  #

  #
  # Key used to assign ranking -- elements with identical keys have identical
  # rank. By default this is the first field of the record.
  #
  def get_key *args
    args.first
  end

  #
  # Forget the last key seen and restart numbering at 0 and rank at 1.
  #
  def reset_order_params!
    @last_key  = nil
    @numbering = 0
    @rank      = 1
  end

  #
  # The ranking is the "place" of each value: each element receives a
  # successive (and thus unique) numbering, but all elements with the same key
  # share the same rank. The first element for a given rank has
  #
  #   (rank == numbering + 1)
  #
  # Must be called before @numbering is advanced for the current record.
  #
  def get_rank key
    if @last_key != key
      @rank     = @numbering + 1
      @last_key = key
    end
    @rank
  end

  #
  # Bin for the given rank, counting from 1. Because the bin is a function of
  # the rank alone, elements with identical keys land in identical bins.
  #
  def get_bin rank
    ((rank - 0.5) / bin_size).floor + 1
  end

  #
  # Return [numbering, rank, bin] for the given key and advance the numbering.
  #
  def get_order_params key
    numbering = @numbering # use un-incremented value
    rank      = get_rank key
    bin       = get_bin rank
    @numbering += 1
    [numbering, rank, bin]
  end

  #
  # Compute bin_size from options[:bins] and options[:total_count].
  #
  # Both must be positive: a zero or negative bin count would make the
  # division below yield an infinite or negative bin size, which get_bin would
  # silently turn into meaningless bin numbers.
  #
  # Raises RuntimeError when :bins is absent or either value is not positive.
  #
  def configure_bins! options
    unless options[:bins]
      raise "Please specify a number of --bins= and a --total_count= or your own strategy to bin the ranked items."
    end
    total_count = options[:total_count].to_f # nil and non-numeric strings become 0.0
    bins        = options[:bins].to_i
    unless total_count > 0
      raise "To set the bin (%ile) size using --bins, we need to know the total count in advance. Please supply the total_count option."
    end
    unless bins > 0
      raise "The --bins option must be a positive whole number (got #{options[:bins].inspect})."
    end
    self.bin_size = (total_count / bins)
    # $stderr.puts "Splitting %s records into %s bins of size %f. First element gets bin %d, last gets bin %d, median gets bin %d/%d" %
    #   [total_count, bins, bin_size, get_bin(1), get_bin(total_count), get_bin(((total_count+1)/2.0).floor), get_bin(((total_count+1)/2.0).ceil)]
  end

  #
  # Yield the record's fields followed by its numbering, rank and bin.
  #
  # NOTE(review): +to_flat+ is not defined in this file -- presumably an Array
  # extension that flattens the fields; confirm against wukong/extensions.
  #
  def process *fields
    numbering, rank, bin = get_order_params(get_key(*fields))
    yield fields.to_flat + [numbering, rank, bin]
  end
end
|
143
|
+
|
144
|
+
end
|
145
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'set'
require 'wukong/streamer/list_reducer'
|
2
|
+
module Wukong
|
3
|
+
module Streamer
|
4
|
+
#
|
5
|
+
# A ListReducer whose per-key accumulator starts out as an empty Set
|
6
|
+
#
|
7
|
+
class SetReducer < Wukong::Streamer::ListReducer
  #
  # Begin each group with an empty Set as the +values+ accumulator.
  #
  # Set lives in the standard library's 'set' file, which this file must
  # require itself: it is only autoloaded from Ruby 3.2 onwards.
  #
  # NOTE(review): how values are added and emitted is inherited from
  # ListReducer, which is not visible here -- presumably it appends each
  # record to +values+, so duplicates collapse; confirm.
  #
  def start! *args
    self.values = Set.new
  end
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Wukong
|
3
|
+
module Streamer
|
4
|
+
#
|
5
|
+
# Mix StructRecordizer into any streamer to make it accept a stream of
|
6
|
+
# objects -- the first field in each line is turned into a class and used to
|
7
|
+
# instantiate an object using the remaining fields on that line.
|
8
|
+
#
|
9
|
+
module StructRecordizer

  #
  # Turn the first field into a class name, then use the remaining fields
  # on that line to instantiate the object to process.
  #
  # rsrc   -- resource name; anything after the first '-' is split off as a
  #           suffix and returned alongside the object.
  # fields -- passed positionally to the resolved class's +new+.
  #
  # Returns [object, suffix]. Returns nil when Wukong.class_from_resource
  # yields nothing for the name, or when the class rejects the fields with an
  # ArgumentError (a warning is printed in that case).
  #
  # Raises RuntimeError, carrying the original error and the offending input,
  # for any other StandardError raised by the constructor. Non-standard
  # exceptions (SystemExit, Interrupt, NoMemoryError...) are deliberately left
  # alone so that signals and exit requests still work.
  #
  def self.recordize rsrc, *fields
    klass_name, suffix = rsrc.split('-', 2)
    klass = Wukong.class_from_resource(klass_name) or return
    # instantiate the class using the remaining fields on that line
    begin
      [ klass.new(*fields), suffix ]
    rescue ArgumentError => e
      warn "Couldn't instantiate: #{e} (#{[rsrc, fields].inspect})"
      return
    rescue StandardError => e
      raise [e, rsrc, fields].inspect
    end
  end

  #
  # Split a tab-separated line and recordize it.
  #
  # Returns nil for a line with no fields (e.g. a blank line) instead of
  # failing on the missing resource-name argument.
  #
  def recordize line
    fields = line.split("\t")
    return if fields.empty?
    StructRecordizer.recordize(*fields)
  end
end
|
36
|
+
|
37
|
+
#
|
38
|
+
# Processes file as a stream of objects -- the first field in each line is
|
39
|
+
# turned into a class and used to instantiate an object using the remaining
|
40
|
+
# fields on that line.
|
41
|
+
#
|
42
|
+
# See [StructRecordizer] for more.
|
43
|
+
#
|
44
|
+
# Streamer whose +recordize+ comes from StructRecordizer: each line is split
# on tabs, and the first field picks the class instantiated from the rest.
class StructStreamer < Wukong::Streamer::Base
  include(StructRecordizer)
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Streamer
|
3
|
+
#
|
4
|
+
# Emit each unique key followed by the sums of the selected fields
|
5
|
+
#
|
6
|
+
class SummingReducer < Wukong::Streamer::AccumulatingReducer
  # summing_elements: positions (within each record's fields) of the values to total.
  # sums:             running totals, one per entry of summing_elements, in the same order.
  attr_accessor :summing_elements, :sums

  # Start a new group: one zeroed total per summed position.
  def start! *args
    self.sums = summing_elements.collect { 0 }
  end

  # Add this record's selected fields, coerced with to_i, onto the totals.
  def accumulate *fields
    picked = fields.values_at(*summing_elements)
    picked.each_with_index do |val, idx|
      sums[idx] = sums[idx] + val.to_i
    end
  end

  # Emit the key field(s) followed by each total, as one flat array.
  def finalize
    row = [key, sums].flatten
    yield row
  end
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Streamer
|
3
|
+
#
|
4
|
+
# Accumulate acts like an insecure high-school kid, for each key adopting in
|
5
|
+
# turn the latest value seen. It then emits the last (in sort order) value
|
6
|
+
# for that key.
|
7
|
+
#
|
8
|
+
# For example, to extract the *latest* value for each property, set hadoop
|
9
|
+
# to use <resource, item_id, timestamp> as sort fields and <resource,
|
10
|
+
# item_id> as key fields.
|
11
|
+
#
|
12
|
+
class UniqByLastReducer < Wukong::Streamer::AccumulatingReducer
  # The most recent record seen for the current key (nil until one arrives).
  attr_accessor :final_value

  #
  # Forget the previous group's record when a new group starts.
  #
  def start! *args
    self.final_value = nil
  end

  #
  # By default the key is made of the first two fields.
  #
  def get_key *vals
    vals.first(2)
  end

  #
  # Each record overwrites the one before it, so the last one wins.
  #
  def accumulate *vals
    self.final_value = vals
  end

  #
  # Emit the surviving record, if the group had any.
  #
  def finalize
    return unless final_value
    yield final_value
  end
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#
# Builds Struct classes that remember a type for each member.
#
class TypedStruct < Struct
  #
  # Create a new Struct class from [member_name, type] pairs.
  #
  # members_types -- one two-element array per member, e.g.
  #                  TypedStruct.new([:id, Integer], [:name, String])
  #
  # Returns a plain Struct subclass (not a TypedStruct subclass) exposing
  #   mtypes        -- the types, in member order
  #   members_types -- member name (as given) => type
  #
  # Raises ArgumentError when called with no pairs; previously this surfaced
  # as a NoMethodError on nil, because transposing an empty list leaves
  # nothing to destructure.
  #
  # NOTE(review): cattr_accessor and Hash.zip are not core Ruby; they are
  # presumably supplied by extlib / wukong's extensions -- confirm load order.
  #
  def self.new *members_types
    if members_types.empty?
      raise ArgumentError, "TypedStruct.new needs at least one [member, type] pair"
    end
    members, mtypes = members_types.transpose
    klass = Struct.new(*members.map(&:to_sym))
    klass.class_eval do
      cattr_accessor :mtypes, :members_types
      # the right-hand sides are the locals captured from the enclosing method
      self.mtypes        = mtypes
      self.members_types = Hash.zip(members, mtypes)
    end
    klass
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# require 'active_support/core_ext/class/inheritable_attributes.rb'
|
2
|
+
require 'extlib/class'
|
3
|
+
|
4
|
+
module Wukong
|
5
|
+
#
|
6
|
+
# Mix into a class to give its instances hash-style access to their attributes
|
7
|
+
#
|
8
|
+
module WukongClass
  # Hash-style read: obj[:foo] invokes the method named +foo+ on the receiver.
  def [](attr)
    send(attr)
  end

  # Hash-style write: obj[:foo] = val invokes the receiver's +foo=+ method.
  def []=(attr, val)
    setter = "#{attr}="
    send(setter, val)
  end
end
|
19
|
+
|
20
|
+
|
21
|
+
end
|
data/spec/spec_helper.rb
ADDED
File without changes
|
data/wukong.gemspec
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Gem specification for wukong 0.1.1: metadata, executables, packaged files
# and test files.
Gem::Specification.new do |s|
  s.name = %q{wukong}
  s.version = "0.1.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Philip (flip) Kromer"]
  s.date = %q{2009-09-28}
  s.description = %q{ Treat your dataset like a:

* stream of lines when it’s efficient to process by lines
* stream of field arrays when it’s efficient to deal directly with fields
* stream of lightweight objects when it’s efficient to deal with objects

Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
}
  s.email = %q{flip@infochimps.org}
  s.executables = ["cutc", "cuttab", "greptrue", "hdp-cat", "hdp-catd", "hdp-du", "hdp-get", "hdp-kill", "hdp-ls", "hdp-mkdir", "hdp-mv", "hdp-parts_to_keys.rb", "hdp-ps", "hdp-put", "hdp-rm", "hdp-sort", "hdp-stream", "hdp-stream-flat", "hdp-sync", "hdp-wc", "md5sort", "tabchar", "uniqc", "wu-hist", "wu-lign", "wu-sum"]
  s.extra_rdoc_files = [
    "LICENSE.textile",
    "README.textile"
  ]
  s.files = [
    "doc/INSTALL.textile",
    "doc/LICENSE.textile",
    "doc/README-tutorial.textile",
    "doc/README-wulign.textile",
    "doc/README-wutils.textile",
    "doc/TODO.textile",
    "doc/UsingWukong-part1-setup.textile",
    "doc/UsingWukong-part2-scraping.textile",
    "doc/UsingWukong-part3-parsing.textile",
    "doc/code/api_response_example.txt",
    "doc/code/parser_skeleton.rb",
    "doc/hadoop-nfs.textile",
    "doc/hadoop-setup.textile",
    "doc/index.textile",
    "doc/intro_to_map_reduce/MapReduceDiagram.graffle",
    "doc/links.textile",
    "doc/overview.textile",
    "doc/pig/PigLatinExpressionsList.txt",
    "doc/pig/PigLatinReferenceManual.html",
    "doc/pig/PigLatinReferenceManual.txt",
    "doc/tips.textile",
    "doc/usage.textile",
    "doc/utils.textile",
    "examples/README.txt",
    "examples/and_pig/sample_queries.rb",
    "examples/apache_log_parser.rb",
    "examples/count_keys.rb",
    "examples/count_keys_at_mapper.rb",
    "examples/graph/adjacency_list.rb",
    "examples/graph/breadth_first_search.rb",
    "examples/graph/gen_2paths.rb",
    "examples/graph/gen_multi_edge.rb",
    "examples/graph/gen_symmetric_links.rb",
    "examples/package-local.rb",
    "examples/package.rb",
    "examples/pagerank/README.textile",
    "examples/pagerank/gen_initial_pagerank_graph.pig",
    "examples/pagerank/pagerank.rb",
    "examples/pagerank/pagerank_initialize.rb",
    "examples/pagerank/run_pagerank.sh",
    "examples/rank_and_bin.rb",
    "examples/run_all.sh",
    "examples/sample_records.rb",
    "examples/size.rb",
    "examples/word_count.rb",
    "lib/wukong.rb",
    "lib/wukong/and_pig.rb",
    "lib/wukong/and_pig/README.textile",
    "lib/wukong/and_pig/as.rb",
    "lib/wukong/and_pig/data_types.rb",
    "lib/wukong/and_pig/functions.rb",
    "lib/wukong/and_pig/generate.rb",
    "lib/wukong/and_pig/generate/variable_inflections.rb",
    "lib/wukong/and_pig/junk.rb",
    "lib/wukong/and_pig/operators.rb",
    "lib/wukong/and_pig/operators/compound.rb",
    "lib/wukong/and_pig/operators/evaluators.rb",
    "lib/wukong/and_pig/operators/execution.rb",
    "lib/wukong/and_pig/operators/file_methods.rb",
    "lib/wukong/and_pig/operators/foreach.rb",
    "lib/wukong/and_pig/operators/groupies.rb",
    "lib/wukong/and_pig/operators/load_store.rb",
    "lib/wukong/and_pig/operators/meta.rb",
    "lib/wukong/and_pig/operators/relational.rb",
    "lib/wukong/and_pig/pig_struct.rb",
    "lib/wukong/and_pig/pig_var.rb",
    "lib/wukong/and_pig/symbol.rb",
    "lib/wukong/and_pig/utils.rb",
    "lib/wukong/bad_record.rb",
    "lib/wukong/boot.rb",
    "lib/wukong/datatypes.rb",
    "lib/wukong/datatypes/enum.rb",
    "lib/wukong/dfs.rb",
    "lib/wukong/encoding.rb",
    "lib/wukong/extensions.rb",
    "lib/wukong/extensions/array.rb",
    "lib/wukong/extensions/blank.rb",
    "lib/wukong/extensions/class.rb",
    "lib/wukong/extensions/date_time.rb",
    "lib/wukong/extensions/emittable.rb",
    "lib/wukong/extensions/hash.rb",
    "lib/wukong/extensions/hash_like.rb",
    "lib/wukong/extensions/hashlike_class.rb",
    "lib/wukong/extensions/module.rb",
    "lib/wukong/extensions/pathname.rb",
    "lib/wukong/extensions/string.rb",
    "lib/wukong/extensions/struct.rb",
    "lib/wukong/extensions/symbol.rb",
    "lib/wukong/logger.rb",
    "lib/wukong/models/graph.rb",
    "lib/wukong/rdf.rb",
    "lib/wukong/schema.rb",
    "lib/wukong/script.rb",
    "lib/wukong/script/hadoop_command.rb",
    "lib/wukong/script/local_command.rb",
    "lib/wukong/streamer.rb",
    "lib/wukong/streamer/accumulating_reducer.rb",
    "lib/wukong/streamer/base.rb",
    "lib/wukong/streamer/count_keys.rb",
    "lib/wukong/streamer/count_lines.rb",
    "lib/wukong/streamer/filter.rb",
    "lib/wukong/streamer/line_streamer.rb",
    "lib/wukong/streamer/list_reducer.rb",
    "lib/wukong/streamer/preprocess_with_pipe_streamer.rb",
    "lib/wukong/streamer/rank_and_bin_reducer.rb",
    "lib/wukong/streamer/set_reducer.rb",
    "lib/wukong/streamer/struct_streamer.rb",
    "lib/wukong/streamer/summing_reducer.rb",
    "lib/wukong/streamer/uniq_by_last_reducer.rb",
    "lib/wukong/typed_struct.rb",
    "lib/wukong/wukong_class.rb",
    "spec/bin/hdp-wc_spec.rb",
    "spec/spec_helper.rb",
    "wukong.gemspec"
  ]
  s.homepage = %q{http://github.com/mrflip/wukong}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Wukong makes Hadoop so easy a chimpanzee can use it.}
  s.test_files = [
    "spec/bin/hdp-wc_spec.rb",
    "spec/spec_helper.rb",
    "examples/and_pig/sample_queries.rb",
    "examples/apache_log_parser.rb",
    "examples/count_keys.rb",
    "examples/count_keys_at_mapper.rb",
    "examples/graph/adjacency_list.rb",
    "examples/graph/breadth_first_search.rb",
    "examples/graph/gen_2paths.rb",
    "examples/graph/gen_multi_edge.rb",
    "examples/graph/gen_symmetric_links.rb",
    "examples/package-local.rb",
    "examples/package.rb",
    "examples/pagerank/pagerank.rb",
    "examples/pagerank/pagerank_initialize.rb",
    "examples/rank_and_bin.rb",
    "examples/sample_records.rb",
    "examples/size.rb",
    "examples/word_count.rb"
  ]

  # The generated file also compared Gem::RubyGemsVersion against 1.2.0 here,
  # but both branches of that check were empty (the gem declares no
  # dependencies). Gem::RubyGemsVersion no longer exists in RubyGems 2.0+, so
  # merely evaluating the check raised NameError; it is dropped.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|