wukong 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. data/LICENSE.textile +107 -0
  2. data/README.textile +166 -0
  3. data/bin/cutc +30 -0
  4. data/bin/cuttab +5 -0
  5. data/bin/greptrue +8 -0
  6. data/bin/hdp-cat +3 -0
  7. data/bin/hdp-catd +3 -0
  8. data/bin/hdp-du +81 -0
  9. data/bin/hdp-get +3 -0
  10. data/bin/hdp-kill +3 -0
  11. data/bin/hdp-ls +10 -0
  12. data/bin/hdp-mkdir +3 -0
  13. data/bin/hdp-mv +3 -0
  14. data/bin/hdp-parts_to_keys.rb +77 -0
  15. data/bin/hdp-ps +3 -0
  16. data/bin/hdp-put +3 -0
  17. data/bin/hdp-rm +11 -0
  18. data/bin/hdp-sort +29 -0
  19. data/bin/hdp-stream +29 -0
  20. data/bin/hdp-stream-flat +18 -0
  21. data/bin/hdp-sync +17 -0
  22. data/bin/hdp-wc +67 -0
  23. data/bin/md5sort +20 -0
  24. data/bin/tabchar +5 -0
  25. data/bin/uniqc +3 -0
  26. data/bin/wu-hist +3 -0
  27. data/bin/wu-lign +177 -0
  28. data/bin/wu-sum +30 -0
  29. data/doc/INSTALL.textile +41 -0
  30. data/doc/LICENSE.textile +107 -0
  31. data/doc/README-tutorial.textile +163 -0
  32. data/doc/README-wulign.textile +59 -0
  33. data/doc/README-wutils.textile +128 -0
  34. data/doc/TODO.textile +61 -0
  35. data/doc/UsingWukong-part1-setup.textile +2 -0
  36. data/doc/UsingWukong-part2-scraping.textile +2 -0
  37. data/doc/UsingWukong-part3-parsing.textile +132 -0
  38. data/doc/code/api_response_example.txt +20 -0
  39. data/doc/code/parser_skeleton.rb +38 -0
  40. data/doc/hadoop-nfs.textile +51 -0
  41. data/doc/hadoop-setup.textile +29 -0
  42. data/doc/index.textile +124 -0
  43. data/doc/intro_to_map_reduce/MapReduceDiagram.graffle +0 -0
  44. data/doc/links.textile +42 -0
  45. data/doc/overview.textile +91 -0
  46. data/doc/pig/PigLatinExpressionsList.txt +122 -0
  47. data/doc/pig/PigLatinReferenceManual.html +19134 -0
  48. data/doc/pig/PigLatinReferenceManual.txt +1640 -0
  49. data/doc/tips.textile +116 -0
  50. data/doc/usage.textile +102 -0
  51. data/doc/utils.textile +48 -0
  52. data/examples/README.txt +17 -0
  53. data/examples/and_pig/sample_queries.rb +128 -0
  54. data/examples/apache_log_parser.rb +53 -0
  55. data/examples/count_keys.rb +56 -0
  56. data/examples/count_keys_at_mapper.rb +57 -0
  57. data/examples/graph/adjacency_list.rb +74 -0
  58. data/examples/graph/breadth_first_search.rb +79 -0
  59. data/examples/graph/gen_2paths.rb +68 -0
  60. data/examples/graph/gen_multi_edge.rb +103 -0
  61. data/examples/graph/gen_symmetric_links.rb +53 -0
  62. data/examples/package-local.rb +100 -0
  63. data/examples/package.rb +96 -0
  64. data/examples/pagerank/README.textile +6 -0
  65. data/examples/pagerank/gen_initial_pagerank_graph.pig +57 -0
  66. data/examples/pagerank/pagerank.rb +88 -0
  67. data/examples/pagerank/pagerank_initialize.rb +46 -0
  68. data/examples/pagerank/run_pagerank.sh +19 -0
  69. data/examples/rank_and_bin.rb +173 -0
  70. data/examples/run_all.sh +47 -0
  71. data/examples/sample_records.rb +44 -0
  72. data/examples/size.rb +60 -0
  73. data/examples/word_count.rb +95 -0
  74. data/lib/wukong.rb +11 -0
  75. data/lib/wukong/and_pig.rb +62 -0
  76. data/lib/wukong/and_pig/README.textile +12 -0
  77. data/lib/wukong/and_pig/as.rb +37 -0
  78. data/lib/wukong/and_pig/data_types.rb +30 -0
  79. data/lib/wukong/and_pig/functions.rb +50 -0
  80. data/lib/wukong/and_pig/generate.rb +85 -0
  81. data/lib/wukong/and_pig/generate/variable_inflections.rb +82 -0
  82. data/lib/wukong/and_pig/junk.rb +51 -0
  83. data/lib/wukong/and_pig/operators.rb +8 -0
  84. data/lib/wukong/and_pig/operators/compound.rb +29 -0
  85. data/lib/wukong/and_pig/operators/evaluators.rb +7 -0
  86. data/lib/wukong/and_pig/operators/execution.rb +15 -0
  87. data/lib/wukong/and_pig/operators/file_methods.rb +29 -0
  88. data/lib/wukong/and_pig/operators/foreach.rb +98 -0
  89. data/lib/wukong/and_pig/operators/groupies.rb +212 -0
  90. data/lib/wukong/and_pig/operators/load_store.rb +65 -0
  91. data/lib/wukong/and_pig/operators/meta.rb +42 -0
  92. data/lib/wukong/and_pig/operators/relational.rb +129 -0
  93. data/lib/wukong/and_pig/pig_struct.rb +48 -0
  94. data/lib/wukong/and_pig/pig_var.rb +95 -0
  95. data/lib/wukong/and_pig/symbol.rb +29 -0
  96. data/lib/wukong/and_pig/utils.rb +0 -0
  97. data/lib/wukong/bad_record.rb +18 -0
  98. data/lib/wukong/boot.rb +47 -0
  99. data/lib/wukong/datatypes.rb +24 -0
  100. data/lib/wukong/datatypes/enum.rb +123 -0
  101. data/lib/wukong/dfs.rb +80 -0
  102. data/lib/wukong/encoding.rb +111 -0
  103. data/lib/wukong/extensions.rb +15 -0
  104. data/lib/wukong/extensions/array.rb +18 -0
  105. data/lib/wukong/extensions/blank.rb +93 -0
  106. data/lib/wukong/extensions/class.rb +189 -0
  107. data/lib/wukong/extensions/date_time.rb +24 -0
  108. data/lib/wukong/extensions/emittable.rb +82 -0
  109. data/lib/wukong/extensions/hash.rb +120 -0
  110. data/lib/wukong/extensions/hash_like.rb +119 -0
  111. data/lib/wukong/extensions/hashlike_class.rb +47 -0
  112. data/lib/wukong/extensions/module.rb +2 -0
  113. data/lib/wukong/extensions/pathname.rb +27 -0
  114. data/lib/wukong/extensions/string.rb +65 -0
  115. data/lib/wukong/extensions/struct.rb +17 -0
  116. data/lib/wukong/extensions/symbol.rb +11 -0
  117. data/lib/wukong/logger.rb +53 -0
  118. data/lib/wukong/models/graph.rb +27 -0
  119. data/lib/wukong/rdf.rb +104 -0
  120. data/lib/wukong/schema.rb +37 -0
  121. data/lib/wukong/script.rb +265 -0
  122. data/lib/wukong/script/hadoop_command.rb +111 -0
  123. data/lib/wukong/script/local_command.rb +14 -0
  124. data/lib/wukong/streamer.rb +13 -0
  125. data/lib/wukong/streamer/accumulating_reducer.rb +89 -0
  126. data/lib/wukong/streamer/base.rb +76 -0
  127. data/lib/wukong/streamer/count_keys.rb +30 -0
  128. data/lib/wukong/streamer/count_lines.rb +26 -0
  129. data/lib/wukong/streamer/filter.rb +20 -0
  130. data/lib/wukong/streamer/line_streamer.rb +12 -0
  131. data/lib/wukong/streamer/list_reducer.rb +20 -0
  132. data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +22 -0
  133. data/lib/wukong/streamer/rank_and_bin_reducer.rb +145 -0
  134. data/lib/wukong/streamer/set_reducer.rb +14 -0
  135. data/lib/wukong/streamer/struct_streamer.rb +48 -0
  136. data/lib/wukong/streamer/summing_reducer.rb +29 -0
  137. data/lib/wukong/streamer/uniq_by_last_reducer.rb +44 -0
  138. data/lib/wukong/typed_struct.rb +12 -0
  139. data/lib/wukong/wukong_class.rb +21 -0
  140. data/spec/bin/hdp-wc_spec.rb +4 -0
  141. data/spec/spec_helper.rb +0 -0
  142. data/wukong.gemspec +179 -0
  143. metadata +214 -0
@@ -0,0 +1,145 @@
1
+ module Wukong
2
+ module Streamer
3
+
4
+ #
5
+ # Bin and order a partitioned subset of keys
6
+ #
7
+ # For each record, appends a
8
+ #
9
+ # * numbering, from 0..(n-1). Each element gets a distinct numbering based on
10
+ # the order seen at the reducer; elements with identical keys might have
11
+ # different numbering on different runs.
12
+ #
13
+ # * rank, a number within 1..n giving the "place" of each value. Each element
14
+ # receives a successive (and thus unique) numbering, but all elements with
15
+ # the same key share the same rank. The first element for a given rank has
16
+ #
17
+ # (rank == numbering + 1)
18
+ #
19
+ # * bin, a number assigning keys by rank into a smaller number of groups. You
20
+ # must supply command line arguments
21
+ #
22
+ # --bins=[number] --total_count=[number]
23
+ #
24
+ # giving the number of groups and predicting in advance the total number of
25
+ # records. (Or override the bin assignment method to use your own damn
26
+ # strategy).
27
+ #
28
+ # If your data looked (in order) as follows, and 4 bins were requested:
29
+ #
30
+ # data: 1 1 1 2.3 7 69 79 79 80 81 81
31
+ # numbering: 0 1 2 3 4 5 6 7 8 9 10
32
+ # rank: 1 1 1 4 5 6 7 7 9 10 10
33
+ # 4-bin: 1 1 1 2 2 3 3 3 4 4 4
34
+ #
35
+ # If instead 100 bins were requested,
36
+ #
37
+ # data: 1 1 1 2.3 7 69 79 79 80 81 81
38
+ # numbering: 0 1 2 3 4 5 6 7 8 9 10
39
+ # rank: 1 1 1 4 5 6 7 7 9 10 10
40
+ # 100-bin: 1 1 1 28 37 46 55 55 73 82 91
41
+ #
42
+ # Note that most of the bins are empty (the bin numbers shown are approximate; see get_bin).
43
+ #
44
+ # --------------------------------------------------------------------------
45
+ #
46
+ # Note that in this implementation each reducer numbers its own subset of
47
+ # elements from 1..total_count. If you want to number your whole dataset,
48
+ # you'll have to set @:reduce_tasks => 1@ in your Script's
49
+ # Script#default_options.
50
+ #
51
+ # You might feel a bit better about yourself if you can bin several fields
52
+ # (or subsets) at once. The :partition_fields option to Wukong::Script
53
+ # (which requests a KeyFieldBasedPartitioner) can be used to route different
54
+ # subsets to (possibly) distinct reducers.
55
+ #
56
+ # See the [examples/rank_and_bin.rb] example script for an
57
+ # implementation of this. (And note the thing you have to do in case one
58
+ # reducer sees multiple partitions).
59
+ #
60
+ # It would surely be best to use a total sort and supply each reducer with the
61
+ # initial rank of its run.
62
+ #
63
class RankAndBinReducer < Wukong::Streamer::Base
  # Number of ranks that share one bin: total_count / bins, stored as a Float.
  attr_accessor :bin_size

  #
  # Hands +options+ to the parent initializer, derives the bin size from the
  # :bins and :total_count options (see configure_bins!) and zeroes the
  # running counters.
  #
  def initialize options
    super options
    configure_bins! options
    reset_order_params!
  end

  # ===========================================================================
  #
  # Order parameters (numbering, bin and rank)
  #

  #
  # Key used to assign ranking -- elements with identical keys have identical
  # rank. By default this is the first field of the record.
  #
  def get_key *args
    args.first
  end

  #
  # Forget the last key seen and restart the numbering at 0 and the rank at 1.
  #
  def reset_order_params!
    @last_key  = nil
    @numbering = 0
    @rank      = 1
  end

  #
  # The ranking is the "place" of each value: each element receives a
  # successive (and thus unique) numbering, but all elements with the same key
  # share the same rank. The first element for a given rank has
  #
  #   (rank == numbering + 1)
  #
  # Must be called before @numbering is incremented for the current record.
  #
  def get_rank key
    if @last_key != key
      @rank     = @numbering + 1
      @last_key = key
    end
    @rank
  end

  #
  # Set the bin from the current rank: elements with identical keys share a
  # rank and so land in identical bins. Bins are numbered from 1.
  #
  def get_bin rank
    ((rank - 0.5) / bin_size).floor + 1
  end

  #
  # Return the [numbering, rank, bin] triple for the given key, then advance
  # the numbering for the next record.
  #
  def get_order_params key
    numbering = @numbering      # use un-incremented value
    rank      = get_rank key
    bin       = get_bin rank
    @numbering += 1
    [numbering, rank, bin]
  end

  #
  # Derive bin_size from options[:bins] and options[:total_count].
  #
  # Raises RuntimeError when :bins is missing, when :total_count is missing,
  # zero, negative or non-numeric, or when :bins is not a positive integer.
  # (A zero bin count would otherwise give an infinite bin size and silently
  # put every record in bin 1.)
  #
  def configure_bins! options
    unless options[:bins]
      raise "Please specify a number of --bins= and a --total_count= or your own strategy to bin the ranked items."
    end
    # nil.to_f and "junk".to_f are both 0.0, so a missing count fails the check below
    total_count = options[:total_count].to_f
    bins        = options[:bins].to_i
    unless total_count > 0
      raise "To set the bin (%ile) size using --bins, we need to know the total count in advance. Please supply the total_count option."
    end
    unless bins > 0
      raise "The --bins option must be a positive integer (got #{options[:bins].inspect})."
    end
    self.bin_size = total_count / bins
    # $stderr.puts "Splitting %s records into %s bins of size %f. First element gets bin %d, last gets bin %d, median gets bin %d/%d" %
    #   [total_count, bins, bin_size, get_bin(1), get_bin(total_count), get_bin(((total_count+1)/2.0).floor), get_bin(((total_count+1)/2.0).ceil)]
  end

  #
  # Emit the record's fields followed by its numbering, rank and bin.
  # (to_flat is not defined in this file -- presumably a Wukong array extension.)
  #
  def process *fields
    numbering, rank, bin = get_order_params(get_key(*fields))
    yield fields.to_flat + [numbering, rank, bin]
  end
end
143
+
144
+ end
145
+ end
@@ -0,0 +1,14 @@
1
require 'set'
require 'wukong/streamer/list_reducer'
module Wukong
  module Streamer
    #
    # A ListReducer variant that starts each accumulation with an empty Set
    # instead of whatever collection ListReducer normally uses.
    # (Presumably this makes the values gathered for a key unique -- confirm
    # against ListReducer, which is defined in another file.)
    #
    class SetReducer < Wukong::Streamer::ListReducer
      # Begin with an empty set
      def start! *args
        self.values = Set.new
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # -*- coding: utf-8 -*-
2
+ module Wukong
3
+ module Streamer
4
+ #
5
+ # Mix StructRecordizer into any streamer to make it accept a stream of
6
+ # objects -- the first field in each line is turned into a class and used to
7
+ # instantiate an object using the remaining fields on that line.
8
+ #
9
module StructRecordizer

  #
  # Turn the first field into a class, then use the remaining fields on that
  # line to instantiate the object to process.
  #
  # rsrc   -- resource name, optionally followed by "-suffix"; the part before
  #           the first '-' is handed to Wukong.class_from_resource.
  # fields -- the arguments passed to the class's constructor.
  #
  # Returns [object, suffix] (suffix is nil when rsrc has no '-'). Returns nil
  # when no class is found for the resource, or when the constructor raises
  # ArgumentError (a warning is printed in that case).
  #
  # Any other StandardError is re-raised as a RuntimeError whose message
  # carries the original error, rsrc and fields, keeping the original
  # backtrace. Non-standard exceptions (Interrupt, SystemExit, ...) pass
  # through untouched.
  #
  def self.recordize rsrc, *fields
    klass_name, suffix = rsrc.split('-', 2)
    klass = Wukong.class_from_resource(klass_name)
    return unless klass
    # instantiate the class using the remaining fields on that line
    begin
      [ klass.new(*fields), suffix ]
    rescue ArgumentError => e
      warn "Couldn't instantiate: #{e} (#{[rsrc, fields].inspect})"
      return
    rescue StandardError => e
      raise RuntimeError, [e, rsrc, fields].inspect, e.backtrace
    end
  end

  #
  # Split a tab-separated line and recordize it: the first field names the
  # class, the rest are its constructor arguments. Returns nil for a line
  # with no fields (e.g. a blank line).
  #
  def recordize line
    fields = line.split("\t")
    return if fields.empty?
    StructRecordizer.recordize(*fields)
  end
end
36
+
37
+ #
38
+ # Processes file as a stream of objects -- the first field in each line is
39
+ # turned into a class and used to instantiate an object using the remaining
40
+ # fields on that line.
41
+ #
42
+ # See [StructRecordizer] for more.
43
+ #
44
class StructStreamer < Wukong::Streamer::Base
  # Supplies #recordize, which turns each tab-separated line into an object.
  include(StructRecordizer)
end
47
+ end
48
+ end
@@ -0,0 +1,29 @@
1
+ module Wukong
2
+ module Streamer
3
+ #
4
+ # Emit each key followed by the per-key sums of the selected fields
5
+ #
6
class SummingReducer < Wukong::Streamer::AccumulatingReducer
  # summing_elements: indexes of the fields to total (must be set before use).
  # sums:             running totals, one slot per entry in summing_elements.
  attr_accessor :summing_elements, :sums

  # Start a fresh run with every total at zero.
  def start! *args
    self.sums = summing_elements.map { |_| 0 }
  end

  # Add the integer value (to_i) of each selected field to its running total.
  def accumulate *fields
    fields.values_at(*summing_elements).each_with_index do |val, idx|
      sums[idx] += val.to_i
    end
  end

  # Emit the key followed by the totals as one flat array.
  # (key is not defined here -- presumably supplied by AccumulatingReducer.)
  def finalize
    row = [key, sums].flatten
    yield row
  end
end
26
+
27
+ end
28
+ end
29
+
@@ -0,0 +1,44 @@
1
+ module Wukong
2
+ module Streamer
3
+ #
4
+ # Accumulate acts like an insecure high-school kid, for each key adopting in
5
+ # turn the latest value seen. It then emits the last (in sort order) value
6
+ # for that key.
7
+ #
8
+ # For example, to extract the *latest* value for each property, set hadoop
9
+ # to use <resource, item_id, timestamp> as sort fields and <resource,
10
+ # item_id> as key fields.
11
+ #
12
class UniqByLastReducer < Wukong::Streamer::AccumulatingReducer
  # The most recent record accumulated since the last start!
  attr_accessor :final_value

  #
  # Forget any previously adopted record.
  #
  def start! *args
    self.final_value = nil
  end

  #
  # The key is the first two fields of the record, unless overridden.
  #
  def get_key *vals
    vals.first(2)
  end

  #
  # Each new record replaces the one before; whichever arrives last wins.
  #
  def accumulate *vals
    self.final_value = vals
  end

  #
  # Emit the last record adopted, or nothing if none was seen.
  #
  def finalize
    return unless final_value
    yield final_value
  end
end
43
+ end
44
+ end
@@ -0,0 +1,12 @@
1
#
# Builds Struct classes that remember a type for each member.
#
# TypedStruct.new takes [member_name, type] pairs and returns a new Struct
# class (built with Struct.new) whose members are the names. The class also
# carries two class-level attributes:
#
# * mtypes        -- the types, in member order
# * members_types -- the result of Hash.zip(names, types)
#
# cattr_accessor and Hash.zip are not defined in this file (presumably they
# come from extlib and the Wukong hash extensions -- confirm).
#
class TypedStruct < Struct
  def self.new *members_types
    names, types = members_types.transpose
    klass = Struct.new(*names.map { |name| name.to_sym })
    klass.class_eval do
      cattr_accessor :mtypes, :members_types
      self.mtypes        = types
      self.members_types = Hash.zip(names, types)
    end
    klass
  end
end
@@ -0,0 +1,21 @@
1
+ # require 'active_support/core_ext/class/inheritable_attributes.rb'
2
+ require 'extlib/class'
3
+
4
module Wukong
  #
  # Mix into a class to give its instances hash-style access to their
  # attributes: obj[:name] calls obj.name, obj[:name] = val calls
  # obj.name = val.
  #
  module WukongClass
    # Read an attribute by calling the method of the same name.
    def [](attr)
      send(attr)
    end

    # Write an attribute by calling its "<attr>=" writer method.
    def []=(attr, val)
      send("#{attr}=", val)
    end
  end
end
@@ -0,0 +1,4 @@
1
# -*- coding: utf-8 -*-

# Sample string containing non-ASCII characters. The file must declare a real
# encoding: "undecided" is an editor placeholder, not a valid Ruby encoding name.
I18N_STR = "Iñtërnâtiônàlizætiøn hasn't happened to ruby 1.8 yet"
File without changes
data/wukong.gemspec ADDED
@@ -0,0 +1,179 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{wukong}
8
+ s.version = "0.1.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Philip (flip) Kromer"]
12
+ s.date = %q{2009-09-28}
13
+ s.description = %q{ Treat your dataset like a:
14
+
15
+ * stream of lines when it’s efficient to process by lines
16
+ * stream of field arrays when it’s efficient to deal directly with fields
17
+ * stream of lightweight objects when it’s efficient to deal with objects
18
+
19
+ Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
20
+ }
21
+ s.email = %q{flip@infochimps.org}
22
+ s.executables = ["cutc", "cuttab", "greptrue", "hdp-cat", "hdp-catd", "hdp-du", "hdp-get", "hdp-kill", "hdp-ls", "hdp-mkdir", "hdp-mv", "hdp-parts_to_keys.rb", "hdp-ps", "hdp-put", "hdp-rm", "hdp-sort", "hdp-stream", "hdp-stream-flat", "hdp-sync", "hdp-wc", "md5sort", "tabchar", "uniqc", "wu-hist", "wu-lign", "wu-sum"]
23
+ s.extra_rdoc_files = [
24
+ "LICENSE.textile",
25
+ "README.textile"
26
+ ]
27
+ s.files = [
28
+ "doc/INSTALL.textile",
29
+ "doc/LICENSE.textile",
30
+ "doc/README-tutorial.textile",
31
+ "doc/README-wulign.textile",
32
+ "doc/README-wutils.textile",
33
+ "doc/TODO.textile",
34
+ "doc/UsingWukong-part1-setup.textile",
35
+ "doc/UsingWukong-part2-scraping.textile",
36
+ "doc/UsingWukong-part3-parsing.textile",
37
+ "doc/code/api_response_example.txt",
38
+ "doc/code/parser_skeleton.rb",
39
+ "doc/hadoop-nfs.textile",
40
+ "doc/hadoop-setup.textile",
41
+ "doc/index.textile",
42
+ "doc/intro_to_map_reduce/MapReduceDiagram.graffle",
43
+ "doc/links.textile",
44
+ "doc/overview.textile",
45
+ "doc/pig/PigLatinExpressionsList.txt",
46
+ "doc/pig/PigLatinReferenceManual.html",
47
+ "doc/pig/PigLatinReferenceManual.txt",
48
+ "doc/tips.textile",
49
+ "doc/usage.textile",
50
+ "doc/utils.textile",
51
+ "examples/README.txt",
52
+ "examples/and_pig/sample_queries.rb",
53
+ "examples/apache_log_parser.rb",
54
+ "examples/count_keys.rb",
55
+ "examples/count_keys_at_mapper.rb",
56
+ "examples/graph/adjacency_list.rb",
57
+ "examples/graph/breadth_first_search.rb",
58
+ "examples/graph/gen_2paths.rb",
59
+ "examples/graph/gen_multi_edge.rb",
60
+ "examples/graph/gen_symmetric_links.rb",
61
+ "examples/package-local.rb",
62
+ "examples/package.rb",
63
+ "examples/pagerank/README.textile",
64
+ "examples/pagerank/gen_initial_pagerank_graph.pig",
65
+ "examples/pagerank/pagerank.rb",
66
+ "examples/pagerank/pagerank_initialize.rb",
67
+ "examples/pagerank/run_pagerank.sh",
68
+ "examples/rank_and_bin.rb",
69
+ "examples/run_all.sh",
70
+ "examples/sample_records.rb",
71
+ "examples/size.rb",
72
+ "examples/word_count.rb",
73
+ "lib/wukong.rb",
74
+ "lib/wukong/and_pig.rb",
75
+ "lib/wukong/and_pig/README.textile",
76
+ "lib/wukong/and_pig/as.rb",
77
+ "lib/wukong/and_pig/data_types.rb",
78
+ "lib/wukong/and_pig/functions.rb",
79
+ "lib/wukong/and_pig/generate.rb",
80
+ "lib/wukong/and_pig/generate/variable_inflections.rb",
81
+ "lib/wukong/and_pig/junk.rb",
82
+ "lib/wukong/and_pig/operators.rb",
83
+ "lib/wukong/and_pig/operators/compound.rb",
84
+ "lib/wukong/and_pig/operators/evaluators.rb",
85
+ "lib/wukong/and_pig/operators/execution.rb",
86
+ "lib/wukong/and_pig/operators/file_methods.rb",
87
+ "lib/wukong/and_pig/operators/foreach.rb",
88
+ "lib/wukong/and_pig/operators/groupies.rb",
89
+ "lib/wukong/and_pig/operators/load_store.rb",
90
+ "lib/wukong/and_pig/operators/meta.rb",
91
+ "lib/wukong/and_pig/operators/relational.rb",
92
+ "lib/wukong/and_pig/pig_struct.rb",
93
+ "lib/wukong/and_pig/pig_var.rb",
94
+ "lib/wukong/and_pig/symbol.rb",
95
+ "lib/wukong/and_pig/utils.rb",
96
+ "lib/wukong/bad_record.rb",
97
+ "lib/wukong/boot.rb",
98
+ "lib/wukong/datatypes.rb",
99
+ "lib/wukong/datatypes/enum.rb",
100
+ "lib/wukong/dfs.rb",
101
+ "lib/wukong/encoding.rb",
102
+ "lib/wukong/extensions.rb",
103
+ "lib/wukong/extensions/array.rb",
104
+ "lib/wukong/extensions/blank.rb",
105
+ "lib/wukong/extensions/class.rb",
106
+ "lib/wukong/extensions/date_time.rb",
107
+ "lib/wukong/extensions/emittable.rb",
108
+ "lib/wukong/extensions/hash.rb",
109
+ "lib/wukong/extensions/hash_like.rb",
110
+ "lib/wukong/extensions/hashlike_class.rb",
111
+ "lib/wukong/extensions/module.rb",
112
+ "lib/wukong/extensions/pathname.rb",
113
+ "lib/wukong/extensions/string.rb",
114
+ "lib/wukong/extensions/struct.rb",
115
+ "lib/wukong/extensions/symbol.rb",
116
+ "lib/wukong/logger.rb",
117
+ "lib/wukong/models/graph.rb",
118
+ "lib/wukong/rdf.rb",
119
+ "lib/wukong/schema.rb",
120
+ "lib/wukong/script.rb",
121
+ "lib/wukong/script/hadoop_command.rb",
122
+ "lib/wukong/script/local_command.rb",
123
+ "lib/wukong/streamer.rb",
124
+ "lib/wukong/streamer/accumulating_reducer.rb",
125
+ "lib/wukong/streamer/base.rb",
126
+ "lib/wukong/streamer/count_keys.rb",
127
+ "lib/wukong/streamer/count_lines.rb",
128
+ "lib/wukong/streamer/filter.rb",
129
+ "lib/wukong/streamer/line_streamer.rb",
130
+ "lib/wukong/streamer/list_reducer.rb",
131
+ "lib/wukong/streamer/preprocess_with_pipe_streamer.rb",
132
+ "lib/wukong/streamer/rank_and_bin_reducer.rb",
133
+ "lib/wukong/streamer/set_reducer.rb",
134
+ "lib/wukong/streamer/struct_streamer.rb",
135
+ "lib/wukong/streamer/summing_reducer.rb",
136
+ "lib/wukong/streamer/uniq_by_last_reducer.rb",
137
+ "lib/wukong/typed_struct.rb",
138
+ "lib/wukong/wukong_class.rb",
139
+ "spec/bin/hdp-wc_spec.rb",
140
+ "spec/spec_helper.rb",
141
+ "wukong.gemspec"
142
+ ]
143
+ s.homepage = %q{http://github.com/mrflip/wukong}
144
+ s.rdoc_options = ["--charset=UTF-8"]
145
+ s.require_paths = ["lib"]
146
+ s.rubygems_version = %q{1.3.5}
147
+ s.summary = %q{Wukong makes Hadoop so easy a chimpanzee can use it.}
148
+ s.test_files = [
149
+ "spec/bin/hdp-wc_spec.rb",
150
+ "spec/spec_helper.rb",
151
+ "examples/and_pig/sample_queries.rb",
152
+ "examples/apache_log_parser.rb",
153
+ "examples/count_keys.rb",
154
+ "examples/count_keys_at_mapper.rb",
155
+ "examples/graph/adjacency_list.rb",
156
+ "examples/graph/breadth_first_search.rb",
157
+ "examples/graph/gen_2paths.rb",
158
+ "examples/graph/gen_multi_edge.rb",
159
+ "examples/graph/gen_symmetric_links.rb",
160
+ "examples/package-local.rb",
161
+ "examples/package.rb",
162
+ "examples/pagerank/pagerank.rb",
163
+ "examples/pagerank/pagerank_initialize.rb",
164
+ "examples/rank_and_bin.rb",
165
+ "examples/sample_records.rb",
166
+ "examples/size.rb",
167
+ "examples/word_count.rb"
168
+ ]
169
+
170
+ if s.respond_to? :specification_version then
171
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
172
+ s.specification_version = 3
173
+
174
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
175
+ else
176
+ end
177
+ else
178
+ end
179
+ end