wukong 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. data/LICENSE.textile +107 -0
  2. data/README.textile +166 -0
  3. data/bin/cutc +30 -0
  4. data/bin/cuttab +5 -0
  5. data/bin/greptrue +8 -0
  6. data/bin/hdp-cat +3 -0
  7. data/bin/hdp-catd +3 -0
  8. data/bin/hdp-du +81 -0
  9. data/bin/hdp-get +3 -0
  10. data/bin/hdp-kill +3 -0
  11. data/bin/hdp-ls +10 -0
  12. data/bin/hdp-mkdir +3 -0
  13. data/bin/hdp-mv +3 -0
  14. data/bin/hdp-parts_to_keys.rb +77 -0
  15. data/bin/hdp-ps +3 -0
  16. data/bin/hdp-put +3 -0
  17. data/bin/hdp-rm +11 -0
  18. data/bin/hdp-sort +29 -0
  19. data/bin/hdp-stream +29 -0
  20. data/bin/hdp-stream-flat +18 -0
  21. data/bin/hdp-sync +17 -0
  22. data/bin/hdp-wc +67 -0
  23. data/bin/md5sort +20 -0
  24. data/bin/tabchar +5 -0
  25. data/bin/uniqc +3 -0
  26. data/bin/wu-hist +3 -0
  27. data/bin/wu-lign +177 -0
  28. data/bin/wu-sum +30 -0
  29. data/doc/INSTALL.textile +41 -0
  30. data/doc/LICENSE.textile +107 -0
  31. data/doc/README-tutorial.textile +163 -0
  32. data/doc/README-wulign.textile +59 -0
  33. data/doc/README-wutils.textile +128 -0
  34. data/doc/TODO.textile +61 -0
  35. data/doc/UsingWukong-part1-setup.textile +2 -0
  36. data/doc/UsingWukong-part2-scraping.textile +2 -0
  37. data/doc/UsingWukong-part3-parsing.textile +132 -0
  38. data/doc/code/api_response_example.txt +20 -0
  39. data/doc/code/parser_skeleton.rb +38 -0
  40. data/doc/hadoop-nfs.textile +51 -0
  41. data/doc/hadoop-setup.textile +29 -0
  42. data/doc/index.textile +124 -0
  43. data/doc/intro_to_map_reduce/MapReduceDiagram.graffle +0 -0
  44. data/doc/links.textile +42 -0
  45. data/doc/overview.textile +91 -0
  46. data/doc/pig/PigLatinExpressionsList.txt +122 -0
  47. data/doc/pig/PigLatinReferenceManual.html +19134 -0
  48. data/doc/pig/PigLatinReferenceManual.txt +1640 -0
  49. data/doc/tips.textile +116 -0
  50. data/doc/usage.textile +102 -0
  51. data/doc/utils.textile +48 -0
  52. data/examples/README.txt +17 -0
  53. data/examples/and_pig/sample_queries.rb +128 -0
  54. data/examples/apache_log_parser.rb +53 -0
  55. data/examples/count_keys.rb +56 -0
  56. data/examples/count_keys_at_mapper.rb +57 -0
  57. data/examples/graph/adjacency_list.rb +74 -0
  58. data/examples/graph/breadth_first_search.rb +79 -0
  59. data/examples/graph/gen_2paths.rb +68 -0
  60. data/examples/graph/gen_multi_edge.rb +103 -0
  61. data/examples/graph/gen_symmetric_links.rb +53 -0
  62. data/examples/package-local.rb +100 -0
  63. data/examples/package.rb +96 -0
  64. data/examples/pagerank/README.textile +6 -0
  65. data/examples/pagerank/gen_initial_pagerank_graph.pig +57 -0
  66. data/examples/pagerank/pagerank.rb +88 -0
  67. data/examples/pagerank/pagerank_initialize.rb +46 -0
  68. data/examples/pagerank/run_pagerank.sh +19 -0
  69. data/examples/rank_and_bin.rb +173 -0
  70. data/examples/run_all.sh +47 -0
  71. data/examples/sample_records.rb +44 -0
  72. data/examples/size.rb +60 -0
  73. data/examples/word_count.rb +95 -0
  74. data/lib/wukong.rb +11 -0
  75. data/lib/wukong/and_pig.rb +62 -0
  76. data/lib/wukong/and_pig/README.textile +12 -0
  77. data/lib/wukong/and_pig/as.rb +37 -0
  78. data/lib/wukong/and_pig/data_types.rb +30 -0
  79. data/lib/wukong/and_pig/functions.rb +50 -0
  80. data/lib/wukong/and_pig/generate.rb +85 -0
  81. data/lib/wukong/and_pig/generate/variable_inflections.rb +82 -0
  82. data/lib/wukong/and_pig/junk.rb +51 -0
  83. data/lib/wukong/and_pig/operators.rb +8 -0
  84. data/lib/wukong/and_pig/operators/compound.rb +29 -0
  85. data/lib/wukong/and_pig/operators/evaluators.rb +7 -0
  86. data/lib/wukong/and_pig/operators/execution.rb +15 -0
  87. data/lib/wukong/and_pig/operators/file_methods.rb +29 -0
  88. data/lib/wukong/and_pig/operators/foreach.rb +98 -0
  89. data/lib/wukong/and_pig/operators/groupies.rb +212 -0
  90. data/lib/wukong/and_pig/operators/load_store.rb +65 -0
  91. data/lib/wukong/and_pig/operators/meta.rb +42 -0
  92. data/lib/wukong/and_pig/operators/relational.rb +129 -0
  93. data/lib/wukong/and_pig/pig_struct.rb +48 -0
  94. data/lib/wukong/and_pig/pig_var.rb +95 -0
  95. data/lib/wukong/and_pig/symbol.rb +29 -0
  96. data/lib/wukong/and_pig/utils.rb +0 -0
  97. data/lib/wukong/bad_record.rb +18 -0
  98. data/lib/wukong/boot.rb +47 -0
  99. data/lib/wukong/datatypes.rb +24 -0
  100. data/lib/wukong/datatypes/enum.rb +123 -0
  101. data/lib/wukong/dfs.rb +80 -0
  102. data/lib/wukong/encoding.rb +111 -0
  103. data/lib/wukong/extensions.rb +15 -0
  104. data/lib/wukong/extensions/array.rb +18 -0
  105. data/lib/wukong/extensions/blank.rb +93 -0
  106. data/lib/wukong/extensions/class.rb +189 -0
  107. data/lib/wukong/extensions/date_time.rb +24 -0
  108. data/lib/wukong/extensions/emittable.rb +82 -0
  109. data/lib/wukong/extensions/hash.rb +120 -0
  110. data/lib/wukong/extensions/hash_like.rb +119 -0
  111. data/lib/wukong/extensions/hashlike_class.rb +47 -0
  112. data/lib/wukong/extensions/module.rb +2 -0
  113. data/lib/wukong/extensions/pathname.rb +27 -0
  114. data/lib/wukong/extensions/string.rb +65 -0
  115. data/lib/wukong/extensions/struct.rb +17 -0
  116. data/lib/wukong/extensions/symbol.rb +11 -0
  117. data/lib/wukong/logger.rb +53 -0
  118. data/lib/wukong/models/graph.rb +27 -0
  119. data/lib/wukong/rdf.rb +104 -0
  120. data/lib/wukong/schema.rb +37 -0
  121. data/lib/wukong/script.rb +265 -0
  122. data/lib/wukong/script/hadoop_command.rb +111 -0
  123. data/lib/wukong/script/local_command.rb +14 -0
  124. data/lib/wukong/streamer.rb +13 -0
  125. data/lib/wukong/streamer/accumulating_reducer.rb +89 -0
  126. data/lib/wukong/streamer/base.rb +76 -0
  127. data/lib/wukong/streamer/count_keys.rb +30 -0
  128. data/lib/wukong/streamer/count_lines.rb +26 -0
  129. data/lib/wukong/streamer/filter.rb +20 -0
  130. data/lib/wukong/streamer/line_streamer.rb +12 -0
  131. data/lib/wukong/streamer/list_reducer.rb +20 -0
  132. data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +22 -0
  133. data/lib/wukong/streamer/rank_and_bin_reducer.rb +145 -0
  134. data/lib/wukong/streamer/set_reducer.rb +14 -0
  135. data/lib/wukong/streamer/struct_streamer.rb +48 -0
  136. data/lib/wukong/streamer/summing_reducer.rb +29 -0
  137. data/lib/wukong/streamer/uniq_by_last_reducer.rb +44 -0
  138. data/lib/wukong/typed_struct.rb +12 -0
  139. data/lib/wukong/wukong_class.rb +21 -0
  140. data/spec/bin/hdp-wc_spec.rb +4 -0
  141. data/spec/spec_helper.rb +0 -0
  142. data/wukong.gemspec +179 -0
  143. metadata +214 -0
@@ -0,0 +1,145 @@
1
+ module Wukong
2
+ module Streamer
3
+
4
+ #
5
+ # Bin and order a partitioned subset of keys
6
+ #
7
+ # For each record, appends a
8
+ #
9
+ # * numbering, from 0..(n-1). Each element gets a distinct numbering based on
10
+ # the order seen at the reducer; elements with identical keys might have
11
+ # different numbering on different runs.
12
+ #
13
+ # * rank, a number within 1..n giving the "place" of each value. Each element
14
+ # receives a successive (and thus unique) numbering, but all elements with
15
+ # the same key share the same rank. The first element for a given rank has
16
+ #
17
+ # (rank == numbering + 1)
18
+ #
19
+ # * bin, a number assigning keys by rank into a smaller number of groups. You
20
+ # must supply command line arguments
21
+ #
22
+ # --bins=[number] --total_count=[number]
23
+ #
24
+ # giving the number of groups and predicting in advance the total number of
25
+ # records. (Or override the bin assignment method to use your own damn
26
+ # strategy).
27
+ #
28
+ # If your data looked (in order) as follows, and 4 bins were requested:
29
+ #
30
+ # data: 1 1 1 2.3 7 69 79 79 80 81 81
31
+ # numbering: 0 1 2 3 4 5 6 7 8 9 10
32
+ # rank: 1 1 1 4 5 6 7 7 9 10 10
33
+ # 4-bin: 1 1 1 2 2 3 3 3 4 4 4
34
+ #
35
+ # If instead 100 bins were requested,
36
+ #
37
+ # data: 1 1 1 2.3 7 69 79 79 80 81 81
38
+ # numbering: 0 1 2 3 4 5 6 7 8 9 10
39
+ # rank: 1 1 1 4 5 6 7 7 9 10 10
40
+ # 100-bin: 5 5 5 32 41 51 60 60 78 87 87
41
+ #
42
+ # Note that most of the bins are empty.
43
+ #
44
+ # --------------------------------------------------------------------------
45
+ #
46
+ # Note that in this implementation each reducer numbers its own subset of
47
+ # elements from 1..total_count. If you want to number your whole dataset,
48
+ # you'll have to set @:reduce_tasks => 1@ in your Script's
49
+ # Script#default_options.
50
+ #
51
+ # You might feel a bit better about yourself if you can bin several fields
52
+ # (or subsets) at once. The :partition_fields option to Wukong::Script
53
+ # (which requests a KeyFieldBasedPartitioner) can be used to route different
54
+ # subsets to (possibly) distinct reducers.
55
+ #
56
+ # See the [examples/rank_and_bin_fields.rb] example script for an
57
+ # implementation of this. (And note the thing you have to do in case one
58
+ # reducer sees multiple partitions).
59
+ #
60
+ # It would surely be best to use a total sort and supply each reducer with the
61
+ # initial rank of its run.
62
+ #
63
+ class RankAndBinReducer < Wukong::Streamer::Base
64
+ attr_accessor :bin_size
65
+ def initialize options
66
+ super options
67
+ configure_bins! options
68
+ reset_order_params!
69
+ end
70
+
71
+ # ===========================================================================
72
+ #
73
+ # Order parameters (numbering, bin and rank)
74
+ #
75
+
76
+ #
77
+ # Key used to assign ranking -- elements with identical keys have identical
78
+ # rank.
79
+ #
80
+ def get_key *args
81
+ args.first
82
+ end
83
+
84
+ def reset_order_params!
85
+ @last_key = nil
86
+ @numbering = 0
87
+ @rank = 1
88
+ end
89
+
90
+ #
91
+ # The ranking is the "place" of each value: each element receives a
92
+ # successive (and thus unique) numbering, but all elements with the same key
93
+ # share the same rank. The first element for a given rank has
94
+ #
95
+ # (rank == numbering + 1)
96
+ #
97
+ def get_rank key
98
+ if @last_key != key
99
+ @rank = @numbering + 1
100
+ @last_key = key
101
+ end
102
+ @rank
103
+ end
104
+
105
+ #
106
+ # Set the bin from the current rank
107
+ # elements with identical keys land in identical bins.
108
+ #
109
+ def get_bin rank
110
+ ((rank-0.5) / bin_size ).floor + 1
111
+ end
112
+
113
+ #
114
+ # Return the numbering, rank and bin for the given key
115
+ #
116
+ def get_order_params key
117
+ numbering = @numbering # use un-incremented value
118
+ rank = get_rank key
119
+ bin = get_bin rank
120
+ @numbering += 1
121
+ [numbering, rank, bin]
122
+ end
123
+
124
+ def configure_bins! options
125
+ case
126
+ when options[:bins]
127
+ total_count = options[:total_count].to_f
128
+ bins = options[:bins].to_i
129
+ unless total_count && (total_count != 0) then raise "To set the bin (%ile) size using --bins, we need to know the total count in advance. Please supply the total_count option." end
130
+ self.bin_size = (total_count / bins)
131
+ # $stderr.puts "Splitting %s records into %s bins of size %f. First element gets bin %d, last gets bin %d, median gets bin %d/%d" %
132
+ # [total_count, bins, bin_size, get_bin(1), get_bin(total_count), get_bin(((total_count+1)/2.0).floor), get_bin(((total_count+1)/2.0).ceil)]
133
+ else
134
+ raise "Please specify a number of --bins= and a --total_count= or your own strategy to bin the ranked items."
135
+ end
136
+ end
137
+
138
+ def process *fields
139
+ numbering, rank, bin = get_order_params(get_key(*fields))
140
+ yield fields.to_flat + [numbering, rank, bin]
141
+ end
142
+ end
143
+
144
+ end
145
+ end
@@ -0,0 +1,14 @@
1
+ require 'wukong/streamer/list_reducer'
2
+ module Wukong
3
+ module Streamer
4
+ #
5
+ # A ListReducer that starts each key's values as an empty Set (so duplicate values presumably collapse)
6
+ #
7
+ class SetReducer < Wukong::Streamer::ListReducer
8
+ # Begin with an empty set
9
+ def start! *args
10
+ self.values = Set.new
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,48 @@
1
+ # -*- coding: utf-8 -*-
2
+ module Wukong
3
+ module Streamer
4
+ #
5
+ # Mix StructRecordizer into any streamer to make it accept a stream of
6
+ # objects -- the first field in each line is turned into a class and used to
7
+ # instantiate an object using the remaining fields on that line.
8
+ #
9
+ module StructRecordizer
10
+
11
+ #
12
+ # Turn the first field (up to any '-' suffix) into a class name, then use the remaining fields
13
+ # on that line to instantiate the object to process.
14
+ #
15
+ def self.recordize rsrc, *fields
16
+ klass_name, suffix = rsrc.split('-', 2)
17
+ klass = Wukong.class_from_resource(klass_name) or return
18
+ # instantiate the class using the remaining fields on that line
19
+ begin
20
+ [ klass.new(*fields), suffix ]
21
+ rescue ArgumentError => e
22
+ warn "Couldn't instantiate: #{e} (#{[rsrc, fields].inspect})"
23
+ return
24
+ rescue Exception => e
25
+ raise [e, rsrc, fields].inspect
26
+ end
27
+ end
28
+
29
+ #
30
+ #
31
+ #
32
+ def recordize line
33
+ StructRecordizer.recordize *line.split("\t")
34
+ end
35
+ end
36
+
37
+ #
38
+ # Processes file as a stream of objects -- the first field in each line is
39
+ # turned into a class and used to instantiate an object using the remaining
40
+ # fields on that line.
41
+ #
42
+ # See [StructRecordizer] for more.
43
+ #
44
+ class StructStreamer < Wukong::Streamer::Base
45
+ include StructRecordizer
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,29 @@
1
+ module Wukong
2
+ module Streamer
3
+ #
4
+ # Emit each unique key followed by the sums of the fields selected by summing_elements
5
+ #
6
+ class SummingReducer < Wukong::Streamer::AccumulatingReducer
7
+ attr_accessor :summing_elements
8
+ attr_accessor :sums
9
+
10
+ # reset every running sum to zero (one per summing element)
11
+ def start! *args
12
+ self.sums = summing_elements.map{ 0 }
13
+ end
14
+
15
+ # add this record's selected fields (converted with to_i) to the running sums
16
+ def accumulate *fields
17
+ vals = fields.values_at( *summing_elements )
18
+ vals.each_with_index{|val,idx| self.sums[idx] += val.to_i }
19
+ end
20
+
21
+ # emit the key followed by each sum, flattened into a single record.
22
+ def finalize
23
+ yield [key, sums].flatten
24
+ end
25
+ end
26
+
27
+ end
28
+ end
29
+
@@ -0,0 +1,44 @@
1
+ module Wukong
2
+ module Streamer
3
+ #
4
+ # Accumulate acts like an insecure high-school kid, for each key adopting in
5
+ # turn the latest value seen. It then emits the last (in sort order) value
6
+ # for that key.
7
+ #
8
+ # For example, to extract the *latest* value for each property, set hadoop
9
+ # to use <resource, item_id, timestamp> as sort fields and <resource,
10
+ # item_id> as key fields.
11
+ #
12
+ class UniqByLastReducer < Wukong::Streamer::AccumulatingReducer
13
+ attr_accessor :final_value
14
+
15
+ #
16
+ # Use first two fields as keys by default
17
+ #
18
+ def get_key *vals
19
+ vals[0..1]
20
+ end
21
+
22
+ #
23
+ # Adopt each value in turn: the last one's the one you want.
24
+ #
25
+ def accumulate *vals
26
+ self.final_value = vals
27
+ end
28
+
29
+ #
30
+ # Emit the last-seen value
31
+ #
32
+ def finalize
33
+ yield final_value if final_value
34
+ end
35
+
36
+ #
37
+ # Clear state on reset
38
+ #
39
+ def start! *args
40
+ self.final_value = nil
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,12 @@
1
+ class TypedStruct < Struct
2
+ def self.new *members_types
3
+ members, mtypes = members_types.transpose
4
+ klass = Struct.new *members.map(&:to_sym)
5
+ klass.class_eval do
6
+ cattr_accessor :mtypes, :members_types
7
+ self.mtypes = mtypes
8
+ self.members_types = Hash.zip(members, mtypes)
9
+ end
10
+ klass
11
+ end
12
+ end
@@ -0,0 +1,21 @@
1
+ # require 'active_support/core_ext/class/inheritable_attributes.rb'
2
+ require 'extlib/class'
3
+
4
+ module Wukong
5
+ #
6
+ # Mix in to give a class hash-style [] and []= access to its attributes (dispatched via send)
7
+ #
8
+ module WukongClass
9
+
10
+
11
+ def [](attr)
12
+ self.send attr
13
+ end
14
+ def []=(attr, val)
15
+ self.send("#{attr}=", val)
16
+ end
17
+
18
+ end
19
+
20
+
21
+ end
@@ -0,0 +1,4 @@
1
+ # -*- coding: undecided -*-
2
+
3
+
4
+ I18N_STR = "Iñtërnâtiônàlizætiøn hasn't happened to ruby 1.8 yet"
File without changes
data/wukong.gemspec ADDED
@@ -0,0 +1,179 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{wukong}
8
+ s.version = "0.1.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Philip (flip) Kromer"]
12
+ s.date = %q{2009-09-28}
13
+ s.description = %q{ Treat your dataset like a:
14
+
15
+ * stream of lines when it’s efficient to process by lines
16
+ * stream of field arrays when it’s efficient to deal directly with fields
17
+ * stream of lightweight objects when it’s efficient to deal with objects
18
+
19
+ Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
20
+ }
21
+ s.email = %q{flip@infochimps.org}
22
+ s.executables = ["cutc", "cuttab", "greptrue", "hdp-cat", "hdp-catd", "hdp-du", "hdp-get", "hdp-kill", "hdp-ls", "hdp-mkdir", "hdp-mv", "hdp-parts_to_keys.rb", "hdp-ps", "hdp-put", "hdp-rm", "hdp-sort", "hdp-stream", "hdp-stream-flat", "hdp-sync", "hdp-wc", "md5sort", "tabchar", "uniqc", "wu-hist", "wu-lign", "wu-sum"]
23
+ s.extra_rdoc_files = [
24
+ "LICENSE.textile",
25
+ "README.textile"
26
+ ]
27
+ s.files = [
28
+ "doc/INSTALL.textile",
29
+ "doc/LICENSE.textile",
30
+ "doc/README-tutorial.textile",
31
+ "doc/README-wulign.textile",
32
+ "doc/README-wutils.textile",
33
+ "doc/TODO.textile",
34
+ "doc/UsingWukong-part1-setup.textile",
35
+ "doc/UsingWukong-part2-scraping.textile",
36
+ "doc/UsingWukong-part3-parsing.textile",
37
+ "doc/code/api_response_example.txt",
38
+ "doc/code/parser_skeleton.rb",
39
+ "doc/hadoop-nfs.textile",
40
+ "doc/hadoop-setup.textile",
41
+ "doc/index.textile",
42
+ "doc/intro_to_map_reduce/MapReduceDiagram.graffle",
43
+ "doc/links.textile",
44
+ "doc/overview.textile",
45
+ "doc/pig/PigLatinExpressionsList.txt",
46
+ "doc/pig/PigLatinReferenceManual.html",
47
+ "doc/pig/PigLatinReferenceManual.txt",
48
+ "doc/tips.textile",
49
+ "doc/usage.textile",
50
+ "doc/utils.textile",
51
+ "examples/README.txt",
52
+ "examples/and_pig/sample_queries.rb",
53
+ "examples/apache_log_parser.rb",
54
+ "examples/count_keys.rb",
55
+ "examples/count_keys_at_mapper.rb",
56
+ "examples/graph/adjacency_list.rb",
57
+ "examples/graph/breadth_first_search.rb",
58
+ "examples/graph/gen_2paths.rb",
59
+ "examples/graph/gen_multi_edge.rb",
60
+ "examples/graph/gen_symmetric_links.rb",
61
+ "examples/package-local.rb",
62
+ "examples/package.rb",
63
+ "examples/pagerank/README.textile",
64
+ "examples/pagerank/gen_initial_pagerank_graph.pig",
65
+ "examples/pagerank/pagerank.rb",
66
+ "examples/pagerank/pagerank_initialize.rb",
67
+ "examples/pagerank/run_pagerank.sh",
68
+ "examples/rank_and_bin.rb",
69
+ "examples/run_all.sh",
70
+ "examples/sample_records.rb",
71
+ "examples/size.rb",
72
+ "examples/word_count.rb",
73
+ "lib/wukong.rb",
74
+ "lib/wukong/and_pig.rb",
75
+ "lib/wukong/and_pig/README.textile",
76
+ "lib/wukong/and_pig/as.rb",
77
+ "lib/wukong/and_pig/data_types.rb",
78
+ "lib/wukong/and_pig/functions.rb",
79
+ "lib/wukong/and_pig/generate.rb",
80
+ "lib/wukong/and_pig/generate/variable_inflections.rb",
81
+ "lib/wukong/and_pig/junk.rb",
82
+ "lib/wukong/and_pig/operators.rb",
83
+ "lib/wukong/and_pig/operators/compound.rb",
84
+ "lib/wukong/and_pig/operators/evaluators.rb",
85
+ "lib/wukong/and_pig/operators/execution.rb",
86
+ "lib/wukong/and_pig/operators/file_methods.rb",
87
+ "lib/wukong/and_pig/operators/foreach.rb",
88
+ "lib/wukong/and_pig/operators/groupies.rb",
89
+ "lib/wukong/and_pig/operators/load_store.rb",
90
+ "lib/wukong/and_pig/operators/meta.rb",
91
+ "lib/wukong/and_pig/operators/relational.rb",
92
+ "lib/wukong/and_pig/pig_struct.rb",
93
+ "lib/wukong/and_pig/pig_var.rb",
94
+ "lib/wukong/and_pig/symbol.rb",
95
+ "lib/wukong/and_pig/utils.rb",
96
+ "lib/wukong/bad_record.rb",
97
+ "lib/wukong/boot.rb",
98
+ "lib/wukong/datatypes.rb",
99
+ "lib/wukong/datatypes/enum.rb",
100
+ "lib/wukong/dfs.rb",
101
+ "lib/wukong/encoding.rb",
102
+ "lib/wukong/extensions.rb",
103
+ "lib/wukong/extensions/array.rb",
104
+ "lib/wukong/extensions/blank.rb",
105
+ "lib/wukong/extensions/class.rb",
106
+ "lib/wukong/extensions/date_time.rb",
107
+ "lib/wukong/extensions/emittable.rb",
108
+ "lib/wukong/extensions/hash.rb",
109
+ "lib/wukong/extensions/hash_like.rb",
110
+ "lib/wukong/extensions/hashlike_class.rb",
111
+ "lib/wukong/extensions/module.rb",
112
+ "lib/wukong/extensions/pathname.rb",
113
+ "lib/wukong/extensions/string.rb",
114
+ "lib/wukong/extensions/struct.rb",
115
+ "lib/wukong/extensions/symbol.rb",
116
+ "lib/wukong/logger.rb",
117
+ "lib/wukong/models/graph.rb",
118
+ "lib/wukong/rdf.rb",
119
+ "lib/wukong/schema.rb",
120
+ "lib/wukong/script.rb",
121
+ "lib/wukong/script/hadoop_command.rb",
122
+ "lib/wukong/script/local_command.rb",
123
+ "lib/wukong/streamer.rb",
124
+ "lib/wukong/streamer/accumulating_reducer.rb",
125
+ "lib/wukong/streamer/base.rb",
126
+ "lib/wukong/streamer/count_keys.rb",
127
+ "lib/wukong/streamer/count_lines.rb",
128
+ "lib/wukong/streamer/filter.rb",
129
+ "lib/wukong/streamer/line_streamer.rb",
130
+ "lib/wukong/streamer/list_reducer.rb",
131
+ "lib/wukong/streamer/preprocess_with_pipe_streamer.rb",
132
+ "lib/wukong/streamer/rank_and_bin_reducer.rb",
133
+ "lib/wukong/streamer/set_reducer.rb",
134
+ "lib/wukong/streamer/struct_streamer.rb",
135
+ "lib/wukong/streamer/summing_reducer.rb",
136
+ "lib/wukong/streamer/uniq_by_last_reducer.rb",
137
+ "lib/wukong/typed_struct.rb",
138
+ "lib/wukong/wukong_class.rb",
139
+ "spec/bin/hdp-wc_spec.rb",
140
+ "spec/spec_helper.rb",
141
+ "wukong.gemspec"
142
+ ]
143
+ s.homepage = %q{http://github.com/mrflip/wukong}
144
+ s.rdoc_options = ["--charset=UTF-8"]
145
+ s.require_paths = ["lib"]
146
+ s.rubygems_version = %q{1.3.5}
147
+ s.summary = %q{Wukong makes Hadoop so easy a chimpanzee can use it.}
148
+ s.test_files = [
149
+ "spec/bin/hdp-wc_spec.rb",
150
+ "spec/spec_helper.rb",
151
+ "examples/and_pig/sample_queries.rb",
152
+ "examples/apache_log_parser.rb",
153
+ "examples/count_keys.rb",
154
+ "examples/count_keys_at_mapper.rb",
155
+ "examples/graph/adjacency_list.rb",
156
+ "examples/graph/breadth_first_search.rb",
157
+ "examples/graph/gen_2paths.rb",
158
+ "examples/graph/gen_multi_edge.rb",
159
+ "examples/graph/gen_symmetric_links.rb",
160
+ "examples/package-local.rb",
161
+ "examples/package.rb",
162
+ "examples/pagerank/pagerank.rb",
163
+ "examples/pagerank/pagerank_initialize.rb",
164
+ "examples/rank_and_bin.rb",
165
+ "examples/sample_records.rb",
166
+ "examples/size.rb",
167
+ "examples/word_count.rb"
168
+ ]
169
+
170
+ if s.respond_to? :specification_version then
171
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
172
+ s.specification_version = 3
173
+
174
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
175
+ else
176
+ end
177
+ else
178
+ end
179
+ end