wukong 1.5.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/CHANGELOG.textile +32 -0
  2. data/README.textile +58 -12
  3. data/TODO.textile +0 -8
  4. data/bin/hdp-bzip +12 -17
  5. data/bin/hdp-kill-task +1 -1
  6. data/bin/hdp-sort +7 -7
  7. data/bin/hdp-stream +7 -7
  8. data/bin/hdp-stream-flat +2 -3
  9. data/bin/setcat +11 -0
  10. data/bin/uniq-ord +59 -0
  11. data/examples/corpus/bucket_counter.rb +47 -0
  12. data/examples/corpus/dbpedia_abstract_to_sentences.rb +85 -0
  13. data/examples/corpus/sentence_coocurrence.rb +70 -0
  14. data/examples/emr/README.textile +110 -0
  15. data/examples/emr/dot_wukong_dir/emr_bootstrap.sh +1 -0
  16. data/examples/emr/elastic_mapreduce_example.rb +2 -2
  17. data/examples/ignore_me/counting.rb +56 -0
  18. data/examples/ignore_me/grouper.rb +71 -0
  19. data/examples/network_graph/adjacency_list.rb +2 -2
  20. data/examples/network_graph/breadth_first_search.rb +14 -21
  21. data/examples/network_graph/gen_multi_edge.rb +22 -13
  22. data/examples/pagerank/pagerank.rb +1 -1
  23. data/examples/pagerank/pagerank_initialize.rb +6 -10
  24. data/examples/sample_records.rb +6 -16
  25. data/examples/server_logs/apache_log_parser.rb +7 -22
  26. data/examples/server_logs/breadcrumbs.rb +39 -0
  27. data/examples/server_logs/logline.rb +27 -0
  28. data/examples/size.rb +3 -2
  29. data/examples/{binning_percentile_estimator.rb → stats/binning_percentile_estimator.rb} +9 -11
  30. data/examples/{rank_and_bin.rb → stats/rank_and_bin.rb} +2 -2
  31. data/examples/stupidly_simple_filter.rb +11 -14
  32. data/examples/word_count.rb +16 -36
  33. data/lib/wukong/and_pig.rb +2 -15
  34. data/lib/wukong/logger.rb +7 -28
  35. data/lib/wukong/periodic_monitor.rb +24 -9
  36. data/lib/wukong/script/emr_command.rb +1 -0
  37. data/lib/wukong/script/hadoop_command.rb +31 -29
  38. data/lib/wukong/script.rb +19 -14
  39. data/lib/wukong/store/cassandra_model.rb +2 -1
  40. data/lib/wukong/streamer/accumulating_reducer.rb +5 -9
  41. data/lib/wukong/streamer/base.rb +44 -3
  42. data/lib/wukong/streamer/counting_reducer.rb +12 -12
  43. data/lib/wukong/streamer/filter.rb +2 -2
  44. data/lib/wukong/streamer/list_reducer.rb +3 -3
  45. data/lib/wukong/streamer/reducer.rb +11 -0
  46. data/lib/wukong/streamer.rb +7 -3
  47. data/lib/wukong.rb +7 -3
  48. data/{examples → old}/cassandra_streaming/berlitz_for_cassandra.textile +0 -0
  49. data/{examples → old}/cassandra_streaming/client_interface_notes.textile +0 -0
  50. data/{examples → old}/cassandra_streaming/client_schema.textile +0 -0
  51. data/{examples → old}/cassandra_streaming/tuning.textile +0 -0
  52. data/wukong.gemspec +257 -285
  53. metadata +45 -62
  54. data/examples/cassandra_streaming/avromapper.rb +0 -85
  55. data/examples/cassandra_streaming/cassandra.avpr +0 -468
  56. data/examples/cassandra_streaming/cassandra_random_partitioner.rb +0 -62
  57. data/examples/cassandra_streaming/catter.sh +0 -45
  58. data/examples/cassandra_streaming/client_schema.avpr +0 -211
  59. data/examples/cassandra_streaming/foofile.avr +0 -0
  60. data/examples/cassandra_streaming/pymap.sh +0 -1
  61. data/examples/cassandra_streaming/pyreduce.sh +0 -1
  62. data/examples/cassandra_streaming/smutation.avpr +0 -188
  63. data/examples/cassandra_streaming/streamer.sh +0 -51
  64. data/examples/cassandra_streaming/struct_loader.rb +0 -24
  65. data/examples/count_keys.rb +0 -56
  66. data/examples/count_keys_at_mapper.rb +0 -57
  67. data/examples/emr/README-elastic_map_reduce.textile +0 -26
  68. data/examples/keystore/cassandra_batch_test.rb +0 -41
  69. data/examples/keystore/conditional_outputter_example.rb +0 -70
  70. data/examples/store/chunked_store_example.rb +0 -18
  71. data/lib/wukong/dfs.rb +0 -81
  72. data/lib/wukong/keystore/cassandra_conditional_outputter.rb +0 -122
  73. data/lib/wukong/keystore/redis_db.rb +0 -24
  74. data/lib/wukong/keystore/tyrant_db.rb +0 -137
  75. data/lib/wukong/keystore/tyrant_notes.textile +0 -145
  76. data/lib/wukong/models/graph.rb +0 -25
  77. data/lib/wukong/monitor/chunked_store.rb +0 -23
  78. data/lib/wukong/monitor/periodic_logger.rb +0 -34
  79. data/lib/wukong/monitor/periodic_monitor.rb +0 -70
  80. data/lib/wukong/monitor.rb +0 -7
  81. data/lib/wukong/rdf.rb +0 -104
  82. data/lib/wukong/streamer/cassandra_streamer.rb +0 -61
  83. data/lib/wukong/streamer/count_keys.rb +0 -30
  84. data/lib/wukong/streamer/count_lines.rb +0 -26
  85. data/lib/wukong/streamer/em_streamer.rb +0 -7
  86. data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +0 -22
  87. data/lib/wukong/wukong_class.rb +0 -21
data/wukong.gemspec CHANGED
@@ -1,15 +1,15 @@
1
1
  # Generated by jeweler
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{wukong}
8
- s.version = "1.5.4"
8
+ s.version = "2.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Philip (flip) Kromer"]
12
- s.date = %q{2010-11-02}
12
+ s.date = %q{2011-01-29}
13
13
  s.description = %q{ Treat your dataset like a:
14
14
 
15
15
  * stream of lines when it's efficient to process by lines
@@ -22,300 +22,272 @@ Gem::Specification.new do |s|
22
22
  s.executables = ["hdp-du", "hdp-sync", "hdp-wc", "wu-lign", "wu-sum", "hdp-parts_to_keys.rb"]
23
23
  s.extra_rdoc_files = [
24
24
  "LICENSE.textile",
25
- "README.textile"
25
+ "README.textile"
26
26
  ]
27
27
  s.files = [
28
28
  "CHANGELOG.textile",
29
- "INSTALL.textile",
30
- "LICENSE.textile",
31
- "README.textile",
32
- "TODO.textile",
33
- "bin/cutc",
34
- "bin/cuttab",
35
- "bin/greptrue",
36
- "bin/hdp-bin",
37
- "bin/hdp-bzip",
38
- "bin/hdp-cat",
39
- "bin/hdp-catd",
40
- "bin/hdp-cp",
41
- "bin/hdp-du",
42
- "bin/hdp-get",
43
- "bin/hdp-kill",
44
- "bin/hdp-kill-task",
45
- "bin/hdp-ls",
46
- "bin/hdp-mkdir",
47
- "bin/hdp-mkdirp",
48
- "bin/hdp-mv",
49
- "bin/hdp-parts_to_keys.rb",
50
- "bin/hdp-ps",
51
- "bin/hdp-put",
52
- "bin/hdp-rm",
53
- "bin/hdp-sort",
54
- "bin/hdp-stream",
55
- "bin/hdp-stream-flat",
56
- "bin/hdp-stream2",
57
- "bin/hdp-sync",
58
- "bin/hdp-wc",
59
- "bin/md5sort",
60
- "bin/tabchar",
61
- "bin/uniqc",
62
- "bin/wu-date",
63
- "bin/wu-datetime",
64
- "bin/wu-hist",
65
- "bin/wu-lign",
66
- "bin/wu-plus",
67
- "bin/wu-sum",
68
- "docpages/INSTALL.textile",
69
- "docpages/LICENSE.textile",
70
- "docpages/README-elastic_map_reduce.textile",
71
- "docpages/README-performance.textile",
72
- "docpages/README-wulign.textile",
73
- "docpages/UsingWukong-part1-get_ready.textile",
74
- "docpages/UsingWukong-part2-ThinkingBigData.textile",
75
- "docpages/UsingWukong-part3-parsing.textile",
76
- "docpages/_config.yml",
77
- "docpages/avro/avro_notes.textile",
78
- "docpages/avro/performance.textile",
79
- "docpages/avro/tethering.textile",
80
- "docpages/bigdata-tips.textile",
81
- "docpages/code/api_response_example.txt",
82
- "docpages/code/parser_skeleton.rb",
83
- "docpages/diagrams/MapReduceDiagram.graffle",
84
- "docpages/favicon.ico",
85
- "docpages/gem.css",
86
- "docpages/hadoop-tips.textile",
87
- "docpages/index.textile",
88
- "docpages/intro.textile",
89
- "docpages/moreinfo.textile",
90
- "docpages/news.html",
91
- "docpages/pig/PigLatinExpressionsList.txt",
92
- "docpages/pig/PigLatinReferenceManual.txt",
93
- "docpages/pig/commandline_params.txt",
94
- "docpages/pig/cookbook.html",
95
- "docpages/pig/images/hadoop-logo.jpg",
96
- "docpages/pig/images/instruction_arrow.png",
97
- "docpages/pig/images/pig-logo.gif",
98
- "docpages/pig/piglatin_ref1.html",
99
- "docpages/pig/piglatin_ref2.html",
100
- "docpages/pig/setup.html",
101
- "docpages/pig/skin/basic.css",
102
- "docpages/pig/skin/breadcrumbs.js",
103
- "docpages/pig/skin/fontsize.js",
104
- "docpages/pig/skin/getBlank.js",
105
- "docpages/pig/skin/getMenu.js",
106
- "docpages/pig/skin/images/chapter.gif",
107
- "docpages/pig/skin/images/chapter_open.gif",
108
- "docpages/pig/skin/images/current.gif",
109
- "docpages/pig/skin/images/external-link.gif",
110
- "docpages/pig/skin/images/header_white_line.gif",
111
- "docpages/pig/skin/images/page.gif",
112
- "docpages/pig/skin/images/pdfdoc.gif",
113
- "docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png",
114
- "docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png",
115
- "docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png",
116
- "docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png",
117
- "docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png",
118
- "docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png",
119
- "docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png",
120
- "docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png",
121
- "docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png",
122
- "docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png",
123
- "docpages/pig/skin/print.css",
124
- "docpages/pig/skin/profile.css",
125
- "docpages/pig/skin/screen.css",
126
- "docpages/pig/tutorial.html",
127
- "docpages/pig/udf.html",
128
- "docpages/tutorial.textile",
129
- "docpages/usage.textile",
130
- "docpages/wutils.textile",
131
- "examples/README.txt",
132
- "examples/binning_percentile_estimator.rb",
133
- "examples/cassandra_streaming/avromapper.rb",
134
- "examples/cassandra_streaming/berlitz_for_cassandra.textile",
135
- "examples/cassandra_streaming/cassandra.avpr",
136
- "examples/cassandra_streaming/cassandra_random_partitioner.rb",
137
- "examples/cassandra_streaming/catter.sh",
138
- "examples/cassandra_streaming/client_interface_notes.textile",
139
- "examples/cassandra_streaming/client_schema.avpr",
140
- "examples/cassandra_streaming/client_schema.textile",
141
- "examples/cassandra_streaming/foofile.avr",
142
- "examples/cassandra_streaming/pymap.sh",
143
- "examples/cassandra_streaming/pyreduce.sh",
144
- "examples/cassandra_streaming/smutation.avpr",
145
- "examples/cassandra_streaming/streamer.sh",
146
- "examples/cassandra_streaming/struct_loader.rb",
147
- "examples/cassandra_streaming/tuning.textile",
148
- "examples/contrib/jeans/README.markdown",
149
- "examples/contrib/jeans/data/normalized_sizes",
150
- "examples/contrib/jeans/data/orders.tsv",
151
- "examples/contrib/jeans/data/sizes",
152
- "examples/contrib/jeans/normalize.rb",
153
- "examples/contrib/jeans/sizes.rb",
154
- "examples/corpus/words_to_bigrams.rb",
155
- "examples/count_keys.rb",
156
- "examples/count_keys_at_mapper.rb",
157
- "examples/emr/README-elastic_map_reduce.textile",
158
- "examples/emr/dot_wukong_dir/credentials.json",
159
- "examples/emr/dot_wukong_dir/emr.yaml",
160
- "examples/emr/dot_wukong_dir/emr_bootstrap.sh",
161
- "examples/emr/elastic_mapreduce_example.rb",
162
- "examples/keystore/cassandra_batch_test.rb",
163
- "examples/keystore/conditional_outputter_example.rb",
164
- "examples/network_graph/adjacency_list.rb",
165
- "examples/network_graph/breadth_first_search.rb",
166
- "examples/network_graph/gen_2paths.rb",
167
- "examples/network_graph/gen_multi_edge.rb",
168
- "examples/network_graph/gen_symmetric_links.rb",
169
- "examples/pagerank/README.textile",
170
- "examples/pagerank/gen_initial_pagerank_graph.pig",
171
- "examples/pagerank/pagerank.rb",
172
- "examples/pagerank/pagerank_initialize.rb",
173
- "examples/pagerank/run_pagerank.sh",
174
- "examples/rank_and_bin.rb",
175
- "examples/sample_records.rb",
176
- "examples/server_logs/apache_log_parser.rb",
177
- "examples/server_logs/breadcrumbs.rb",
178
- "examples/server_logs/user_agent.rb",
179
- "examples/size.rb",
180
- "examples/stats/avg_value_frequency.rb",
181
- "examples/stats/data/avg_value_frequency.tsv",
182
- "examples/store/chunked_store_example.rb",
183
- "examples/stupidly_simple_filter.rb",
184
- "examples/word_count.rb",
185
- "lib/wukong.rb",
186
- "lib/wukong/and_pig.rb",
187
- "lib/wukong/bad_record.rb",
188
- "lib/wukong/datatypes.rb",
189
- "lib/wukong/datatypes/enum.rb",
190
- "lib/wukong/datatypes/fake_types.rb",
191
- "lib/wukong/dfs.rb",
192
- "lib/wukong/encoding.rb",
193
- "lib/wukong/encoding/asciize.rb",
194
- "lib/wukong/extensions.rb",
195
- "lib/wukong/extensions/array.rb",
196
- "lib/wukong/extensions/blank.rb",
197
- "lib/wukong/extensions/class.rb",
198
- "lib/wukong/extensions/date_time.rb",
199
- "lib/wukong/extensions/emittable.rb",
200
- "lib/wukong/extensions/enumerable.rb",
201
- "lib/wukong/extensions/hash.rb",
202
- "lib/wukong/extensions/hash_keys.rb",
203
- "lib/wukong/extensions/hash_like.rb",
204
- "lib/wukong/extensions/hashlike_class.rb",
205
- "lib/wukong/extensions/module.rb",
206
- "lib/wukong/extensions/pathname.rb",
207
- "lib/wukong/extensions/string.rb",
208
- "lib/wukong/extensions/struct.rb",
209
- "lib/wukong/extensions/symbol.rb",
210
- "lib/wukong/filename_pattern.rb",
211
- "lib/wukong/keystore/cassandra_conditional_outputter.rb",
212
- "lib/wukong/keystore/redis_db.rb",
213
- "lib/wukong/keystore/tyrant_db.rb",
214
- "lib/wukong/keystore/tyrant_notes.textile",
215
- "lib/wukong/logger.rb",
216
- "lib/wukong/models/graph.rb",
217
- "lib/wukong/monitor.rb",
218
- "lib/wukong/monitor/chunked_store.rb",
219
- "lib/wukong/monitor/periodic_logger.rb",
220
- "lib/wukong/monitor/periodic_monitor.rb",
221
- "lib/wukong/periodic_monitor.rb",
222
- "lib/wukong/rdf.rb",
223
- "lib/wukong/schema.rb",
224
- "lib/wukong/script.rb",
225
- "lib/wukong/script/avro_command.rb",
226
- "lib/wukong/script/cassandra_loader_script.rb",
227
- "lib/wukong/script/emr_command.rb",
228
- "lib/wukong/script/hadoop_command.rb",
229
- "lib/wukong/script/local_command.rb",
230
- "lib/wukong/store.rb",
231
- "lib/wukong/store/base.rb",
232
- "lib/wukong/store/cassandra.rb",
233
- "lib/wukong/store/cassandra/streaming.rb",
234
- "lib/wukong/store/cassandra/struct_loader.rb",
235
- "lib/wukong/store/cassandra_model.rb",
236
- "lib/wukong/store/chh_chunked_flat_file_store.rb",
237
- "lib/wukong/store/chunked_flat_file_store.rb",
238
- "lib/wukong/store/conditional_store.rb",
239
- "lib/wukong/store/factory.rb",
240
- "lib/wukong/store/flat_file_store.rb",
241
- "lib/wukong/store/key_store.rb",
242
- "lib/wukong/store/null_store.rb",
243
- "lib/wukong/store/read_thru_store.rb",
244
- "lib/wukong/store/tokyo_tdb_key_store.rb",
245
- "lib/wukong/store/tyrant_rdb_key_store.rb",
246
- "lib/wukong/store/tyrant_tdb_key_store.rb",
247
- "lib/wukong/streamer.rb",
248
- "lib/wukong/streamer/accumulating_reducer.rb",
249
- "lib/wukong/streamer/base.rb",
250
- "lib/wukong/streamer/cassandra_streamer.rb",
251
- "lib/wukong/streamer/count_keys.rb",
252
- "lib/wukong/streamer/count_lines.rb",
253
- "lib/wukong/streamer/counting_reducer.rb",
254
- "lib/wukong/streamer/em_streamer.rb",
255
- "lib/wukong/streamer/filter.rb",
256
- "lib/wukong/streamer/line_streamer.rb",
257
- "lib/wukong/streamer/list_reducer.rb",
258
- "lib/wukong/streamer/preprocess_with_pipe_streamer.rb",
259
- "lib/wukong/streamer/rank_and_bin_reducer.rb",
260
- "lib/wukong/streamer/record_streamer.rb",
261
- "lib/wukong/streamer/set_reducer.rb",
262
- "lib/wukong/streamer/struct_streamer.rb",
263
- "lib/wukong/streamer/summing_reducer.rb",
264
- "lib/wukong/streamer/uniq_by_last_reducer.rb",
265
- "lib/wukong/typed_struct.rb",
266
- "lib/wukong/wukong_class.rb",
267
- "spec/data/a_atsigns_b.tsv",
268
- "spec/data/a_follows_b.tsv",
269
- "spec/data/tweet.tsv",
270
- "spec/data/twitter_user.tsv",
271
- "spec/spec.opts",
272
- "spec/spec_helper.rb",
273
- "spec/wukong/encoding_spec.rb",
274
- "spec/wukong/script_spec.rb",
275
- "wukong.gemspec"
29
+ "INSTALL.textile",
30
+ "LICENSE.textile",
31
+ "README.textile",
32
+ "TODO.textile",
33
+ "bin/cutc",
34
+ "bin/cuttab",
35
+ "bin/greptrue",
36
+ "bin/hdp-bin",
37
+ "bin/hdp-bzip",
38
+ "bin/hdp-cat",
39
+ "bin/hdp-catd",
40
+ "bin/hdp-cp",
41
+ "bin/hdp-du",
42
+ "bin/hdp-get",
43
+ "bin/hdp-kill",
44
+ "bin/hdp-kill-task",
45
+ "bin/hdp-ls",
46
+ "bin/hdp-mkdir",
47
+ "bin/hdp-mkdirp",
48
+ "bin/hdp-mv",
49
+ "bin/hdp-parts_to_keys.rb",
50
+ "bin/hdp-ps",
51
+ "bin/hdp-put",
52
+ "bin/hdp-rm",
53
+ "bin/hdp-sort",
54
+ "bin/hdp-stream",
55
+ "bin/hdp-stream-flat",
56
+ "bin/hdp-stream2",
57
+ "bin/hdp-sync",
58
+ "bin/hdp-wc",
59
+ "bin/md5sort",
60
+ "bin/setcat",
61
+ "bin/tabchar",
62
+ "bin/uniq-ord",
63
+ "bin/uniqc",
64
+ "bin/wu-date",
65
+ "bin/wu-datetime",
66
+ "bin/wu-hist",
67
+ "bin/wu-lign",
68
+ "bin/wu-plus",
69
+ "bin/wu-sum",
70
+ "docpages/INSTALL.textile",
71
+ "docpages/LICENSE.textile",
72
+ "docpages/README-elastic_map_reduce.textile",
73
+ "docpages/README-performance.textile",
74
+ "docpages/README-wulign.textile",
75
+ "docpages/UsingWukong-part1-get_ready.textile",
76
+ "docpages/UsingWukong-part2-ThinkingBigData.textile",
77
+ "docpages/UsingWukong-part3-parsing.textile",
78
+ "docpages/_config.yml",
79
+ "docpages/avro/avro_notes.textile",
80
+ "docpages/avro/performance.textile",
81
+ "docpages/avro/tethering.textile",
82
+ "docpages/bigdata-tips.textile",
83
+ "docpages/code/api_response_example.txt",
84
+ "docpages/code/parser_skeleton.rb",
85
+ "docpages/diagrams/MapReduceDiagram.graffle",
86
+ "docpages/favicon.ico",
87
+ "docpages/gem.css",
88
+ "docpages/hadoop-tips.textile",
89
+ "docpages/index.textile",
90
+ "docpages/intro.textile",
91
+ "docpages/moreinfo.textile",
92
+ "docpages/news.html",
93
+ "docpages/pig/PigLatinExpressionsList.txt",
94
+ "docpages/pig/PigLatinReferenceManual.txt",
95
+ "docpages/pig/commandline_params.txt",
96
+ "docpages/pig/cookbook.html",
97
+ "docpages/pig/images/hadoop-logo.jpg",
98
+ "docpages/pig/images/instruction_arrow.png",
99
+ "docpages/pig/images/pig-logo.gif",
100
+ "docpages/pig/piglatin_ref1.html",
101
+ "docpages/pig/piglatin_ref2.html",
102
+ "docpages/pig/setup.html",
103
+ "docpages/pig/skin/basic.css",
104
+ "docpages/pig/skin/breadcrumbs.js",
105
+ "docpages/pig/skin/fontsize.js",
106
+ "docpages/pig/skin/getBlank.js",
107
+ "docpages/pig/skin/getMenu.js",
108
+ "docpages/pig/skin/images/chapter.gif",
109
+ "docpages/pig/skin/images/chapter_open.gif",
110
+ "docpages/pig/skin/images/current.gif",
111
+ "docpages/pig/skin/images/external-link.gif",
112
+ "docpages/pig/skin/images/header_white_line.gif",
113
+ "docpages/pig/skin/images/page.gif",
114
+ "docpages/pig/skin/images/pdfdoc.gif",
115
+ "docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png",
116
+ "docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png",
117
+ "docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png",
118
+ "docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png",
119
+ "docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png",
120
+ "docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png",
121
+ "docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png",
122
+ "docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png",
123
+ "docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png",
124
+ "docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png",
125
+ "docpages/pig/skin/print.css",
126
+ "docpages/pig/skin/profile.css",
127
+ "docpages/pig/skin/screen.css",
128
+ "docpages/pig/tutorial.html",
129
+ "docpages/pig/udf.html",
130
+ "docpages/tutorial.textile",
131
+ "docpages/usage.textile",
132
+ "docpages/wutils.textile",
133
+ "examples/README.txt",
134
+ "examples/contrib/jeans/README.markdown",
135
+ "examples/contrib/jeans/data/normalized_sizes",
136
+ "examples/contrib/jeans/data/orders.tsv",
137
+ "examples/contrib/jeans/data/sizes",
138
+ "examples/contrib/jeans/normalize.rb",
139
+ "examples/contrib/jeans/sizes.rb",
140
+ "examples/corpus/bucket_counter.rb",
141
+ "examples/corpus/dbpedia_abstract_to_sentences.rb",
142
+ "examples/corpus/sentence_coocurrence.rb",
143
+ "examples/corpus/words_to_bigrams.rb",
144
+ "examples/emr/README.textile",
145
+ "examples/emr/dot_wukong_dir/credentials.json",
146
+ "examples/emr/dot_wukong_dir/emr.yaml",
147
+ "examples/emr/dot_wukong_dir/emr_bootstrap.sh",
148
+ "examples/emr/elastic_mapreduce_example.rb",
149
+ "examples/ignore_me/counting.rb",
150
+ "examples/ignore_me/grouper.rb",
151
+ "examples/network_graph/adjacency_list.rb",
152
+ "examples/network_graph/breadth_first_search.rb",
153
+ "examples/network_graph/gen_2paths.rb",
154
+ "examples/network_graph/gen_multi_edge.rb",
155
+ "examples/network_graph/gen_symmetric_links.rb",
156
+ "examples/pagerank/README.textile",
157
+ "examples/pagerank/gen_initial_pagerank_graph.pig",
158
+ "examples/pagerank/pagerank.rb",
159
+ "examples/pagerank/pagerank_initialize.rb",
160
+ "examples/pagerank/run_pagerank.sh",
161
+ "examples/sample_records.rb",
162
+ "examples/server_logs/apache_log_parser.rb",
163
+ "examples/server_logs/breadcrumbs.rb",
164
+ "examples/server_logs/logline.rb",
165
+ "examples/server_logs/user_agent.rb",
166
+ "examples/size.rb",
167
+ "examples/stats/avg_value_frequency.rb",
168
+ "examples/stats/binning_percentile_estimator.rb",
169
+ "examples/stats/data/avg_value_frequency.tsv",
170
+ "examples/stats/rank_and_bin.rb",
171
+ "examples/stupidly_simple_filter.rb",
172
+ "examples/word_count.rb",
173
+ "lib/wukong.rb",
174
+ "lib/wukong/and_pig.rb",
175
+ "lib/wukong/bad_record.rb",
176
+ "lib/wukong/datatypes.rb",
177
+ "lib/wukong/datatypes/enum.rb",
178
+ "lib/wukong/datatypes/fake_types.rb",
179
+ "lib/wukong/encoding.rb",
180
+ "lib/wukong/encoding/asciize.rb",
181
+ "lib/wukong/extensions.rb",
182
+ "lib/wukong/extensions/array.rb",
183
+ "lib/wukong/extensions/blank.rb",
184
+ "lib/wukong/extensions/class.rb",
185
+ "lib/wukong/extensions/date_time.rb",
186
+ "lib/wukong/extensions/emittable.rb",
187
+ "lib/wukong/extensions/enumerable.rb",
188
+ "lib/wukong/extensions/hash.rb",
189
+ "lib/wukong/extensions/hash_keys.rb",
190
+ "lib/wukong/extensions/hash_like.rb",
191
+ "lib/wukong/extensions/hashlike_class.rb",
192
+ "lib/wukong/extensions/module.rb",
193
+ "lib/wukong/extensions/pathname.rb",
194
+ "lib/wukong/extensions/string.rb",
195
+ "lib/wukong/extensions/struct.rb",
196
+ "lib/wukong/extensions/symbol.rb",
197
+ "lib/wukong/filename_pattern.rb",
198
+ "lib/wukong/logger.rb",
199
+ "lib/wukong/periodic_monitor.rb",
200
+ "lib/wukong/schema.rb",
201
+ "lib/wukong/script.rb",
202
+ "lib/wukong/script/avro_command.rb",
203
+ "lib/wukong/script/cassandra_loader_script.rb",
204
+ "lib/wukong/script/emr_command.rb",
205
+ "lib/wukong/script/hadoop_command.rb",
206
+ "lib/wukong/script/local_command.rb",
207
+ "lib/wukong/store.rb",
208
+ "lib/wukong/store/base.rb",
209
+ "lib/wukong/store/cassandra.rb",
210
+ "lib/wukong/store/cassandra/streaming.rb",
211
+ "lib/wukong/store/cassandra/struct_loader.rb",
212
+ "lib/wukong/store/cassandra_model.rb",
213
+ "lib/wukong/store/chh_chunked_flat_file_store.rb",
214
+ "lib/wukong/store/chunked_flat_file_store.rb",
215
+ "lib/wukong/store/conditional_store.rb",
216
+ "lib/wukong/store/factory.rb",
217
+ "lib/wukong/store/flat_file_store.rb",
218
+ "lib/wukong/store/key_store.rb",
219
+ "lib/wukong/store/null_store.rb",
220
+ "lib/wukong/store/read_thru_store.rb",
221
+ "lib/wukong/store/tokyo_tdb_key_store.rb",
222
+ "lib/wukong/store/tyrant_rdb_key_store.rb",
223
+ "lib/wukong/store/tyrant_tdb_key_store.rb",
224
+ "lib/wukong/streamer.rb",
225
+ "lib/wukong/streamer/accumulating_reducer.rb",
226
+ "lib/wukong/streamer/base.rb",
227
+ "lib/wukong/streamer/counting_reducer.rb",
228
+ "lib/wukong/streamer/filter.rb",
229
+ "lib/wukong/streamer/line_streamer.rb",
230
+ "lib/wukong/streamer/list_reducer.rb",
231
+ "lib/wukong/streamer/rank_and_bin_reducer.rb",
232
+ "lib/wukong/streamer/record_streamer.rb",
233
+ "lib/wukong/streamer/reducer.rb",
234
+ "lib/wukong/streamer/set_reducer.rb",
235
+ "lib/wukong/streamer/struct_streamer.rb",
236
+ "lib/wukong/streamer/summing_reducer.rb",
237
+ "lib/wukong/streamer/uniq_by_last_reducer.rb",
238
+ "lib/wukong/typed_struct.rb",
239
+ "old/cassandra_streaming/berlitz_for_cassandra.textile",
240
+ "old/cassandra_streaming/client_interface_notes.textile",
241
+ "old/cassandra_streaming/client_schema.textile",
242
+ "old/cassandra_streaming/tuning.textile",
243
+ "spec/data/a_atsigns_b.tsv",
244
+ "spec/data/a_follows_b.tsv",
245
+ "spec/data/tweet.tsv",
246
+ "spec/data/twitter_user.tsv",
247
+ "spec/spec.opts",
248
+ "spec/spec_helper.rb",
249
+ "spec/wukong/encoding_spec.rb",
250
+ "spec/wukong/script_spec.rb",
251
+ "wukong.gemspec"
276
252
  ]
277
253
  s.homepage = %q{http://mrflip.github.com/wukong}
278
- s.rdoc_options = ["--charset=UTF-8"]
279
254
  s.require_paths = ["lib"]
280
- s.rubygems_version = %q{1.3.7}
255
+ s.rubygems_version = %q{1.4.2}
281
256
  s.summary = %q{Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use it, yet handles terabyte-scale computation with ease.}
282
257
  s.test_files = [
258
+ "examples/contrib/jeans/normalize.rb",
259
+ "examples/contrib/jeans/sizes.rb",
260
+ "examples/corpus/bucket_counter.rb",
261
+ "examples/corpus/dbpedia_abstract_to_sentences.rb",
262
+ "examples/corpus/sentence_coocurrence.rb",
263
+ "examples/corpus/words_to_bigrams.rb",
264
+ "examples/emr/elastic_mapreduce_example.rb",
265
+ "examples/ignore_me/counting.rb",
266
+ "examples/ignore_me/grouper.rb",
267
+ "examples/network_graph/adjacency_list.rb",
268
+ "examples/network_graph/breadth_first_search.rb",
269
+ "examples/network_graph/gen_2paths.rb",
270
+ "examples/network_graph/gen_multi_edge.rb",
271
+ "examples/network_graph/gen_symmetric_links.rb",
272
+ "examples/pagerank/pagerank.rb",
273
+ "examples/pagerank/pagerank_initialize.rb",
274
+ "examples/sample_records.rb",
275
+ "examples/server_logs/apache_log_parser.rb",
276
+ "examples/server_logs/breadcrumbs.rb",
277
+ "examples/server_logs/logline.rb",
278
+ "examples/server_logs/user_agent.rb",
279
+ "examples/size.rb",
280
+ "examples/stats/avg_value_frequency.rb",
281
+ "examples/stats/binning_percentile_estimator.rb",
282
+ "examples/stats/rank_and_bin.rb",
283
+ "examples/stupidly_simple_filter.rb",
284
+ "examples/word_count.rb",
283
285
  "spec/spec_helper.rb",
284
- "spec/wukong/encoding_spec.rb",
285
- "spec/wukong/script_spec.rb",
286
- "examples/binning_percentile_estimator.rb",
287
- "examples/cassandra_streaming/avromapper.rb",
288
- "examples/cassandra_streaming/cassandra_random_partitioner.rb",
289
- "examples/cassandra_streaming/struct_loader.rb",
290
- "examples/contrib/jeans/normalize.rb",
291
- "examples/contrib/jeans/sizes.rb",
292
- "examples/corpus/words_to_bigrams.rb",
293
- "examples/count_keys.rb",
294
- "examples/count_keys_at_mapper.rb",
295
- "examples/emr/elastic_mapreduce_example.rb",
296
- "examples/keystore/cassandra_batch_test.rb",
297
- "examples/keystore/conditional_outputter_example.rb",
298
- "examples/network_graph/adjacency_list.rb",
299
- "examples/network_graph/breadth_first_search.rb",
300
- "examples/network_graph/gen_2paths.rb",
301
- "examples/network_graph/gen_multi_edge.rb",
302
- "examples/network_graph/gen_symmetric_links.rb",
303
- "examples/pagerank/pagerank.rb",
304
- "examples/pagerank/pagerank_initialize.rb",
305
- "examples/rank_and_bin.rb",
306
- "examples/sample_records.rb",
307
- "examples/server_logs/apache_log_parser.rb",
308
- "examples/server_logs/breadcrumbs.rb",
309
- "examples/server_logs/user_agent.rb",
310
- "examples/size.rb",
311
- "examples/stats/avg_value_frequency.rb",
312
- "examples/store/chunked_store_example.rb",
313
- "examples/stupidly_simple_filter.rb",
314
- "examples/word_count.rb"
286
+ "spec/wukong/encoding_spec.rb",
287
+ "spec/wukong/script_spec.rb"
315
288
  ]
316
289
 
317
290
  if s.respond_to? :specification_version then
318
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
319
291
  s.specification_version = 3
320
292
 
321
293
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then