wukong 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. data/LICENSE.textile +107 -0
  2. data/README.textile +166 -0
  3. data/bin/cutc +30 -0
  4. data/bin/cuttab +5 -0
  5. data/bin/greptrue +8 -0
  6. data/bin/hdp-cat +3 -0
  7. data/bin/hdp-catd +3 -0
  8. data/bin/hdp-du +81 -0
  9. data/bin/hdp-get +3 -0
  10. data/bin/hdp-kill +3 -0
  11. data/bin/hdp-ls +10 -0
  12. data/bin/hdp-mkdir +3 -0
  13. data/bin/hdp-mv +3 -0
  14. data/bin/hdp-parts_to_keys.rb +77 -0
  15. data/bin/hdp-ps +3 -0
  16. data/bin/hdp-put +3 -0
  17. data/bin/hdp-rm +11 -0
  18. data/bin/hdp-sort +29 -0
  19. data/bin/hdp-stream +29 -0
  20. data/bin/hdp-stream-flat +18 -0
  21. data/bin/hdp-sync +17 -0
  22. data/bin/hdp-wc +67 -0
  23. data/bin/md5sort +20 -0
  24. data/bin/tabchar +5 -0
  25. data/bin/uniqc +3 -0
  26. data/bin/wu-hist +3 -0
  27. data/bin/wu-lign +177 -0
  28. data/bin/wu-sum +30 -0
  29. data/doc/INSTALL.textile +41 -0
  30. data/doc/LICENSE.textile +107 -0
  31. data/doc/README-tutorial.textile +163 -0
  32. data/doc/README-wulign.textile +59 -0
  33. data/doc/README-wutils.textile +128 -0
  34. data/doc/TODO.textile +61 -0
  35. data/doc/UsingWukong-part1-setup.textile +2 -0
  36. data/doc/UsingWukong-part2-scraping.textile +2 -0
  37. data/doc/UsingWukong-part3-parsing.textile +132 -0
  38. data/doc/code/api_response_example.txt +20 -0
  39. data/doc/code/parser_skeleton.rb +38 -0
  40. data/doc/hadoop-nfs.textile +51 -0
  41. data/doc/hadoop-setup.textile +29 -0
  42. data/doc/index.textile +124 -0
  43. data/doc/intro_to_map_reduce/MapReduceDiagram.graffle +0 -0
  44. data/doc/links.textile +42 -0
  45. data/doc/overview.textile +91 -0
  46. data/doc/pig/PigLatinExpressionsList.txt +122 -0
  47. data/doc/pig/PigLatinReferenceManual.html +19134 -0
  48. data/doc/pig/PigLatinReferenceManual.txt +1640 -0
  49. data/doc/tips.textile +116 -0
  50. data/doc/usage.textile +102 -0
  51. data/doc/utils.textile +48 -0
  52. data/examples/README.txt +17 -0
  53. data/examples/and_pig/sample_queries.rb +128 -0
  54. data/examples/apache_log_parser.rb +53 -0
  55. data/examples/count_keys.rb +56 -0
  56. data/examples/count_keys_at_mapper.rb +57 -0
  57. data/examples/graph/adjacency_list.rb +74 -0
  58. data/examples/graph/breadth_first_search.rb +79 -0
  59. data/examples/graph/gen_2paths.rb +68 -0
  60. data/examples/graph/gen_multi_edge.rb +103 -0
  61. data/examples/graph/gen_symmetric_links.rb +53 -0
  62. data/examples/package-local.rb +100 -0
  63. data/examples/package.rb +96 -0
  64. data/examples/pagerank/README.textile +6 -0
  65. data/examples/pagerank/gen_initial_pagerank_graph.pig +57 -0
  66. data/examples/pagerank/pagerank.rb +88 -0
  67. data/examples/pagerank/pagerank_initialize.rb +46 -0
  68. data/examples/pagerank/run_pagerank.sh +19 -0
  69. data/examples/rank_and_bin.rb +173 -0
  70. data/examples/run_all.sh +47 -0
  71. data/examples/sample_records.rb +44 -0
  72. data/examples/size.rb +60 -0
  73. data/examples/word_count.rb +95 -0
  74. data/lib/wukong.rb +11 -0
  75. data/lib/wukong/and_pig.rb +62 -0
  76. data/lib/wukong/and_pig/README.textile +12 -0
  77. data/lib/wukong/and_pig/as.rb +37 -0
  78. data/lib/wukong/and_pig/data_types.rb +30 -0
  79. data/lib/wukong/and_pig/functions.rb +50 -0
  80. data/lib/wukong/and_pig/generate.rb +85 -0
  81. data/lib/wukong/and_pig/generate/variable_inflections.rb +82 -0
  82. data/lib/wukong/and_pig/junk.rb +51 -0
  83. data/lib/wukong/and_pig/operators.rb +8 -0
  84. data/lib/wukong/and_pig/operators/compound.rb +29 -0
  85. data/lib/wukong/and_pig/operators/evaluators.rb +7 -0
  86. data/lib/wukong/and_pig/operators/execution.rb +15 -0
  87. data/lib/wukong/and_pig/operators/file_methods.rb +29 -0
  88. data/lib/wukong/and_pig/operators/foreach.rb +98 -0
  89. data/lib/wukong/and_pig/operators/groupies.rb +212 -0
  90. data/lib/wukong/and_pig/operators/load_store.rb +65 -0
  91. data/lib/wukong/and_pig/operators/meta.rb +42 -0
  92. data/lib/wukong/and_pig/operators/relational.rb +129 -0
  93. data/lib/wukong/and_pig/pig_struct.rb +48 -0
  94. data/lib/wukong/and_pig/pig_var.rb +95 -0
  95. data/lib/wukong/and_pig/symbol.rb +29 -0
  96. data/lib/wukong/and_pig/utils.rb +0 -0
  97. data/lib/wukong/bad_record.rb +18 -0
  98. data/lib/wukong/boot.rb +47 -0
  99. data/lib/wukong/datatypes.rb +24 -0
  100. data/lib/wukong/datatypes/enum.rb +123 -0
  101. data/lib/wukong/dfs.rb +80 -0
  102. data/lib/wukong/encoding.rb +111 -0
  103. data/lib/wukong/extensions.rb +15 -0
  104. data/lib/wukong/extensions/array.rb +18 -0
  105. data/lib/wukong/extensions/blank.rb +93 -0
  106. data/lib/wukong/extensions/class.rb +189 -0
  107. data/lib/wukong/extensions/date_time.rb +24 -0
  108. data/lib/wukong/extensions/emittable.rb +82 -0
  109. data/lib/wukong/extensions/hash.rb +120 -0
  110. data/lib/wukong/extensions/hash_like.rb +119 -0
  111. data/lib/wukong/extensions/hashlike_class.rb +47 -0
  112. data/lib/wukong/extensions/module.rb +2 -0
  113. data/lib/wukong/extensions/pathname.rb +27 -0
  114. data/lib/wukong/extensions/string.rb +65 -0
  115. data/lib/wukong/extensions/struct.rb +17 -0
  116. data/lib/wukong/extensions/symbol.rb +11 -0
  117. data/lib/wukong/logger.rb +53 -0
  118. data/lib/wukong/models/graph.rb +27 -0
  119. data/lib/wukong/rdf.rb +104 -0
  120. data/lib/wukong/schema.rb +37 -0
  121. data/lib/wukong/script.rb +265 -0
  122. data/lib/wukong/script/hadoop_command.rb +111 -0
  123. data/lib/wukong/script/local_command.rb +14 -0
  124. data/lib/wukong/streamer.rb +13 -0
  125. data/lib/wukong/streamer/accumulating_reducer.rb +89 -0
  126. data/lib/wukong/streamer/base.rb +76 -0
  127. data/lib/wukong/streamer/count_keys.rb +30 -0
  128. data/lib/wukong/streamer/count_lines.rb +26 -0
  129. data/lib/wukong/streamer/filter.rb +20 -0
  130. data/lib/wukong/streamer/line_streamer.rb +12 -0
  131. data/lib/wukong/streamer/list_reducer.rb +20 -0
  132. data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +22 -0
  133. data/lib/wukong/streamer/rank_and_bin_reducer.rb +145 -0
  134. data/lib/wukong/streamer/set_reducer.rb +14 -0
  135. data/lib/wukong/streamer/struct_streamer.rb +48 -0
  136. data/lib/wukong/streamer/summing_reducer.rb +29 -0
  137. data/lib/wukong/streamer/uniq_by_last_reducer.rb +44 -0
  138. data/lib/wukong/typed_struct.rb +12 -0
  139. data/lib/wukong/wukong_class.rb +21 -0
  140. data/spec/bin/hdp-wc_spec.rb +4 -0
  141. data/spec/spec_helper.rb +0 -0
  142. data/wukong.gemspec +179 -0
  143. metadata +214 -0
metadata ADDED
@@ -0,0 +1,214 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wukong
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Philip (flip) Kromer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-09-28 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: " Treat your dataset like a:\n\n * stream of lines when it\xE2\x80\x99s efficient to process by lines\n * stream of field arrays when it\xE2\x80\x99s efficient to deal directly with fields\n * stream of lightweight objects when it\xE2\x80\x99s efficient to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.\n"
17
+ email: flip@infochimps.org
18
+ executables:
19
+ - cutc
20
+ - cuttab
21
+ - greptrue
22
+ - hdp-cat
23
+ - hdp-catd
24
+ - hdp-du
25
+ - hdp-get
26
+ - hdp-kill
27
+ - hdp-ls
28
+ - hdp-mkdir
29
+ - hdp-mv
30
+ - hdp-parts_to_keys.rb
31
+ - hdp-ps
32
+ - hdp-put
33
+ - hdp-rm
34
+ - hdp-sort
35
+ - hdp-stream
36
+ - hdp-stream-flat
37
+ - hdp-sync
38
+ - hdp-wc
39
+ - md5sort
40
+ - tabchar
41
+ - uniqc
42
+ - wu-hist
43
+ - wu-lign
44
+ - wu-sum
45
+ extensions: []
46
+
47
+ extra_rdoc_files:
48
+ - LICENSE.textile
49
+ - README.textile
50
+ files:
51
+ - doc/INSTALL.textile
52
+ - doc/LICENSE.textile
53
+ - doc/README-tutorial.textile
54
+ - doc/README-wulign.textile
55
+ - doc/README-wutils.textile
56
+ - doc/TODO.textile
57
+ - doc/UsingWukong-part1-setup.textile
58
+ - doc/UsingWukong-part2-scraping.textile
59
+ - doc/UsingWukong-part3-parsing.textile
60
+ - doc/code/api_response_example.txt
61
+ - doc/code/parser_skeleton.rb
62
+ - doc/hadoop-nfs.textile
63
+ - doc/hadoop-setup.textile
64
+ - doc/index.textile
65
+ - doc/intro_to_map_reduce/MapReduceDiagram.graffle
66
+ - doc/links.textile
67
+ - doc/overview.textile
68
+ - doc/pig/PigLatinExpressionsList.txt
69
+ - doc/pig/PigLatinReferenceManual.html
70
+ - doc/pig/PigLatinReferenceManual.txt
71
+ - doc/tips.textile
72
+ - doc/usage.textile
73
+ - doc/utils.textile
74
+ - examples/README.txt
75
+ - examples/and_pig/sample_queries.rb
76
+ - examples/apache_log_parser.rb
77
+ - examples/count_keys.rb
78
+ - examples/count_keys_at_mapper.rb
79
+ - examples/graph/adjacency_list.rb
80
+ - examples/graph/breadth_first_search.rb
81
+ - examples/graph/gen_2paths.rb
82
+ - examples/graph/gen_multi_edge.rb
83
+ - examples/graph/gen_symmetric_links.rb
84
+ - examples/package-local.rb
85
+ - examples/package.rb
86
+ - examples/pagerank/README.textile
87
+ - examples/pagerank/gen_initial_pagerank_graph.pig
88
+ - examples/pagerank/pagerank.rb
89
+ - examples/pagerank/pagerank_initialize.rb
90
+ - examples/pagerank/run_pagerank.sh
91
+ - examples/rank_and_bin.rb
92
+ - examples/run_all.sh
93
+ - examples/sample_records.rb
94
+ - examples/size.rb
95
+ - examples/word_count.rb
96
+ - lib/wukong.rb
97
+ - lib/wukong/and_pig.rb
98
+ - lib/wukong/and_pig/README.textile
99
+ - lib/wukong/and_pig/as.rb
100
+ - lib/wukong/and_pig/data_types.rb
101
+ - lib/wukong/and_pig/functions.rb
102
+ - lib/wukong/and_pig/generate.rb
103
+ - lib/wukong/and_pig/generate/variable_inflections.rb
104
+ - lib/wukong/and_pig/junk.rb
105
+ - lib/wukong/and_pig/operators.rb
106
+ - lib/wukong/and_pig/operators/compound.rb
107
+ - lib/wukong/and_pig/operators/evaluators.rb
108
+ - lib/wukong/and_pig/operators/execution.rb
109
+ - lib/wukong/and_pig/operators/file_methods.rb
110
+ - lib/wukong/and_pig/operators/foreach.rb
111
+ - lib/wukong/and_pig/operators/groupies.rb
112
+ - lib/wukong/and_pig/operators/load_store.rb
113
+ - lib/wukong/and_pig/operators/meta.rb
114
+ - lib/wukong/and_pig/operators/relational.rb
115
+ - lib/wukong/and_pig/pig_struct.rb
116
+ - lib/wukong/and_pig/pig_var.rb
117
+ - lib/wukong/and_pig/symbol.rb
118
+ - lib/wukong/and_pig/utils.rb
119
+ - lib/wukong/bad_record.rb
120
+ - lib/wukong/boot.rb
121
+ - lib/wukong/datatypes.rb
122
+ - lib/wukong/datatypes/enum.rb
123
+ - lib/wukong/dfs.rb
124
+ - lib/wukong/encoding.rb
125
+ - lib/wukong/extensions.rb
126
+ - lib/wukong/extensions/array.rb
127
+ - lib/wukong/extensions/blank.rb
128
+ - lib/wukong/extensions/class.rb
129
+ - lib/wukong/extensions/date_time.rb
130
+ - lib/wukong/extensions/emittable.rb
131
+ - lib/wukong/extensions/hash.rb
132
+ - lib/wukong/extensions/hash_like.rb
133
+ - lib/wukong/extensions/hashlike_class.rb
134
+ - lib/wukong/extensions/module.rb
135
+ - lib/wukong/extensions/pathname.rb
136
+ - lib/wukong/extensions/string.rb
137
+ - lib/wukong/extensions/struct.rb
138
+ - lib/wukong/extensions/symbol.rb
139
+ - lib/wukong/logger.rb
140
+ - lib/wukong/models/graph.rb
141
+ - lib/wukong/rdf.rb
142
+ - lib/wukong/schema.rb
143
+ - lib/wukong/script.rb
144
+ - lib/wukong/script/hadoop_command.rb
145
+ - lib/wukong/script/local_command.rb
146
+ - lib/wukong/streamer.rb
147
+ - lib/wukong/streamer/accumulating_reducer.rb
148
+ - lib/wukong/streamer/base.rb
149
+ - lib/wukong/streamer/count_keys.rb
150
+ - lib/wukong/streamer/count_lines.rb
151
+ - lib/wukong/streamer/filter.rb
152
+ - lib/wukong/streamer/line_streamer.rb
153
+ - lib/wukong/streamer/list_reducer.rb
154
+ - lib/wukong/streamer/preprocess_with_pipe_streamer.rb
155
+ - lib/wukong/streamer/rank_and_bin_reducer.rb
156
+ - lib/wukong/streamer/set_reducer.rb
157
+ - lib/wukong/streamer/struct_streamer.rb
158
+ - lib/wukong/streamer/summing_reducer.rb
159
+ - lib/wukong/streamer/uniq_by_last_reducer.rb
160
+ - lib/wukong/typed_struct.rb
161
+ - lib/wukong/wukong_class.rb
162
+ - spec/bin/hdp-wc_spec.rb
163
+ - spec/spec_helper.rb
164
+ - wukong.gemspec
165
+ - LICENSE.textile
166
+ - README.textile
167
+ has_rdoc: true
168
+ homepage: http://github.com/mrflip/wukong
169
+ licenses: []
170
+
171
+ post_install_message:
172
+ rdoc_options:
173
+ - --charset=UTF-8
174
+ require_paths:
175
+ - lib
176
+ required_ruby_version: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: "0"
181
+ version:
182
+ required_rubygems_version: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: "0"
187
+ version:
188
+ requirements: []
189
+
190
+ rubyforge_project:
191
+ rubygems_version: 1.3.5
192
+ signing_key:
193
+ specification_version: 3
194
+ summary: Wukong makes Hadoop so easy a chimpanzee can use it.
195
+ test_files:
196
+ - spec/bin/hdp-wc_spec.rb
197
+ - spec/spec_helper.rb
198
+ - examples/and_pig/sample_queries.rb
199
+ - examples/apache_log_parser.rb
200
+ - examples/count_keys.rb
201
+ - examples/count_keys_at_mapper.rb
202
+ - examples/graph/adjacency_list.rb
203
+ - examples/graph/breadth_first_search.rb
204
+ - examples/graph/gen_2paths.rb
205
+ - examples/graph/gen_multi_edge.rb
206
+ - examples/graph/gen_symmetric_links.rb
207
+ - examples/package-local.rb
208
+ - examples/package.rb
209
+ - examples/pagerank/pagerank.rb
210
+ - examples/pagerank/pagerank_initialize.rb
211
+ - examples/rank_and_bin.rb
212
+ - examples/sample_records.rb
213
+ - examples/size.rb
214
+ - examples/word_count.rb