wukong 3.0.0.pre → 3.0.0.pre2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,19 +1,19 @@
1
- require 'spec_helper'
1
+ # require 'spec_helper'
2
2
 
3
- describe :sinks, :helpers => true do
3
+ # describe :sinks, :helpers => true do
4
4
 
5
- describe Wukong::Sink::Stdout do
6
- it 'dumps records to $stdout' do
7
- $stdout.should_receive(:puts).with(mock_record)
8
- subject.process(mock_record)
9
- end
10
- end
5
+ # describe Wukong::Sink::Stdout do
6
+ # it 'dumps records to $stdout' do
7
+ # $stdout.should_receive(:puts).with(mock_record)
8
+ # subject.process(mock_record)
9
+ # end
10
+ # end
11
11
 
12
- describe Wukong::Sink::Stderr do
13
- it 'dumps records to $stderr' do
14
- $stderr.should_receive(:puts).with(mock_record)
15
- subject.process(mock_record)
16
- end
12
+ # describe Wukong::Sink::Stderr do
13
+ # it 'dumps records to $stderr' do
14
+ # $stderr.should_receive(:puts).with(mock_record)
15
+ # subject.process(mock_record)
16
+ # end
17
17
 
18
- end
19
- end
18
+ # end
19
+ # end
@@ -1,41 +1,65 @@
1
- require 'spec_helper'
2
- require 'wukong/widget/gibberish'
3
-
4
- describe :sources, :helpers => true do
5
- describe Wukong::Source::Integers do
6
- subject{ described_class.receive(:size => 10) }
7
- before{ subject.setup }
8
-
9
- it 'generates integers up to the given limit' do
10
- subject.to_enum.to_a.should == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
11
- end
12
- it 'generates nothing if the initial range is void' do
13
- subject.size = 0
14
- subject.to_enum.to_a.should == []
15
- end
16
- it 'generates one thing if the min and max are equal' do
17
- subject.size = 1
18
- subject.to_enum.to_a.should == [0]
19
- end
20
-
21
- context 'dataflow method' do
22
- it 'is defined' do
23
- end
24
- it 'takes simplified args' do
25
- described_class.should_receive(:receive).with({:size => 99}).and_return(described_class.new)
26
- test_dataflow.integers(99)
27
- end
28
- end
29
- end
30
-
31
- describe Wukong::Widget::Gibberish do
32
- subject{ described_class.receive(:size => 3) }
33
- before{ subject.setup }
34
-
35
- it 'generates integers up to the given limit' do
36
- subject.rng = Random.new(8675309)
37
- subject.to_enum.to_a.should == ["loaiaeiaeo neidgfo heeume sptfmeec naet sttptlm waaaioh detov elrrltv nii ulcsnn", "set ensr poeleaa seqi tmnreoee boooral oczncgp deaia rykcoao leo rim mmibpbfii", "artrru sto quuu doo peoehrile nto esl tia gaili tuiooey lkedotp sts kaiy weeeia", "crhi tyiiae mieubmbooa teeae roi ednz taieh zaloy syhe ret kuoa deeeo xittipl mo"]
38
- end
39
-
40
- end
41
- end
1
+ # require 'spec_helper'
2
+ # require 'wukong/widget/gibberish'
3
+
4
+ # describe :sources, :helpers => true do
5
+
6
+ # describe Wukong::Source::Iter do
7
+ # subject{ described_class.new(obj: (9 .. 14), owner: test_dataflow) }
8
+ # it 'iterates over a given collection' do
9
+ # subject.to_enum.to_a.should == [9, 10, 11, 12, 13, 14]
10
+ # end
11
+ # context 'dataflow method' do
12
+ # it 'simplified args' do
13
+ # test_dataflow.iter(9 .. 14).should == subject
14
+ # end
15
+ # end
16
+ # end
17
+
18
+ # # describe Wukong::Source::FileSource do
19
+ # # let(:example_filename){ Pathname.path_to(:data, 'text/jabberwocky.txt') }
20
+ # # subject{ described_class.receive(filename: example_filename, owner: test_dataflow) }
21
+ # # before{ subject.setup }
22
+ # # it 'iterates over a given collection' do
23
+ # # subject.to_enum.to_a[6, 4].should == ["`Twas brillig, and the slithy toves", " Did gyre and gimble in the wabe:", "All mimsy were the borogoves,", " And the mome raths outgrabe.",]
24
+ # # end
25
+ # # context 'dataflow method' do
26
+ # # it 'simplified args' do
27
+ # # test_dataflow.file_source(example_filename).should == subject
28
+ # # end
29
+ # # end
30
+ # # end
31
+
32
+ # describe Wukong::Source::Integers do
33
+ # subject{ described_class.receive(qty: 10, owner: test_dataflow) }
34
+ # before{ subject.setup }
35
+
36
+ # it 'generates integers up to the given limit' do
37
+ # subject.to_enum.to_a.should == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
38
+ # end
39
+ # it 'generates nothing if the initial range is void' do
40
+ # subject.qty = 0
41
+ # subject.to_enum.to_a.should == []
42
+ # end
43
+ # it 'generates one thing if the min and max are equal' do
44
+ # subject.qty = 1
45
+ # subject.to_enum.to_a.should == [0]
46
+ # end
47
+
48
+ # context 'dataflow method' do
49
+ # it 'takes simplified args' do
50
+ # test_dataflow.integers(10).should == subject
51
+ # end
52
+ # end
53
+ # end
54
+
55
+ # describe Wukong::Widget::Gibberish do
56
+ # subject{ described_class.receive(:qty => 4) }
57
+ # before{ subject.setup }
58
+
59
+ # it 'generates integers up to the given limit' do
60
+ # subject.rng = Random.new(8675309)
61
+ # subject.to_enum.to_a.should == ["loaiaeiaeo neidgfo heeume sptfmeec naet sttptlm waaaioh detov elrrltv nii ulcsnn", "set ensr poeleaa seqi tmnreoee boooral oczncgp deaia rykcoao leo rim mmibpbfii", "artrru sto quuu doo peoehrile nto esl tia gaili tuiooey lkedotp sts kaiy weeeia", "crhi tyiiae mieubmbooa teeae roi ednz taieh zaloy syhe ret kuoa deeeo xittipl mo"]
62
+ # end
63
+
64
+ # end
65
+ # end
@@ -0,0 +1,10 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong do
4
+
5
+ it_behaves_like Hanuman::Shortcuts
6
+
7
+ it{ should respond_to(:processor) }
8
+ it{ should respond_to(:dataflow) }
9
+
10
+ end
@@ -2,12 +2,15 @@
2
2
  require File.expand_path('../lib/wukong/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
- gem.name = 'wukong'
6
- gem.version = Wukong::VERSION
7
- gem.authors = ['Philip (flip) Kromer', 'Travis Dempsey']
8
- gem.homepage = 'https://github.com/infochimps-labs/wukong'
9
- gem.summary = 'Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use it, yet handles terabyte-scale computation with ease.'
10
- gem.description = <<DESC
5
+ gem.name = 'wukong'
6
+ gem.homepage = 'https://github.com/infochimps-labs/wukong'
7
+ gem.licenses = ["Apache 2.0"]
8
+ gem.email = 'coders@infochimps.org'
9
+ gem.authors = ['Infochimps', 'Philip (flip) Kromer', 'Travis Dempsey']
10
+ gem.version = Wukong::VERSION
11
+
12
+ gem.summary = 'Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use it, yet handles terabyte-scale computation with ease.'
13
+ gem.description = <<-EOF
11
14
  Treat your dataset like a:
12
15
 
13
16
  * stream of lines when it's efficient to process by lines
@@ -15,21 +18,23 @@ Gem::Specification.new do |gem|
15
18
  * stream of lightweight objects when it's efficient to deal with objects
16
19
 
17
20
  Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
18
- DESC
21
+ EOF
19
22
 
20
- gem.files = `git ls-files`.split("\n")
21
- gem.executables = [] # gem.files.grep(/^bin/).map{ |f| File.basename(f) }
23
+ gem.files = `git ls-files`.split("\n").reject { |path| path =~ /^(data|docpages|notes|old)/ }
24
+ gem.executables = ['wu-local']
22
25
  gem.test_files = gem.files.grep(/^spec/)
23
26
  gem.require_paths = ['lib']
24
27
 
25
- gem.add_dependency('bundler', '~> 1.1')
26
28
  gem.add_dependency('configliere', '~> 0.4')
27
29
  gem.add_dependency('multi_json', '>= 1.3.6')
28
30
  gem.add_dependency('vayacondios-client', '>= 0.0.3')
29
31
  gem.add_dependency('gorillib', '>= 0.4.2')
30
32
  gem.add_dependency('forgery')
31
33
  gem.add_dependency('uuidtools')
34
+ gem.add_dependency('eventmachine')
35
+ gem.add_dependency('log4r')
32
36
 
37
+ gem.add_development_dependency('bundler', '~> 1.1')
33
38
  gem.add_development_dependency('rake', '>= 0.9')
34
39
  gem.add_development_dependency('rspec', '>= 2.8')
35
40
  gem.add_development_dependency('guard', '>= 1.0')
@@ -38,5 +43,7 @@ DESC
38
43
  gem.add_development_dependency('pry')
39
44
  gem.add_development_dependency('yard')
40
45
  gem.add_development_dependency('redcarpet')
46
+ gem.add_development_dependency('addressable')
47
+ gem.add_development_dependency('htmlentities')
41
48
 
42
49
  end
metadata CHANGED
@@ -1,42 +1,37 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.pre
4
+ version: 3.0.0.pre2
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
8
+ - Infochimps
8
9
  - Philip (flip) Kromer
9
10
  - Travis Dempsey
10
11
  autorequire:
11
12
  bindir: bin
12
13
  cert_chain: []
13
- date: 2012-09-20 00:00:00.000000000Z
14
+ date: 2012-12-01 00:00:00.000000000 Z
14
15
  dependencies:
15
16
  - !ruby/object:Gem::Dependency
16
- name: bundler
17
- requirement: &2162964540 !ruby/object:Gem::Requirement
17
+ name: configliere
18
+ requirement: !ruby/object:Gem::Requirement
18
19
  none: false
19
20
  requirements:
20
21
  - - ~>
21
22
  - !ruby/object:Gem::Version
22
- version: '1.1'
23
+ version: '0.4'
23
24
  type: :runtime
24
25
  prerelease: false
25
- version_requirements: *2162964540
26
- - !ruby/object:Gem::Dependency
27
- name: configliere
28
- requirement: &2162964020 !ruby/object:Gem::Requirement
26
+ version_requirements: !ruby/object:Gem::Requirement
29
27
  none: false
30
28
  requirements:
31
29
  - - ~>
32
30
  - !ruby/object:Gem::Version
33
31
  version: '0.4'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: *2162964020
37
32
  - !ruby/object:Gem::Dependency
38
33
  name: multi_json
39
- requirement: &2162963500 !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
40
35
  none: false
41
36
  requirements:
42
37
  - - ! '>='
@@ -44,10 +39,15 @@ dependencies:
44
39
  version: 1.3.6
45
40
  type: :runtime
46
41
  prerelease: false
47
- version_requirements: *2162963500
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.3.6
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: vayacondios-client
50
- requirement: &2162962920 !ruby/object:Gem::Requirement
50
+ requirement: !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ! '>='
@@ -55,10 +55,15 @@ dependencies:
55
55
  version: 0.0.3
56
56
  type: :runtime
57
57
  prerelease: false
58
- version_requirements: *2162962920
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: 0.0.3
59
64
  - !ruby/object:Gem::Dependency
60
65
  name: gorillib
61
- requirement: &2162962320 !ruby/object:Gem::Requirement
66
+ requirement: !ruby/object:Gem::Requirement
62
67
  none: false
63
68
  requirements:
64
69
  - - ! '>='
@@ -66,10 +71,15 @@ dependencies:
66
71
  version: 0.4.2
67
72
  type: :runtime
68
73
  prerelease: false
69
- version_requirements: *2162962320
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: 0.4.2
70
80
  - !ruby/object:Gem::Dependency
71
81
  name: forgery
72
- requirement: &2162961820 !ruby/object:Gem::Requirement
82
+ requirement: !ruby/object:Gem::Requirement
73
83
  none: false
74
84
  requirements:
75
85
  - - ! '>='
@@ -77,10 +87,31 @@ dependencies:
77
87
  version: '0'
78
88
  type: :runtime
79
89
  prerelease: false
80
- version_requirements: *2162961820
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ none: false
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
81
96
  - !ruby/object:Gem::Dependency
82
97
  name: uuidtools
83
- requirement: &2162961220 !ruby/object:Gem::Requirement
98
+ requirement: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ! '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ - !ruby/object:Gem::Dependency
113
+ name: eventmachine
114
+ requirement: !ruby/object:Gem::Requirement
84
115
  none: false
85
116
  requirements:
86
117
  - - ! '>='
@@ -88,10 +119,47 @@ dependencies:
88
119
  version: '0'
89
120
  type: :runtime
90
121
  prerelease: false
91
- version_requirements: *2162961220
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ - !ruby/object:Gem::Dependency
129
+ name: log4r
130
+ requirement: !ruby/object:Gem::Requirement
131
+ none: false
132
+ requirements:
133
+ - - ! '>='
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ type: :runtime
137
+ prerelease: false
138
+ version_requirements: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ! '>='
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ - !ruby/object:Gem::Dependency
145
+ name: bundler
146
+ requirement: !ruby/object:Gem::Requirement
147
+ none: false
148
+ requirements:
149
+ - - ~>
150
+ - !ruby/object:Gem::Version
151
+ version: '1.1'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ none: false
156
+ requirements:
157
+ - - ~>
158
+ - !ruby/object:Gem::Version
159
+ version: '1.1'
92
160
  - !ruby/object:Gem::Dependency
93
161
  name: rake
94
- requirement: &2162960580 !ruby/object:Gem::Requirement
162
+ requirement: !ruby/object:Gem::Requirement
95
163
  none: false
96
164
  requirements:
97
165
  - - ! '>='
@@ -99,10 +167,15 @@ dependencies:
99
167
  version: '0.9'
100
168
  type: :development
101
169
  prerelease: false
102
- version_requirements: *2162960580
170
+ version_requirements: !ruby/object:Gem::Requirement
171
+ none: false
172
+ requirements:
173
+ - - ! '>='
174
+ - !ruby/object:Gem::Version
175
+ version: '0.9'
103
176
  - !ruby/object:Gem::Dependency
104
177
  name: rspec
105
- requirement: &2162959980 !ruby/object:Gem::Requirement
178
+ requirement: !ruby/object:Gem::Requirement
106
179
  none: false
107
180
  requirements:
108
181
  - - ! '>='
@@ -110,10 +183,15 @@ dependencies:
110
183
  version: '2.8'
111
184
  type: :development
112
185
  prerelease: false
113
- version_requirements: *2162959980
186
+ version_requirements: !ruby/object:Gem::Requirement
187
+ none: false
188
+ requirements:
189
+ - - ! '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '2.8'
114
192
  - !ruby/object:Gem::Dependency
115
193
  name: guard
116
- requirement: &2162959400 !ruby/object:Gem::Requirement
194
+ requirement: !ruby/object:Gem::Requirement
117
195
  none: false
118
196
  requirements:
119
197
  - - ! '>='
@@ -121,10 +199,15 @@ dependencies:
121
199
  version: '1.0'
122
200
  type: :development
123
201
  prerelease: false
124
- version_requirements: *2162959400
202
+ version_requirements: !ruby/object:Gem::Requirement
203
+ none: false
204
+ requirements:
205
+ - - ! '>='
206
+ - !ruby/object:Gem::Version
207
+ version: '1.0'
125
208
  - !ruby/object:Gem::Dependency
126
209
  name: guard-rspec
127
- requirement: &2162958780 !ruby/object:Gem::Requirement
210
+ requirement: !ruby/object:Gem::Requirement
128
211
  none: false
129
212
  requirements:
130
213
  - - ! '>='
@@ -132,10 +215,15 @@ dependencies:
132
215
  version: '0.6'
133
216
  type: :development
134
217
  prerelease: false
135
- version_requirements: *2162958780
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ none: false
220
+ requirements:
221
+ - - ! '>='
222
+ - !ruby/object:Gem::Version
223
+ version: '0.6'
136
224
  - !ruby/object:Gem::Dependency
137
225
  name: simplecov
138
- requirement: &2162958140 !ruby/object:Gem::Requirement
226
+ requirement: !ruby/object:Gem::Requirement
139
227
  none: false
140
228
  requirements:
141
229
  - - ! '>='
@@ -143,10 +231,15 @@ dependencies:
143
231
  version: '0.5'
144
232
  type: :development
145
233
  prerelease: false
146
- version_requirements: *2162958140
234
+ version_requirements: !ruby/object:Gem::Requirement
235
+ none: false
236
+ requirements:
237
+ - - ! '>='
238
+ - !ruby/object:Gem::Version
239
+ version: '0.5'
147
240
  - !ruby/object:Gem::Dependency
148
241
  name: pry
149
- requirement: &2162957700 !ruby/object:Gem::Requirement
242
+ requirement: !ruby/object:Gem::Requirement
150
243
  none: false
151
244
  requirements:
152
245
  - - ! '>='
@@ -154,10 +247,15 @@ dependencies:
154
247
  version: '0'
155
248
  type: :development
156
249
  prerelease: false
157
- version_requirements: *2162957700
250
+ version_requirements: !ruby/object:Gem::Requirement
251
+ none: false
252
+ requirements:
253
+ - - ! '>='
254
+ - !ruby/object:Gem::Version
255
+ version: '0'
158
256
  - !ruby/object:Gem::Dependency
159
257
  name: yard
160
- requirement: &2162957140 !ruby/object:Gem::Requirement
258
+ requirement: !ruby/object:Gem::Requirement
161
259
  none: false
162
260
  requirements:
163
261
  - - ! '>='
@@ -165,10 +263,47 @@ dependencies:
165
263
  version: '0'
166
264
  type: :development
167
265
  prerelease: false
168
- version_requirements: *2162957140
266
+ version_requirements: !ruby/object:Gem::Requirement
267
+ none: false
268
+ requirements:
269
+ - - ! '>='
270
+ - !ruby/object:Gem::Version
271
+ version: '0'
169
272
  - !ruby/object:Gem::Dependency
170
273
  name: redcarpet
171
- requirement: &2162956680 !ruby/object:Gem::Requirement
274
+ requirement: !ruby/object:Gem::Requirement
275
+ none: false
276
+ requirements:
277
+ - - ! '>='
278
+ - !ruby/object:Gem::Version
279
+ version: '0'
280
+ type: :development
281
+ prerelease: false
282
+ version_requirements: !ruby/object:Gem::Requirement
283
+ none: false
284
+ requirements:
285
+ - - ! '>='
286
+ - !ruby/object:Gem::Version
287
+ version: '0'
288
+ - !ruby/object:Gem::Dependency
289
+ name: addressable
290
+ requirement: !ruby/object:Gem::Requirement
291
+ none: false
292
+ requirements:
293
+ - - ! '>='
294
+ - !ruby/object:Gem::Version
295
+ version: '0'
296
+ type: :development
297
+ prerelease: false
298
+ version_requirements: !ruby/object:Gem::Requirement
299
+ none: false
300
+ requirements:
301
+ - - ! '>='
302
+ - !ruby/object:Gem::Version
303
+ version: '0'
304
+ - !ruby/object:Gem::Dependency
305
+ name: htmlentities
306
+ requirement: !ruby/object:Gem::Requirement
172
307
  none: false
173
308
  requirements:
174
309
  - - ! '>='
@@ -176,18 +311,23 @@ dependencies:
176
311
  version: '0'
177
312
  type: :development
178
313
  prerelease: false
179
- version_requirements: *2162956680
314
+ version_requirements: !ruby/object:Gem::Requirement
315
+ none: false
316
+ requirements:
317
+ - - ! '>='
318
+ - !ruby/object:Gem::Version
319
+ version: '0'
180
320
  description: ! " Treat your dataset like a:\n\n * stream of lines when it's
181
321
  efficient to process by lines\n * stream of field arrays when it's efficient
182
322
  to deal directly with fields\n * stream of lightweight objects when it's efficient
183
323
  to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query
184
324
  language, and the cat on your command line.\n"
185
- email:
186
- executables: []
325
+ email: coders@infochimps.org
326
+ executables:
327
+ - wu-local
187
328
  extensions: []
188
329
  extra_rdoc_files: []
189
330
  files:
190
- - .document
191
331
  - .gitignore
192
332
  - .gitmodules
193
333
  - .rspec
@@ -196,142 +336,137 @@ files:
196
336
  - CHANGELOG.md
197
337
  - Gemfile
198
338
  - Guardfile
199
- - LICENSE.textile
339
+ - LICENSE.md
200
340
  - NOTES-travis.md
341
+ - README-old.md
201
342
  - README.md
202
343
  - Rakefile
203
344
  - TODO.md
204
- - VERSION
205
345
  - bin/cutc
206
346
  - bin/cuttab
207
347
  - bin/greptrue
208
- - bin/hdp-bin
209
- - bin/hdp-bzip
210
- - bin/hdp-cat
211
- - bin/hdp-catd
212
- - bin/hdp-cp
213
- - bin/hdp-du
214
- - bin/hdp-get
215
- - bin/hdp-kill
216
- - bin/hdp-kill-task
217
- - bin/hdp-ls
218
- - bin/hdp-mkdir
219
- - bin/hdp-mkdirp
220
- - bin/hdp-mv
221
- - bin/hdp-parts_to_keys.rb
222
- - bin/hdp-ps
223
- - bin/hdp-put
224
- - bin/hdp-rm
225
- - bin/hdp-sort
226
- - bin/hdp-stream
227
- - bin/hdp-stream-flat
228
- - bin/hdp-stream2
229
- - bin/hdp-sync
230
- - bin/hdp-wc
231
348
  - bin/md5sort
232
349
  - bin/setcat
233
350
  - bin/tabchar
234
351
  - bin/uniq-ord
235
352
  - bin/uniqc
353
+ - bin/wu-clean-encoding
236
354
  - bin/wu-date
237
355
  - bin/wu-datetime
238
- - bin/wu-flow
239
356
  - bin/wu-hist
240
357
  - bin/wu-lign
241
- - bin/wu-map
358
+ - bin/wu-local
242
359
  - bin/wu-plus
243
- - bin/wu-red
360
+ - bin/wu-server
244
361
  - bin/wu-sum
245
- - bin/wukong
246
- - data/CREDITS.md
247
- - data/graph/airfares.tsv
248
- - data/log/sample_apache_log.log
249
- - data/text/gift_of_the_magi.txt
250
- - data/text/jabberwocky.txt
251
- - data/text/rectification_of_names.txt
252
- - data/twitter/a_atsigns_b.tsv
253
- - data/twitter/a_follows_b.tsv
254
- - data/twitter/tweet.tsv
255
- - data/twitter/twitter_user.tsv
256
- - data/wikipedia/dbpedia-sentences.tsv
257
- - docpages/INSTALL.textile
258
- - docpages/LICENSE.textile
259
- - docpages/README-elastic_map_reduce.textile
260
- - docpages/README-performance.textile
261
- - docpages/README-wulign.textile
262
- - docpages/UsingWukong-part1-get_ready.textile
263
- - docpages/UsingWukong-part2-ThinkingBigData.textile
264
- - docpages/UsingWukong-part3-parsing.textile
265
- - docpages/_config.yml
266
- - docpages/avro/avro_notes.textile
267
- - docpages/avro/performance.textile
268
- - docpages/avro/tethering.textile
269
- - docpages/bigdata-tips.textile
270
- - docpages/code/api_response_example.txt
271
- - docpages/code/parser_skeleton.rb
272
- - docpages/diagrams/MapReduceDiagram.graffle
273
- - docpages/favicon.ico
274
- - docpages/gem.css
275
- - docpages/hadoop-tips.textile
276
- - docpages/index.textile
277
- - docpages/intro.textile
278
- - docpages/moreinfo.textile
279
- - docpages/news.html
280
- - docpages/pig/PigLatinExpressionsList.txt
281
- - docpages/pig/PigLatinReferenceManual.txt
282
- - docpages/pig/commandline_params.txt
283
- - docpages/pig/cookbook.html
284
- - docpages/pig/images/hadoop-logo.jpg
285
- - docpages/pig/images/instruction_arrow.png
286
- - docpages/pig/images/pig-logo.gif
287
- - docpages/pig/piglatin_ref1.html
288
- - docpages/pig/piglatin_ref2.html
289
- - docpages/pig/setup.html
290
- - docpages/pig/skin/basic.css
291
- - docpages/pig/skin/breadcrumbs.js
292
- - docpages/pig/skin/fontsize.js
293
- - docpages/pig/skin/getBlank.js
294
- - docpages/pig/skin/getMenu.js
295
- - docpages/pig/skin/images/chapter.gif
296
- - docpages/pig/skin/images/chapter_open.gif
297
- - docpages/pig/skin/images/current.gif
298
- - docpages/pig/skin/images/external-link.gif
299
- - docpages/pig/skin/images/header_white_line.gif
300
- - docpages/pig/skin/images/page.gif
301
- - docpages/pig/skin/images/pdfdoc.gif
302
- - docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png
303
- - docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png
304
- - docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png
305
- - docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png
306
- - docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png
307
- - docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png
308
- - docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png
309
- - docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png
310
- - docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png
311
- - docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png
312
- - docpages/pig/skin/print.css
313
- - docpages/pig/skin/profile.css
314
- - docpages/pig/skin/screen.css
315
- - docpages/pig/tutorial.html
316
- - docpages/pig/udf.html
317
- - docpages/tutorial.textile
318
- - docpages/usage.textile
319
- - docpages/wutils.textile
320
- - examples/dataflow.rb
362
+ - examples/Gemfile
363
+ - examples/README.md
321
364
  - examples/dataflow/apache_log_line.rb
322
- - examples/dataflow/complex.rb
323
- - examples/dataflow/donuts.rb
365
+ - examples/dataflow/fibonacci_series.rb
324
366
  - examples/dataflow/parse_apache_logs.rb
325
367
  - examples/dataflow/pig_latinizer.rb
368
+ - examples/dataflow/scraper_macro_flow.rb
326
369
  - examples/dataflow/simple.rb
327
370
  - examples/dataflow/telegram.rb
328
371
  - examples/examples_helper.rb
372
+ - examples/geo.rb
373
+ - examples/geo/geo_grids.numbers
374
+ - examples/geo/geolocated.rb
375
+ - examples/geo/quadtile.rb
376
+ - examples/geo/spec/geolocated_spec.rb
377
+ - examples/geo/tile_fetcher.rb
329
378
  - examples/graph/minimum_spanning_tree.rb
330
379
  - examples/graph/union_find.rb
380
+ - examples/jabberwocky.txt
381
+ - examples/models/wikipedia.rb
382
+ - examples/munging/Gemfile
383
+ - examples/munging/airline_flights/airline.rb
384
+ - examples/munging/airline_flights/airline_flights.rake
385
+ - examples/munging/airline_flights/airplane.rb
386
+ - examples/munging/airline_flights/airport.rb
387
+ - examples/munging/airline_flights/airport_id_unification.rb
388
+ - examples/munging/airline_flights/airport_ok_chars.rb
389
+ - examples/munging/airline_flights/flight.rb
390
+ - examples/munging/airline_flights/models.rb
391
+ - examples/munging/airline_flights/parse.rb
392
+ - examples/munging/airline_flights/reconcile_airports.rb
393
+ - examples/munging/airline_flights/route.rb
394
+ - examples/munging/airline_flights/tasks.rake
395
+ - examples/munging/airline_flights/timezone_fixup.rb
396
+ - examples/munging/airline_flights/topcities.rb
397
+ - examples/munging/airports/40_wbans.txt
398
+ - examples/munging/airports/filter_weather_reports.rb
399
+ - examples/munging/airports/join.pig
400
+ - examples/munging/airports/to_tsv.rb
401
+ - examples/munging/airports/usa_wbans.pig
402
+ - examples/munging/airports/usa_wbans.txt
403
+ - examples/munging/airports/wbans.pig
404
+ - examples/munging/airports/wbans.txt
405
+ - examples/munging/geo/geo_json.rb
406
+ - examples/munging/geo/geo_models.rb
407
+ - examples/munging/geo/geonames_models.rb
408
+ - examples/munging/geo/iso_codes.rb
409
+ - examples/munging/geo/reconcile_countries.rb
410
+ - examples/munging/geo/tasks.rake
411
+ - examples/munging/rake_helper.rb
412
+ - examples/munging/weather/.gitignore
413
+ - examples/munging/weather/Gemfile
414
+ - examples/munging/weather/Rakefile
415
+ - examples/munging/weather/extract_ish.rb
416
+ - examples/munging/weather/models/weather.rb
417
+ - examples/munging/weather/utils/noaa_downloader.rb
418
+ - examples/munging/wikipedia/README.md
419
+ - examples/munging/wikipedia/Rakefile
420
+ - examples/munging/wikipedia/articles/extract_articles-parsed.rb
421
+ - examples/munging/wikipedia/articles/extract_articles-templated.rb
422
+ - examples/munging/wikipedia/articles/textualize_articles.rb
423
+ - examples/munging/wikipedia/articles/verify_structure.rb
424
+ - examples/munging/wikipedia/articles/wp2txt-LICENSE.txt
425
+ - examples/munging/wikipedia/articles/wp2txt_article.rb
426
+ - examples/munging/wikipedia/articles/wp2txt_utils.rb
427
+ - examples/munging/wikipedia/dbpedia/dbpedia_common.rb
428
+ - examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb
429
+ - examples/munging/wikipedia/dbpedia/extract_links.rb
430
+ - examples/munging/wikipedia/dbpedia/sameas_extractor.rb
431
+ - examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig
432
+ - examples/munging/wikipedia/page_metadata/extract_page_metadata.rb
433
+ - examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old
434
+ - examples/munging/wikipedia/pagelinks/augment_pagelinks.pig
435
+ - examples/munging/wikipedia/pagelinks/extract_pagelinks.rb
436
+ - examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old
437
+ - examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig
438
+ - examples/munging/wikipedia/pageviews/augment_pageviews.pig
439
+ - examples/munging/wikipedia/pageviews/extract_pageviews.rb
440
+ - examples/munging/wikipedia/pig_style_guide.md
441
+ - examples/munging/wikipedia/redirects/redirects_page_metadata.pig
442
+ - examples/munging/wikipedia/subuniverse/sub_articles.pig
443
+ - examples/munging/wikipedia/subuniverse/sub_page_metadata.pig
444
+ - examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig
445
+ - examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig
446
+ - examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig
447
+ - examples/munging/wikipedia/subuniverse/sub_pageviews.pig
448
+ - examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig
449
+ - examples/munging/wikipedia/utils/get_namespaces.rb
450
+ - examples/munging/wikipedia/utils/munging_utils.rb
451
+ - examples/munging/wikipedia/utils/namespaces.json
452
+ - examples/rake_helper.rb
453
+ - examples/server_logs/geo_ip_mapping/munge_geolite.rb
454
+ - examples/server_logs/logline.rb
455
+ - examples/server_logs/models.rb
456
+ - examples/server_logs/page_counts.pig
457
+ - examples/server_logs/server_logs-01-parse-script.rb
458
+ - examples/server_logs/server_logs-02-histograms-full.rb
459
+ - examples/server_logs/server_logs-02-histograms-mapper.rb
460
+ - examples/server_logs/server_logs-03-breadcrumbs-full.rb
461
+ - examples/server_logs/server_logs-04-page_page_edges-full.rb
462
+ - examples/string_reverser.rb
331
463
  - examples/text/latinize_text.rb
332
464
  - examples/text/pig_latin.rb
465
+ - examples/text/regional_flavor/README.md
466
+ - examples/text/regional_flavor/article_wordbags.pig
467
+ - examples/text/regional_flavor/j01-article_wordbags.rb
468
+ - examples/text/regional_flavor/simple_pig_script.pig
333
469
  - examples/tiny_count.rb
334
- - examples/tiny_count/jabberwocky_output.tsv
335
470
  - examples/twitter/locations.rb
336
471
  - examples/twitter/models.rb
337
472
  - examples/twitter/pt1-fiddle.pig
@@ -341,211 +476,99 @@ files:
341
476
  - examples/twitter/pt4-strong_links.rb
342
477
  - examples/twitter/pt5-lnglat_and_strong_links.pig
343
478
  - examples/twitter/states.tsv
344
- - examples/word_count.rb
479
+ - examples/word_count/accumulator.rb
480
+ - examples/word_count/tokenizer.rb
481
+ - examples/word_count/word_count.rb
482
+ - examples/workflow/cherry_pie.dot
345
483
  - examples/workflow/cherry_pie.md
484
+ - examples/workflow/cherry_pie.png
346
485
  - examples/workflow/cherry_pie.rb
347
- - examples/workflow/fiddle.rb
348
486
  - examples/workflow/package_gem.rb
349
- - lib/away/escapement.rb
350
- - lib/away/exe.rb
351
- - lib/away/experimental.rb
352
- - lib/away/from_file.rb
353
- - lib/away/job.rb
354
- - lib/away/job/rake_compat.rb
355
- - lib/away/registry.rb
356
- - lib/away/runner.rb
357
- - lib/away/runner/execute.rb
358
- - lib/away/script.rb
359
- - lib/away/script/hadoop_command.rb
360
- - lib/away/source/file_list_source.rb
361
- - lib/away/source/looper.rb
362
- - lib/away/task.rb
363
487
  - lib/hanuman.rb
364
- - lib/hanuman/action.rb
365
- - lib/hanuman/chain.rb
366
488
  - lib/hanuman/graph.rb
367
- - lib/hanuman/graphviz.rb
368
489
  - lib/hanuman/graphvizzer.rb
369
- - lib/hanuman/resource.rb
370
- - lib/hanuman/slot.rb
371
- - lib/hanuman/slottable.rb
490
+ - lib/hanuman/graphvizzer/gv_models.rb
491
+ - lib/hanuman/graphvizzer/gv_presenter.rb
492
+ - lib/hanuman/link.rb
493
+ - lib/hanuman/registry.rb
372
494
  - lib/hanuman/stage.rb
373
495
  - lib/wukong.rb
374
- - lib/wukong/bad_record.rb
496
+ - lib/wukong/boot.rb
497
+ - lib/wukong/configuration.rb
375
498
  - lib/wukong/dataflow.rb
376
- - lib/wukong/event.rb
377
- - lib/wukong/local_runner.rb
378
- - lib/wukong/mapred.rb
499
+ - lib/wukong/driver.rb
500
+ - lib/wukong/emitter.rb
379
501
  - lib/wukong/model/faker.rb
502
+ - lib/wukong/model/flatpack_parser/flat.rb
503
+ - lib/wukong/model/flatpack_parser/flatpack.rb
504
+ - lib/wukong/model/flatpack_parser/lang.rb
505
+ - lib/wukong/model/flatpack_parser/parser.rb
506
+ - lib/wukong/model/flatpack_parser/tokens.rb
380
507
  - lib/wukong/processor.rb
381
- - lib/wukong/settings.rb
382
- - lib/wukong/universe.rb
508
+ - lib/wukong/spec_helpers.rb
509
+ - lib/wukong/spec_helpers/integration_driver.rb
510
+ - lib/wukong/spec_helpers/integration_driver_matchers.rb
511
+ - lib/wukong/spec_helpers/processor_helpers.rb
512
+ - lib/wukong/spec_helpers/processor_methods.rb
513
+ - lib/wukong/spec_helpers/shared_examples.rb
514
+ - lib/wukong/spec_helpers/spec_driver.rb
515
+ - lib/wukong/spec_helpers/spec_driver_matchers.rb
383
516
  - lib/wukong/version.rb
384
- - lib/wukong/widget/filter.rb
385
- - lib/wukong/widget/gibberish.rb
386
- - lib/wukong/widget/monitor.rb
387
- - lib/wukong/widget/reducer.rb
517
+ - lib/wukong/widget/filters.rb
518
+ - lib/wukong/widget/processors.rb
519
+ - lib/wukong/widget/reducers.rb
520
+ - lib/wukong/widget/reducers/accumulator.rb
521
+ - lib/wukong/widget/reducers/bin.rb
522
+ - lib/wukong/widget/reducers/count.rb
523
+ - lib/wukong/widget/reducers/group.rb
524
+ - lib/wukong/widget/reducers/group_concat.rb
525
+ - lib/wukong/widget/reducers/moments.rb
526
+ - lib/wukong/widget/reducers/sort.rb
527
+ - lib/wukong/widget/serializers.rb
388
528
  - lib/wukong/widget/sink.rb
389
529
  - lib/wukong/widget/source.rb
390
- - lib/wukong/widget/stringifier.rb
391
- - lib/wukong/workflow.rb
392
- - lib/wukong/workflow/command.rb
393
- - old/config/emr-example.yaml
394
- - old/examples/README.txt
395
- - old/examples/contrib/jeans/README.markdown
396
- - old/examples/contrib/jeans/data/normalized_sizes
397
- - old/examples/contrib/jeans/data/orders.tsv
398
- - old/examples/contrib/jeans/data/sizes
399
- - old/examples/contrib/jeans/normalize.rb
400
- - old/examples/contrib/jeans/sizes.rb
401
- - old/examples/corpus/bnc_word_freq.rb
402
- - old/examples/corpus/bucket_counter.rb
403
- - old/examples/corpus/dbpedia_abstract_to_sentences.rb
404
- - old/examples/corpus/sentence_bigrams.rb
405
- - old/examples/corpus/sentence_coocurrence.rb
406
- - old/examples/corpus/stopwords.rb
407
- - old/examples/corpus/words_to_bigrams.rb
408
- - old/examples/emr/README.textile
409
- - old/examples/emr/dot_wukong_dir/credentials.json
410
- - old/examples/emr/dot_wukong_dir/emr.yaml
411
- - old/examples/emr/dot_wukong_dir/emr_bootstrap.sh
412
- - old/examples/emr/elastic_mapreduce_example.rb
413
- - old/examples/network_graph/adjacency_list.rb
414
- - old/examples/network_graph/breadth_first_search.rb
415
- - old/examples/network_graph/gen_2paths.rb
416
- - old/examples/network_graph/gen_multi_edge.rb
417
- - old/examples/network_graph/gen_symmetric_links.rb
418
- - old/examples/pagerank/README.textile
419
- - old/examples/pagerank/gen_initial_pagerank_graph.pig
420
- - old/examples/pagerank/pagerank.rb
421
- - old/examples/pagerank/pagerank_initialize.rb
422
- - old/examples/pagerank/run_pagerank.sh
423
- - old/examples/sample_records.rb
424
- - old/examples/server_logs/apache_log_parser.rb
425
- - old/examples/server_logs/breadcrumbs.rb
426
- - old/examples/server_logs/nook.rb
427
- - old/examples/server_logs/nook/faraday_dummy_adapter.rb
428
- - old/examples/server_logs/user_agent.rb
429
- - old/examples/simple_word_count.rb
430
- - old/examples/size.rb
431
- - old/examples/stats/avg_value_frequency.rb
432
- - old/examples/stats/binning_percentile_estimator.rb
433
- - old/examples/stats/data/avg_value_frequency.tsv
434
- - old/examples/stats/rank_and_bin.rb
435
- - old/examples/stupidly_simple_filter.rb
436
- - old/examples/word_count.rb
437
- - old/graph/graphviz_builder.rb
438
- - old/graph_easy/Attributes.pm
439
- - old/graph_easy/Graphviz.pm
440
- - old/wukong.rb
441
- - old/wukong/and_pig.rb
442
- - old/wukong/bad_record.rb
443
- - old/wukong/datatypes.rb
444
- - old/wukong/datatypes/enum.rb
445
- - old/wukong/datatypes/fake_types.rb
446
- - old/wukong/decorator.rb
447
- - old/wukong/encoding/asciize.rb
448
- - old/wukong/extensions.rb
449
- - old/wukong/extensions/array.rb
450
- - old/wukong/extensions/blank.rb
451
- - old/wukong/extensions/class.rb
452
- - old/wukong/extensions/date_time.rb
453
- - old/wukong/extensions/emittable.rb
454
- - old/wukong/extensions/enumerable.rb
455
- - old/wukong/extensions/hash.rb
456
- - old/wukong/extensions/hash_keys.rb
457
- - old/wukong/extensions/hash_like.rb
458
- - old/wukong/extensions/hashlike_class.rb
459
- - old/wukong/extensions/module.rb
460
- - old/wukong/extensions/pathname.rb
461
- - old/wukong/extensions/string.rb
462
- - old/wukong/extensions/struct.rb
463
- - old/wukong/extensions/symbol.rb
464
- - old/wukong/filename_pattern.rb
465
- - old/wukong/helper.rb
466
- - old/wukong/helper/stopwords.rb
467
- - old/wukong/helper/tokenize.rb
468
- - old/wukong/logger.rb
469
- - old/wukong/periodic_monitor.rb
470
- - old/wukong/schema.rb
471
- - old/wukong/script.rb
472
- - old/wukong/script/avro_command.rb
473
- - old/wukong/script/cassandra_loader_script.rb
474
- - old/wukong/script/emr_command.rb
475
- - old/wukong/script/hadoop_command.rb
476
- - old/wukong/script/local_command.rb
477
- - old/wukong/store.rb
478
- - old/wukong/store/base.rb
479
- - old/wukong/store/cassandra.rb
480
- - old/wukong/store/cassandra/streaming.rb
481
- - old/wukong/store/cassandra/struct_loader.rb
482
- - old/wukong/store/cassandra_model.rb
483
- - old/wukong/store/chh_chunked_flat_file_store.rb
484
- - old/wukong/store/chunked_flat_file_store.rb
485
- - old/wukong/store/conditional_store.rb
486
- - old/wukong/store/factory.rb
487
- - old/wukong/store/flat_file_store.rb
488
- - old/wukong/store/key_store.rb
489
- - old/wukong/store/null_store.rb
490
- - old/wukong/store/read_thru_store.rb
491
- - old/wukong/store/tokyo_tdb_key_store.rb
492
- - old/wukong/store/tyrant_rdb_key_store.rb
493
- - old/wukong/store/tyrant_tdb_key_store.rb
494
- - old/wukong/streamer.rb
495
- - old/wukong/streamer/accumulating_reducer.rb
496
- - old/wukong/streamer/base.rb
497
- - old/wukong/streamer/counting_reducer.rb
498
- - old/wukong/streamer/filter.rb
499
- - old/wukong/streamer/instance_streamer.rb
500
- - old/wukong/streamer/json_streamer.rb
501
- - old/wukong/streamer/line_streamer.rb
502
- - old/wukong/streamer/list_reducer.rb
503
- - old/wukong/streamer/rank_and_bin_reducer.rb
504
- - old/wukong/streamer/record_streamer.rb
505
- - old/wukong/streamer/reducer.rb
506
- - old/wukong/streamer/set_reducer.rb
507
- - old/wukong/streamer/struct_streamer.rb
508
- - old/wukong/streamer/summing_reducer.rb
509
- - old/wukong/streamer/uniq_by_last_reducer.rb
510
- - old/wukong/typed_struct.rb
511
- - spec/away/encoding_spec.rb
512
- - spec/away/exe_spec.rb
513
- - spec/away/flow_spec.rb
514
- - spec/away/graph_spec.rb
515
- - spec/away/job_spec.rb
516
- - spec/away/rake_compat_spec.rb
517
- - spec/away/script_spec.rb
530
+ - lib/wukong/widget/utils.rb
531
+ - lib/wukong/widgets.rb
532
+ - spec/examples/dataflow/fibonacci_series_spec.rb
518
533
  - spec/examples/dataflow/parse_apache_logs_spec.rb
519
534
  - spec/examples/dataflow/parsing_spec.rb
520
535
  - spec/examples/dataflow/simple_spec.rb
521
536
  - spec/examples/dataflow/telegram_spec.rb
522
537
  - spec/examples/graph/minimum_spanning_tree_spec.rb
538
+ - spec/examples/munging/airline_flights/identifiers_spec.rb
539
+ - spec/examples/munging/airline_flights_spec.rb
523
540
  - spec/examples/text/pig_latin_spec.rb
524
541
  - spec/examples/workflow/cherry_pie_spec.rb
525
542
  - spec/hanuman/graph_spec.rb
526
- - spec/hanuman/graphviz_spec.rb
527
- - spec/hanuman/slot_spec.rb
543
+ - spec/hanuman/hanuman_spec.rb
544
+ - spec/hanuman/registry_spec.rb
528
545
  - spec/hanuman/stage_spec.rb
529
546
  - spec/spec.opts
530
547
  - spec/spec_helper.rb
531
- - spec/support/examples_helper.rb
532
548
  - spec/support/hanuman_test_helpers.rb
533
- - spec/support/streamer_test_helpers.rb
549
+ - spec/support/shared_context_for_reducers.rb
550
+ - spec/support/shared_examples_for_builders.rb
551
+ - spec/support/shared_examples_for_shortcuts.rb
534
552
  - spec/support/wukong_test_helpers.rb
535
- - spec/support/wukong_widget_helpers.rb
536
553
  - spec/wukong/dataflow_spec.rb
537
554
  - spec/wukong/local_runner_spec.rb
538
555
  - spec/wukong/model/faker_spec.rb
539
- - spec/wukong/processor_spec.rb
540
556
  - spec/wukong/runner_spec.rb
541
- - spec/wukong/widget/filter_spec.rb
557
+ - spec/wukong/widget/filters_spec.rb
558
+ - spec/wukong/widget/processors_spec.rb
559
+ - spec/wukong/widget/reducers/bin_spec.rb
560
+ - spec/wukong/widget/reducers/count_spec.rb
561
+ - spec/wukong/widget/reducers/group_spec.rb
562
+ - spec/wukong/widget/reducers/moments_spec.rb
563
+ - spec/wukong/widget/reducers/sort_spec.rb
564
+ - spec/wukong/widget/serializers_spec.rb
542
565
  - spec/wukong/widget/sink_spec.rb
543
566
  - spec/wukong/widget/source_spec.rb
544
- - spec/wukong/widget/stringifier_spec.rb
545
- - spec/wukong/workflow/command_spec.rb
567
+ - spec/wukong/wukong_spec.rb
546
568
  - wukong.gemspec
547
569
  homepage: https://github.com/infochimps-labs/wukong
548
- licenses: []
570
+ licenses:
571
+ - Apache 2.0
549
572
  post_install_message:
550
573
  rdoc_options: []
551
574
  require_paths:
@@ -564,45 +587,46 @@ required_rubygems_version: !ruby/object:Gem::Requirement
564
587
  version: 1.3.1
565
588
  requirements: []
566
589
  rubyforge_project:
567
- rubygems_version: 1.8.15
590
+ rubygems_version: 1.8.23
568
591
  signing_key:
569
592
  specification_version: 3
570
593
  summary: Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use
571
594
  it, yet handles terabyte-scale computation with ease.
572
595
  test_files:
573
- - spec/away/encoding_spec.rb
574
- - spec/away/exe_spec.rb
575
- - spec/away/flow_spec.rb
576
- - spec/away/graph_spec.rb
577
- - spec/away/job_spec.rb
578
- - spec/away/rake_compat_spec.rb
579
- - spec/away/script_spec.rb
596
+ - spec/examples/dataflow/fibonacci_series_spec.rb
580
597
  - spec/examples/dataflow/parse_apache_logs_spec.rb
581
598
  - spec/examples/dataflow/parsing_spec.rb
582
599
  - spec/examples/dataflow/simple_spec.rb
583
600
  - spec/examples/dataflow/telegram_spec.rb
584
601
  - spec/examples/graph/minimum_spanning_tree_spec.rb
602
+ - spec/examples/munging/airline_flights/identifiers_spec.rb
603
+ - spec/examples/munging/airline_flights_spec.rb
585
604
  - spec/examples/text/pig_latin_spec.rb
586
605
  - spec/examples/workflow/cherry_pie_spec.rb
587
606
  - spec/hanuman/graph_spec.rb
588
- - spec/hanuman/graphviz_spec.rb
589
- - spec/hanuman/slot_spec.rb
607
+ - spec/hanuman/hanuman_spec.rb
608
+ - spec/hanuman/registry_spec.rb
590
609
  - spec/hanuman/stage_spec.rb
591
610
  - spec/spec.opts
592
611
  - spec/spec_helper.rb
593
- - spec/support/examples_helper.rb
594
612
  - spec/support/hanuman_test_helpers.rb
595
- - spec/support/streamer_test_helpers.rb
613
+ - spec/support/shared_context_for_reducers.rb
614
+ - spec/support/shared_examples_for_builders.rb
615
+ - spec/support/shared_examples_for_shortcuts.rb
596
616
  - spec/support/wukong_test_helpers.rb
597
- - spec/support/wukong_widget_helpers.rb
598
617
  - spec/wukong/dataflow_spec.rb
599
618
  - spec/wukong/local_runner_spec.rb
600
619
  - spec/wukong/model/faker_spec.rb
601
- - spec/wukong/processor_spec.rb
602
620
  - spec/wukong/runner_spec.rb
603
- - spec/wukong/widget/filter_spec.rb
621
+ - spec/wukong/widget/filters_spec.rb
622
+ - spec/wukong/widget/processors_spec.rb
623
+ - spec/wukong/widget/reducers/bin_spec.rb
624
+ - spec/wukong/widget/reducers/count_spec.rb
625
+ - spec/wukong/widget/reducers/group_spec.rb
626
+ - spec/wukong/widget/reducers/moments_spec.rb
627
+ - spec/wukong/widget/reducers/sort_spec.rb
628
+ - spec/wukong/widget/serializers_spec.rb
604
629
  - spec/wukong/widget/sink_spec.rb
605
630
  - spec/wukong/widget/source_spec.rb
606
- - spec/wukong/widget/stringifier_spec.rb
607
- - spec/wukong/workflow/command_spec.rb
631
+ - spec/wukong/wukong_spec.rb
608
632
  has_rdoc: