wukong 3.0.0.pre → 3.0.0.pre2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,15 +0,0 @@
1
- module Wukong
2
- class Source
3
-
4
- # A FileListSource is a collection of files
5
- #
6
- # FileLists are lazy. When given a list of glob patterns for possible files to be included in the file list, instead of searching the file structures to find the files, a FileList holds the pattern for later use.
7
- #
8
- # This allows us to define a number of FileLists to match any number of files, but only search out the actual files when the FileList itself is actually used. The key is that the first time an element of the FileList/Array is requested, the pending patterns are resolved into a real list of file names.
9
- #
10
- # @see_also http://rdoc.info/gems/rake/Rake/FileList
11
- #
12
- class FileListSource < Wukong::Source
13
- end
14
- end
15
- end
@@ -1,18 +0,0 @@
1
- module Wukong
2
- class Looper < Wukong::Source
3
-
4
- def each
5
- loop do
6
- yield generate
7
- end
8
- end
9
-
10
- end
11
-
12
- require 'forgery'
13
- class ForgeryLooper < Looper
14
- def generate
15
- Forgery.text(:sentence, 1)
16
- end
17
- end
18
- end
@@ -1,219 +0,0 @@
1
- module Wukong
2
-
3
- #
4
- #
5
- # action
6
- # only_if -- Only execute this task if the given block's result is truthy
7
- # not_if -- Do not execute this task if the given block's result is truthy
8
- #
9
- # ignore_failure -- If true, we will continue running the recipe if this resource fails for any reason. (defaults to false)
10
- # provider -- The class name of a provider to use for this resource.
11
- # retries -- Number of times to catch exceptions and retry the resource (defaults to 0). Requires Chef >= 0.10.4.
12
- # retry_delay -- Retry delay in seconds (defaults to 2). Requires Chef >= 0.10.4.
13
- # supports -- A hash of options that hint providers as to the capabilities of this resource.
14
- #
15
- module Task
16
-
17
- #
18
- # * `:nothing` -- do nothing - useful if you want to specify a resource, but only notify it of other actions.
19
- #
20
- # In the absence of another default action, `:nothing` is the default.
21
- #
22
- # @param [:delayed, :immediately] timing
23
- def trigger
24
- end
25
-
26
- def run
27
- # if dry_run? ; Log.warn "" ; return ; end
28
- # ...
29
- end
30
-
31
- # Notify another resource to take an action if this resource changes state for any reason.
32
- #
33
- # @example
34
- # notifies :action, "resource_type[resource_name]", :notification_timing
35
- def notifies ; end
36
- # Take action on this resource if another resource changes state. Works similarly to notifies, but the direction of the relationship is reversed.
37
- def subscribes ; end
38
-
39
- def depends(tasks)
40
- tasks = Array(tasks)
41
- end
42
-
43
- module ClassMethods
44
- end
45
- def self.included(base)
46
- base.send(:include, Wukong::Stage)
47
- base.extend(ClassMethods)
48
- end
49
- end
50
-
51
-
52
- module Task
53
- #
54
- #
55
- class DirectoryTask
56
- include Wukong::Task
57
- #
58
- define_action :create, :description => "Create this directory only if it does not exist. If it exists, do nothing"
59
- define_action :update, :description => "Update this directory, whether it exists or not"
60
- define_action :delete, :description => "Delete this directory, whether it exists or not"
61
- self.default_action = :create
62
- #
63
- field :path, String, :description => "The path to the directory; by default, the name"
64
- field :mode, String, :description => "The octal mode of the directory, e.g. '0755'. Numeric values are *not allowed* -- there's too much danger of saying '755' when you mean 'octal 0755'"
65
- field :recursive, :boolean, :description => "recursive=true to operate on parents and leaf, false to operate on leaf only", :default => false,
66
- :summary => %Q{- delete: remove the base directory and then recursively delete its parents until one is non-empty.
67
- - create: create recursively (ie, mkdir -p). Note: owner/group/mode only applies to the leaf directory, regardless of the value of this attribute.}
68
- end
69
-
70
- #
71
- class FileTask
72
- include Wukong::Task
73
- define_action :create, :description => "Create this file only if it does not exist. If it exists, do nothing"
74
- define_action :update, :description => "Update this file, whether it exists or not"
75
- define_action :delete, :description => "Delete this file, whether it exists or not"
76
- define_action :touch, :description => "Touch this file (update the mtime/atime). Will raise a CantTouchThisError if the filesystem does not support mtime/atimes, unless you specify the `:please_hammer => \"don't hurt em\"` option."
77
- self.default_action = :create
78
- #
79
- field :path, String, :description => "Path to the file; by default, the resource's name"
80
- field :mode, String, :description => "Octal mode of the file - e.g. '0755' default varies"
81
- # field :backup, String, :description => "How many backups of this file to keep. Set to false if you want no backups", :default => "5"
82
- end
83
-
84
- #
85
- # Creates a filesystem link, symbolic by default.
86
- #
87
- class LnTask
88
- include Wukong::Task
89
- define_action :create, :description => "Create this link only if it does not exist. If it exists, do nothing"
90
- define_action :update, :description => "Update this link, whether it exists or not"
91
- define_action :delete, :description => "Delete this link, whether it exists or not"
92
- self.default_action = :create
93
- #
94
- field :target_file, String, :description => "Path to the created link; by default, same as options[:name]"
95
- field :to, String, :description => "The real file you want to link to"
96
- field :link_type, Symbol, :description => "create a :symbolic or :hard link", :default => :symbolic
97
- end
98
-
99
- #
100
- # Create file from a given template:
101
- #
102
- class TemplateTask
103
- include Wukong::Task
104
- define_action :create, :description => "Create this file only if it does not exist. If it exists, do nothing"
105
- define_action :update, :description => "Update this file, whether it exists or not"
106
- define_action :delete, :description => "Delete this file, whether it exists or not"
107
- self.default_action = :create
108
- #
109
- field :path, String, :description => "Path to the file; by default, same as options[:name]"
110
- field :source, String, :description => "Template source file"
111
- field :variables, String, :description => "Variables to use in the template"
112
- #
113
- field :mode, String, :description => "Octal mode of the file - e.g. '0755' default varies"
114
- # field :backup, String, :description => "How many backups of this file to keep. Set to false if you want no backups", :default => 5
115
- end
116
-
117
- #
118
- # Create a file from a remote file
119
- #
120
- class RemoteFileTask
121
- include Wukong::Task
122
- define_action :create, :description => "Create this file only if it does not exist. If it exists, do nothing"
123
- define_action :update, :description => "Update this file, whether it exists or not"
124
- define_action :delete, :description => "Delete this file, whether it exists or not"
125
- self.default_action = :create
126
- #
127
- field :path, String, :description => "Path to the file; by default, same as options[:name]"
128
- field :mode, String, :description => "(optional) The octal mode of the file - e.g. '0755' default varies"
129
- # field :checksum, String, :description => "(optional) the SHA-256 checksum of the file--if the local file matches the checksum, Chef will not download it"
130
- # field :backup, String, :description => "How many backups of this file to keep. Set to false if you want no backups", :default => 5
131
- end
132
-
133
- #
134
- # Send an HTTP request
135
- #
136
- class HttpRequestTask
137
- include Wukong::Task
138
- define_action :request, :description => "Send request using the :method option"
139
- define_action :get, :description => "Send a GET request"
140
- define_action :put, :description => "Send a PUT request"
141
- define_action :patch, :description => "Send a PATCH request"
142
- define_action :post, :description => "Send a POST request"
143
- define_action :delete, :description => "Send a DELETE request"
144
- define_action :head, :description => "Send a HEAD request"
145
- define_action :options, :description => "Send an OPTIONS request"
146
- self.default_action = :get
147
- #
148
- field :url, String, :description => "The URL to send the request to"
149
- field :message, String, :description => "The message to be sent to the URL (as the message parameter)"
150
- field :headers, Hash, :description => "Hash of custom headers", :default => Hash.new
151
- field :method, String, :description => ""
152
- end
153
-
154
- #
155
- # run a command
156
- #
157
- class ExecuteTask
158
- include Wukong::Task
159
- define_action :run, :description => "runs the command"
160
- define_action :revert, :description => "reverse the effects of the primary command. If the `undo_command` field is unset, throws an error."
161
- #
162
- field :command, String, :description => "The command to execute"
163
- field :revert_cmd, String, :description => "Command to undo the effects of the primary command"
164
- field :code, String, :description => "Quoted script of code to execute"
165
- field :interpreter, String, :description => "Script interpreter to use for code execution"
166
- field :flags, [String, Hash], :description => "command line flags to pass to the interpreter when invoking. If a Hash, will be turned into `--key 'value'` pairs; all keys must be symbols or strings and all values must be strings"
167
- field :creates, String, :description => "A file this command creates - if the file exists, the command will not be run"
168
- field :cwd, String, :description => "Current working directory to run the command from"
169
- field :environment, String, :description => "A hash of environment variables to set before running this command"
170
- field :returns, Integer, :description => "The return value of the command (may be an array of accepted values) - this resource raises an exception if the return value(s) do not match", :default => 0
171
- field :timeout, Integer, :description => "How many seconds to let the command run before timing it out", :default => 3600
172
- field :umask, String, :description => "Umask for files created by the command"
173
- end
174
-
175
- #
176
- # schedule a (job? task?)
177
- #
178
- class ScheduleTask
179
- include Wukong::Task
180
- define_action :create, :description => "Create this scheduled task only if it does not exist. If it exists, do nothing"
181
- define_action :update, :description => "Update this scheduled task, whether it exists or not"
182
- define_action :delete, :description => "Delete this scheduled task, whether it exists or not"
183
- self.default_action = :create
184
- #
185
- field :minute, Integer, :description => "The minute this entry should run (0 - 59)"
186
- field :hour, Integer, :description => "The hour this entry should run (0 - 23)"
187
- field :weekday, Integer, :description => "The weekday this entry should run (0 - 6) (Sunday=0)"
188
- field :day, Integer, :description => "The day of month this entry should run (1 - 31)"
189
- field :month, Integer, :description => "The month this entry should run (1 - 12)"
190
- end
191
-
192
- # start a longrunning service in a new process
193
- class SpawnTask
194
- include Wukong::Task
195
- end
196
-
197
- #
198
- # The incoming_name accepts
199
- #
200
- # * a string
201
- # * a regexp
202
- #
203
- # The target_name accepts
204
- #
205
- # * a string
206
- # * a `Proc` to mangle the name
207
- #
208
- # @example
209
- #
210
- # rule (/part-[mr]-\d+/ => lambda{|name| rename_part(name) }) do |r|
211
- # # ...
212
- # end
213
- class RuleTask
214
- include Wukong::Task
215
- end
216
- end
217
-
218
-
219
- end
@@ -1,21 +0,0 @@
1
- module Hanuman
2
-
3
- class Action < Stage
4
- def self.register_action(meth_name=nil, &block)
5
- meth_name ||= handle ; klass = self
6
- Hanuman::Graph.send(:define_method, meth_name) do |*args, &block|
7
- begin
8
- klass.make(workflow=self, *args, &block)
9
- rescue StandardError => err ; err.polish("adding #{meth_name} to #{self.name} on #{args}") rescue nil ; raise ; end
10
- end
11
- end
12
-
13
- def self.make(workflow, *args, &block)
14
- stage = receive(*args)
15
- workflow.add_stage stage
16
- stage.receive!(&block)
17
- stage
18
- end
19
- end
20
-
21
- end
@@ -1,4 +0,0 @@
1
- module Hanuman
2
- class Chain < Graph
3
- end
4
- end
@@ -1,74 +0,0 @@
1
- module Hanuman
2
-
3
- Stage.class_eval do
4
- class_attribute :draw_shape
5
- self.draw_shape = :record
6
-
7
- def to_graphviz(gv)
8
- gv.node(self.fullname,
9
- :label => name,
10
- :shape => draw_shape)
11
- # inputs.to_a.each do |input|
12
- # gv.edge(input.fullname, self.fullname)
13
- # end
14
- end
15
- end
16
-
17
- Slottable.module_eval do
18
- def to_graphviz(gv, draw_edges=true)
19
- gv.node(self.fullname,
20
- :label => name,
21
- :inslots => inslots.to_a.map{|slot| slot.name},
22
- :outslots => outslots.to_a.map{|slot| slot.name},
23
- :shape => draw_shape
24
- )
25
- # inslots.to_a.each do |inslot|
26
- # next unless inslot.input?
27
- # gv.edge(inslot.input.fullname, inslot.fullname)
28
- # end
29
- end
30
- end
31
-
32
- InputSlot.class_eval do
33
- def fullname
34
- %Q{"#{stage.fullname}":#{name}}
35
- end
36
- end
37
-
38
- OutputSlot.class_eval do
39
- def fullname
40
- %Q{"#{stage.fullname}":out_#{name}}
41
- end
42
- end
43
-
44
- Resource.class_eval do
45
- self.draw_shape = :Mrecord
46
- end
47
-
48
- class Graph < Action
49
- self.draw_shape = :record
50
- def to_graphviz(gv)
51
- gv.graph(fullname, :label => name) do |gv2|
52
- stages.each_value{|stage| stage.to_graphviz(gv2) }
53
- edges.each_pair do |from, into|
54
- gv2.edge(from.fullname, into.fullname)
55
- end
56
- end
57
- super(gv)
58
- end
59
- end
60
-
61
- module ::Wukong::Universe
62
- def to_graphviz
63
- gv = Hanuman::Graphvizzer::Universe.new(:name => self.name)
64
- @workflows.each do |_, workflow|
65
- workflow.to_graphviz(gv)
66
- end
67
- @dataflows.each do |_, dataflow|
68
- dataflow.to_graphviz(gv)
69
- end
70
- gv
71
- end
72
- end
73
-
74
- end
@@ -1,6 +0,0 @@
1
- module Hanuman
2
- class Resource < Stage
3
- include Hanuman::IsOwnInputSlot
4
- include Hanuman::IsOwnOutputSlot
5
- end
6
- end
@@ -1,87 +0,0 @@
1
- module Hanuman
2
-
3
- #
4
- # Provides the methods required in order to accept inbound links.
5
- # Including class must provide the input attribute and the owner method.
6
- #
7
- # @see IsOwnInputSlot
8
- # @see Slottable
9
- module Inlinkable
10
- extend Gorillib::Concern
11
-
12
- def set_input(stage)
13
- write_attribute(:input, stage)
14
- self
15
- end
16
-
17
- # wire another slot into this one
18
- # @param other [Hanuman::Outlinkable] the other stage or slot
19
- # @return this object, for chaining
20
- def <<(other)
21
- from(other)
22
- self
23
- end
24
-
25
- # wire another slot into this one
26
- # @param other [Hanuman::Outlinkable] the other stage or slot
27
- # @return this object, for chaining
28
- def from(other)
29
- owner.connect(other, self)
30
- self
31
- end
32
- end
33
-
34
- #
35
- # Provides the methods required in order to accept outbound links.
36
- # Including class must provide the output attribute and the owner method.
37
- #
38
- # @see IsOwnOutputSlot
39
- # @see Slottable
40
- module Outlinkable
41
- extend Gorillib::Concern
42
-
43
- def set_output(stage)
44
- write_attribute(:output, stage)
45
- self
46
- end
47
-
48
- # wire this slot into another slot
49
- # @param other [Hanuman::Slot] the other stage
50
- # @return the other slot
51
- def >(other)
52
- _, other = owner.connect(self, other)
53
- other
54
- end
55
-
56
- # wire this stage's output into another stage's input
57
- # @param other [Hanuman::Stage] the other stage
58
- # @return this stage, for chaining
59
- def into(other)
60
- owner.connect(self, other)
61
- self
62
- end
63
- end
64
-
65
- class Slot
66
- include Gorillib::Builder
67
- field :name, Symbol
68
- field :stage, Hanuman::Stage
69
- def owner
70
- stage.owner
71
- end
72
- def to_key() name ; end
73
- end
74
-
75
- class InputSlot < Slot
76
- include Hanuman::Inlinkable
77
- magic :input, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph that feeds into this one'
78
- def other() input ; end
79
- end
80
-
81
- class OutputSlot < Slot
82
- include Hanuman::Outlinkable
83
- magic :output, Hanuman::Stage, :writer => false, :tester => true, :doc => 'stage/slot in graph this one feeds into'
84
- def other() ouput ; end
85
- end
86
-
87
- end