wukong 3.0.0.pre → 3.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -0,0 +1,108 @@
1
module Wukong
  module SpecHelpers
    # This module defines methods to be included into the
    # Wukong::Processor class.
    #
    # The `given*` methods queue up input records on the processor; the
    # `*output` methods run the processor over the queued records and
    # return what it emitted.
    module ProcessorSpecMethods

      # The records accumulated so far, to be processed come match-time.
      #
      # Lazily initialised so that a processor which was never given
      # any records reports an empty array rather than nil.
      #
      # @return [Array]
      def given_records
        @given_records ||= []
      end

      # Give a collection of records to the processor.
      #
      # @param [Array] records
      # @return [self] the processor, for chaining
      def given *records
        given_records.concat(records)
        self # for chaining
      end

      # Give a collection of records to the processor but turn each
      # to JSON first.
      #
      # @param [Array] records
      # @return [self] the processor, for chaining
      def given_json *records
        given(*records.map { |record| MultiJson.dump(record) })
      end

      # Give a collection of records to the processor but join the
      # fields of each in a delimited format first.
      #
      # Every element of `records` becomes its own given record, just
      # as with #given_json; rows are never merged into one
      # multi-line string.
      #
      # @param [String] delimiter placed between the fields of a record
      # @param [Array<Array>] records each record is a list of fields
      # @return [self] the processor, for chaining
      def given_delimited delimiter, *records
        given(*records.map { |fields| fields.map(&:to_s).join(delimiter) })
      end

      # Give a collection of records to the processor but join each
      # in TSV format first.
      #
      # @param [Array<Array>] records
      # @return [self] the processor, for chaining
      def given_tsv *records
        given_delimited("\t", *records)
      end

      # Give a collection of records to the processor but join each
      # in CSV format first.
      #
      # Fields are joined with a bare comma; no quoting or escaping
      # is applied.
      #
      # @param [Array<Array>] records
      # @return [self] the processor, for chaining
      def given_csv *records
        given_delimited(",", *records)
      end

      # Return the output of the processor on the given records.
      #
      # Calling this method, like passing the processor to an `emit`
      # matcher, will trigger processing of all the given records.
      # A new SpecDriver is built and run on every call.
      #
      # Returns a SpecDriver, which is a subclass of array, so the
      # usual matchers like `include` and so on should work, as well
      # as explicitly indexing to introspect on particular records.
      #
      # @return [SpecDriver]
      def output
        SpecDriver.new(self).run
      end

      # Return the output of the processor on the given records,
      # splitting each output record on the given `delimiter` first.
      #
      # @param [String] delimiter
      # @see #output
      # @return [Array<Array<String>>] one array of fields per record
      def delimited_output(delimiter)
        output.map { |record| record.split(delimiter) }
      end

      # Return the output of the processor on the given records,
      # parsing as TSV first.
      #
      # @see #output
      # @see #delimited_output
      # @return [Array<Array<String>>]
      def tsv_output
        delimited_output("\t")
      end

      # Return the output of the processor on the given records,
      # parsing as CSV first.
      #
      # @see #output
      # @see #delimited_output
      # @return [Array<Array<String>>]
      def csv_output
        delimited_output(",")
      end

      # Return the output of the processor on the given records,
      # parsing each as JSON first.
      #
      # @see #output
      # @return [Array] one parsed value per output record
      def json_output
        output.map { |record| MultiJson.load(record) }
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
# Shared examples for any registered processor.
#
# Pass the registered name as `:named`; the examples then check that
# the registry has an entry under that name and that a processor
# created from it responds to each of the methods listed below.
# Without a name nothing is checked and a warning is printed instead.
shared_examples_for 'a processor' do |options={}|
  processor_name = options[:named]
  if processor_name
    it "is registered with the name '#{processor_name}'" do
      Wukong.registry.retrieve(processor_name.to_sym).should_not be_nil
    end
    # One example per method a processor is expected to respond to.
    [:setup, :process, :finalize, :stop, :notify].each do |method_name|
      it{ create_processor(processor_name).should respond_to(method_name) }
    end
  else
    warn "Must supply a name for a processor you want to test"
  end
end
@@ -0,0 +1,28 @@
1
module Wukong
  module SpecHelpers
    # Drives a processor over the records it was given and collects
    # everything the processor yields.
    #
    # The driver is itself an Array: after #run it holds the emitted
    # records in the order they were yielded.
    class SpecDriver < Array

      # The processor being driven.
      attr_reader :processor

      # @param processor the processor to run; it must respond to
      #   `given_records`, `process`, `finalize` and `stop`
      def initialize processor
        super()
        @processor = processor
      end

      # Feed each given record to the processor, then finalize and
      # stop it, appending every yielded value to this driver.
      #
      # A processor whose `given_records` is nil (nothing was ever
      # given to it) is treated as having no input: it is still
      # finalized and stopped.
      #
      # @return [SpecDriver, false] self once the run completes, or
      #   false when there is no processor to run
      def run
        return false unless processor
        inputs = processor.given_records || []
        inputs.each do |input|
          processor.process(input) { |output| self << output }
        end
        processor.finalize { |output| self << output }
        processor.stop
        self
      end

    end
  end
end
@@ -0,0 +1,195 @@
1
+ require_relative('spec_driver')
2
+
3
+ module Wukong
4
+ module SpecHelpers
5
+
6
# Helper methods which build matchers for asserting on the records
# emitted by a processor.
module SpecMatchers

  # Match the emitted records exactly as they were emitted.
  #
  # @param [Array] expected the expected records
  # @return [EmitMatcher]
  def emit(*expected)
    EmitMatcher.new(*expected)
  end

  # Match the emitted records after parsing each one as JSON.
  #
  # @param [Array] expected the expected parsed records
  # @return [JsonMatcher]
  def emit_json(*expected)
    JsonMatcher.new(*expected)
  end

  # Match the emitted records after splitting each one on `delimiter`.
  #
  # @param [String] delimiter
  # @param [Array] expected the expected split records
  # @return [DelimitedMatcher]
  def emit_delimited(delimiter, *expected)
    # The matcher class is DelimitedMatcher; the former reference to
    # `DelimiterMatcher` named a class that is not defined in this file.
    DelimitedMatcher.new(delimiter, *expected)
  end

  # Match the emitted records after splitting each one on tabs.
  #
  # @param [Array] expected the expected split records
  # @return [TsvMatcher]
  def emit_tsv(*expected)
    TsvMatcher.new(*expected)
  end

  # Match the emitted records after splitting each one on commas.
  #
  # @param [Array] expected the expected split records
  # @return [CsvMatcher]
  def emit_csv(*expected)
    CsvMatcher.new(*expected)
  end
end
28
+
29
# Matcher which drives a processor through a SpecDriver and compares
# the records it emitted against the expected ones.
#
# By default the (possibly parsed) output must equal `expected`
# element by element.  After chaining {#records} only the number of
# emitted records is compared, against the first expected value.
class EmitMatcher

  attr_accessor :driver, :expected, :reason, :expected_record, :actual_record, :mismatched_index

  # @param [Array] expected the expected records, or a single count
  #   when chained with {#records}
  def initialize(*expected)
    self.expected = expected
  end

  # Runs the processor and compares its output with the expectation.
  # On failure, records why (`reason` is `:size` or `:element`) along
  # with the first mismatching record and its index.
  #
  # @param processor the processor under test
  # @return [true, false]
  def matches?(processor)
    self.driver = SpecDriver.new(processor)
    driver.run
    if actual_size != expected_size
      self.reason = :size
      return false
    end
    return true if just_count?
    # Compute the (possibly parsed) output once rather than once per
    # expected record.
    actual_records = output
    expected.each_with_index do |expectation, index|
      actual = actual_records[index]
      if actual != expectation
        self.reason           = :element
        self.expected_record  = expectation
        self.actual_record    = actual
        self.mismatched_index = index
        return false
      end
    end
    true
  end

  # @return [String] explanation of a failed positive expectation
  def failure_message
    if reason == :size
      "Expected #{expected_size} records, got #{actual_size}:\n\n#{pretty_output}"
    else
      "Expected the #{ordinalize(mismatched_index)} record to be#{parse_modifier}\n\n#{expected_record}\n\nbut got\n\n#{pretty_output}"
    end
  end

  # Explanation of a failed negative expectation.  This is asked for
  # after {#matches?} returned true, in which case neither `reason`
  # nor `mismatched_index` has been set, so no mismatching record may
  # be assumed.
  #
  # @return [String]
  def negative_failure_message
    if just_count? || reason == :size
      "Expected to NOT get #{expected_size} records:\n\n#{output}"
    elsif mismatched_index
      "Expected the #{ordinalize(mismatched_index)} record to NOT be#{parse_modifier}\n\n#{pretty_output}"
    else
      "Expected output to NOT be#{parse_modifier}\n\n#{pretty_output}"
    end
  end
  # Name used for this message by the RSpec 3 matcher protocol.
  alias_method :failure_message_when_negated, :negative_failure_message

  # Switches the matcher to only compare the number of records.
  #
  # @return [EmitMatcher] self, for chaining
  def records
    @just_count = true
    self # chaining
  end
  alias_method :record, :records

  private

  def just_count?
    @just_count
  end

  def actual_size
    driver.size
  end

  # When only counting, the first expected value is the count.
  def expected_size
    just_count? ? expected.first.to_i : expected.size
  end

  # The records to compare; subclasses override this to parse them.
  def output
    driver
  end

  # Suffix describing how records were parsed; none by default.
  def parse_modifier
  end

  # One line per output record, with the mismatching record (if any)
  # marked by an arrow when there is more than one record.
  def pretty_output
    lines    = output
    multiple = lines.size > 1
    lines.each_with_index.map do |record, index|
      text   = record.is_a?(String) ? record : record.inspect
      prefix =
        if !multiple
          ''
        elsif index == mismatched_index
          " => "
        else
          " "
        end
      "#{prefix}#{text}"
    end.join("\n")
  end

  # http://stackoverflow.com/questions/1081926/how-do-i-format-a-date-in-ruby-to-include-rd-as-in-3rd
  #
  # @param [Integer] array_index zero-based index
  # @return [String] one-based ordinal such as "1st" or "12th"
  def ordinalize array_index
    n = array_index + 1
    if (11..13).include?(n % 100)
      "#{n}th"
    else
      case n % 10
      when 1; "#{n}st"
      when 2; "#{n}nd"
      when 3; "#{n}rd"
      else    "#{n}th"
      end
    end
  end
end
133
+
134
# Matcher which compares records after parsing each one as JSON.
class JsonMatcher < EmitMatcher
  # @return [Array] the driver's records, each parsed as JSON
  # @raise [Error] if a record cannot be parsed
  def output
    driver.map { |record| parse_json(record) }
  end

  def parse_modifier
    ' (after parsing as JSON)'
  end

  private

  # Parses a single record, turning any parse failure into an Error
  # which shows the offending record.
  def parse_json(record)
    MultiJson.load(record)
  rescue
    raise Error.new("Could not parse output of processor as JSON: \n\n#{record}")
  end
end
148
+
149
# Matcher which compares records after splitting each one on a
# delimiter.
class DelimitedMatcher < EmitMatcher

  # The string each record is split on.
  attr_accessor :delimiter

  # @param [String] delimiter
  # @param [Array] expected the expected split records
  def initialize(delimiter, *expected)
    self.delimiter = delimiter
    super(*expected)
  end

  # @return [Array<Array<String>>] the driver's records, each
  #   converted to a string and split on the delimiter
  # @raise [Error] if a record cannot be split
  def output
    driver.map do |record|
      begin
        record.to_s.split(delimiter)
      rescue
        # The message used to carry a stray "'" after the type name.
        raise Error.new("Could not parse as #{delimited_type}: \n\n#{record}")
      end
    end
  end

  # @return [String] human-readable name of the format
  def delimited_type
    "'#{delimiter}-delimited'"
  end

  def parse_modifier
    " (after parsing as #{delimited_type})"
  end
end
176
+
177
# Delimited matcher fixed to the tab character.
class TsvMatcher < DelimitedMatcher
  # @param [Array] expected the expected split records
  def initialize(*expected)
    super("\t", *expected)
  end

  # @return [String] the format name used in failure messages
  def delimited_type
    "TSV"
  end
end
185
+
186
# Delimited matcher fixed to the comma character.
class CsvMatcher < DelimitedMatcher
  # @param [Array] expected the expected split records
  def initialize(*expected)
    super(",", *expected)
  end

  # @return [String] the format name used in failure messages
  def delimited_type
    "CSV"
  end
end
194
+ end
195
+ end
@@ -1,3 +1,4 @@
1
1
module Wukong
  # The current version of Wukong.  Frozen so the shared constant
  # string cannot be mutated in place.
  VERSION = '3.0.0.pre2'.freeze
end
@@ -0,0 +1,311 @@
1
+ module Wukong
2
+ class Processor
3
+
4
# A widget which filters input records according to some
# criterion.  This base filter passes every record; subclasses
# override {#select?} to decide which records get through.
class Filter < Processor

  # Process a `record`, yielding it only when this filter selects
  # it.
  #
  # @param [Object] record an input record
  # @yield [record] yielded if this record should pass the filter
  # @yieldparam [Object] record
  # @see #select?
  # @see #reject?
  def process(record)
    return unless select?(record)
    yield(record)
  end

  # Should the given `record` be passed by this filter?
  #
  # @param [Object] record
  # @return [true, false] always true for the base filter
  # @see #reject?
  def select?(record)
    true
  end

  # Should the given `record` be rejected by this filter?  This is
  # the negation of {#select?}.
  #
  # @param [Object] record
  # @return [true, false]
  # @see #select?
  def reject?(record)
    !select?(record)
  end

  register
end
40
+
41
# A widget which lets every record through unchanged, much like
# `cat`.  It relies entirely on the behaviour inherited from Filter.
#
# @example Pass all records unmodified on the command line
#
#   $ cat input
#   1
#   2
#   3
#   $ cat input | wu-local identity
#   1
#   2
#   3
#
# @example Pass all records unmodified in a dataflow
#
#   Wukong.dataflow(:uses_identity) do
#     ... | identity | ...
#   end
#
# @see Filter
# @see Null
class Identity < Filter
  register
end
66
+
67
# A widget which lets no record through, much like
# <tt>/dev/null</tt>.
#
# @example Filter all records on the command line
#
#   $ cat input
#   1
#   2
#   3
#   $ cat input | wu-local null
#
# @example Filter all records from a dataflow
#
#   Wukong.dataflow(:uses_null) do
#     ... | null | ...
#   end
#
# @see Filter
# @see Identity
class Null < Filter
  # Never selects a record, so nothing passes this filter.
  #
  # @param [Object] record
  # @return false
  def select?(record)
    false
  end
  register
end
97
+
98
# A widget which only passes records if they match a regular
# expression.
#
# @example Passing records which match a given expression on the command-line
#
#   $ cat input
#   apple
#   banana
#   cat
#   $ cat input | wu-local regexp --match='^a'
#   apple
#
# @example Passing records which match a given expression in a dataflow
#
#   Wukong.dataflow(:uses_regexp) do
#     ... | regexp(match: /^a/) | ...
#   end
#
# @see Filter
# @see NotRegexpFilter
class RegexpFilter < Filter

  # The regular expression to use to match records.
  field :match, Regexp

  # Selects a `record` only if its string form matches this widget's
  # `match` field.  Every record is selected when no `match` is set.
  #
  # @param [Object] record
  # @return [true, false]
  def select?(record)
    return true unless match
    # `=~` yields a match offset or nil; coerce it to the documented
    # boolean instead of leaking the offset to callers.
    !!(match =~ record.to_s)
  end
  register(:regexp)
end
134
+
135
# A widget which only passes records if they *don't* match a
# regular expression.
#
# @example Passing records which don't match a given expression on the command-line
#
#   $ cat input
#   apple
#   banana
#   cat
#   $ cat input | wu-local not_regexp --match='^a'
#   banana
#   cat
#
# @example Passing records which don't match a given expression in a dataflow
#
#   Wukong.dataflow(:uses_not_regexp) do
#     ... | not_regexp(match: /^a/) | ...
#   end
#
# @see Filter
# @see RegexpFilter
class NotRegexpFilter < RegexpFilter
  # Select a `record` only if its string form <b>doesn't</b> match
  # this widget's `match` field.  Every record is selected when no
  # `match` is set.
  #
  # @param [Object] record
  # @return [true, false]
  def select?(record)
    !(match && match =~ record.to_s)
  end
  register(:not_regexp)
end
168
+
169
# A widget which only lets a certain number of records through.
#
# @example Letting the first 3 records through on the command-line
#
#   $ cat input
#   1
#   2
#   3
#   4
#   $ cat input | wu-local limit --max=3
#   1
#   2
#   3
#
# @example Letting the first 3 records through in a dataflow
#
#   Wukong.dataflow(:uses_limit) do
#     ... | limit(max: 3) | ...
#   end
#
# @see Filter
class Limit < Filter

  # The maximum number of records to let pass.
  field :max, Integer, :default => Float::INFINITY

  # The number of records seen so far.
  attr_accessor :count

  # Resets the record count to zero.
  def setup
    self.count = 0
  end

  # Select a record only while the count is still below `max`.
  # The count is incremented for every record, selected or not.
  #
  # @param [Object] record
  # @return [true, false]
  def select?(record)
    (@count < max).tap { @count += 1 }
  end
  register
end
215
+
216
# A widget which randomly samples a fraction of the input records.
#
# @example Sampling records on the command line
#
#   $ cat input
#   1
#   2
#   3
#   4
#   $ cat input | wu-local sample --fraction=0.5
#   1
#   3
#
# @example Sampling records in a dataflow
#
#   Wukong.dataflow(:uses_sample) do
#     ... | sample(fraction: 0.5) | ...
#   end
#
# @see Filter
# @see Limit
class Sample < Filter

  # The fraction of records to let pass.  Meaningful values lie
  # between 0.0 (pass nothing) and 1.0 (pass everything).
  field :fraction, Float, :default => 1.0

  # Selects a `record` at random, with a probability given by the
  # `fraction` for this widget.
  #
  # @param [Object] record
  # @return [true, false]
  def select?(record)
    fraction > rand()
  end
  register
end
253
+
254
# A widget useful for creating filters on the fly in a dataflow.
#
# When writing a filtering processor out as a class, just use the
# DSL for creating processors:
#
# @example Creating a select filter the usual way
#
#   Wukong.processor(:my_filter, Wukong::Processor::Filter) do
#     def select? record
#       record.length > 3
#     end
#   end
#
# When in a dataflow, sometimes it's easier to create a processor
# like this on the fly.
#
# @example Creating a select filter on the fly in a dataflow
#
#   Wukong.dataflow(:my_flow) do
#     ... | select { |record| record.length > 3 } | ...
#   end
#
# @see Filter
# @see Reject
class Select < Filter

  # Selects the given `record` by delegating to the
  # `perform_action` method, which will automatically be
  # populated by the block used to create this filter in the
  # dataflow DSL.
  #
  # @param [Object] record
  # @return [true, false]
  # @see Processor#perform_action
  def select?(record)
    verdict = perform_action(record)
    verdict
  end
  register
end
293
+
294
# A widget useful for creating filters on the fly in a dataflow.
# It is the inverse of Select: a record passes when
# `perform_action` returns a falsy value for it.
#
# @see Select
class Reject < Filter
  # Rejects the given `record` by delegating to the
  # `perform_action` method and negating its result.
  #
  # @param [Object] record
  # @return [true, false]
  # @see Processor#perform_action
  def select?(record)
    !perform_action(record)
  end
  register
end
309
+
310
+ end
311
+ end