wukong 3.0.0.pre → 3.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -0,0 +1,81 @@
1
+ require 'wukong'
2
+ require 'wukong/boot'
3
+ require_relative('spec_helpers/processor_helpers')
4
+ require_relative('spec_helpers/processor_methods')
5
+ require_relative('spec_helpers/spec_driver_matchers')
6
+ require_relative('spec_helpers/integration_driver')
7
+ require_relative('spec_helpers/integration_driver_matchers')
8
+ require_relative('spec_helpers/shared_examples')
9
+
10
+ module Wukong
11
+
12
+ # This module lets you write processor specs at a high level.
13
+ # Require it in your <tt>spec_helper.rb</tt> file:
14
+ #
15
+ # # in your spec/spec_helper.rb
16
+ # require 'wukong/spec_helpers'
17
+ # RSpec.configure do |config|
18
+ # include Wukong::SpecHelpers
19
+ # end
20
+ #
21
+ # Processors in a Wukong spec will have a collection of
22
+ # <tt>given_*</tt> methods you can use to (lazily) feed them records
23
+ # without having to build your own driver to run the
24
+ # processors.
25
+ #
26
+ # To each <tt>given_*</tt> method corresponds an <tt>emit_*</tt>
27
+ # matcher which will actually run the processor on the given
28
+ # inputs and compare against expected results. Here's an example,
29
+ # using a simple `tokenizer` processor.
30
+ #
31
+ # subject { processor(:tokenizer) }
32
+ #
33
+ # it "emits each word in a given string" do
34
+ # given("It was the best of times, it was the worst of times.").should emit(12).records
35
+ # end
36
+ #
37
+ # # Give similar input and check against explicit expected
38
+ # # output.
39
+ # it "should ignore punctuation and capitalization" do
40
+ # processor.given("You're crazy!").should emit("youre", "crazy")
41
+ # end
42
+ #
43
+ # # Pass the input but transform to JSON first (delimited and
44
+ # # as_tsv also work).
45
+ # it "should tokenize the 'text' attribute of a record if given JSON" do
46
+ # processor.given("text" => "Will be cast to JSON").as_json.should emit("will", "be", "cast", "to", "json")
47
+ # end
48
+ #
49
+ # # Initialize the :tokenizer processor with arguments to test
50
+ # # behavior under different conditions.
51
+ # it "should output a single record when asked for JSON output" do
52
+ # processor(:json => true).given("It was the best of times, it was the worst of times.").should emit(1).records
53
+ # end
54
+ #
55
+ # # Initialize processor with arguments and express that the
56
+ # # expected output will be in JSON though given as an object.
57
+ # it "should output all the tokens for its input record with its JSON output" do
58
+ # processor(:json => true).given("You're crazy!").should emit("tokens" => ["youre", "crazy"]).as_json
59
+ # end
60
+ #
61
+ # # Initialize processor with arguments, and both input and
62
+ # # output will be serialized/deserialized to/from JSON
63
+ # # automatically.
64
+ # it "can read and write pure JSON" do
65
+ # processor(:json => true).given("text" => "You're crazy!").as_json.should emit("tokens" => ["youre", "crazy"]).as_json
66
+ # end
67
+ #
68
+ # # Use a processor outside the scope of the top-level :describe
69
+ # # block.
70
+ # it "has a friend which does the same thing" do
71
+ # processor(:similar_tokenizer, :json => true).given("hi there").should emit(2).records
72
+ # end
73
+ module SpecHelpers
74
+ include ProcessorHelpers
75
+ include SpecMatchers
76
+ include IntegrationRunner
77
+ include IntegrationMatchers
78
+ end
79
+
80
+ Processor.class_eval { include SpecHelpers::ProcessorSpecMethods }
81
+ end
@@ -0,0 +1,144 @@
1
+ require 'open3'
2
+
3
+ module Wukong
4
+ module SpecHelpers
5
+
6
+ # Provides a `command` method for writing integration tests for
7
+ # commands.
8
+ module IntegrationRunner
9
+
10
+ # Spawn a command and capture its STDOUT, STDERR, and exit code.
11
+ #
12
+ # The `args` will be joined together into a command line.
13
+ #
14
+ # It is expected that you will use the matchers defined in
15
+ # IntegrationMatchers in your integration tests:
16
+ #
17
+ # @example Check output of 'ls' includes a string 'foo.txt'
18
+ # it "lists files" do
19
+ # command('ls').should have_stdout('foo.txt')
20
+ # end
21
+ #
22
+ # @example More complicated
23
+ # context "long format" do
24
+ # it "lists files with timestamps" do
25
+ # command('ls', '-l').should have_stdout('foo.txt', /\w+ \d+ \d+:\d+/)
26
+ # end
27
+ # end
28
+ #
29
+ # @param [Array<String>] args
30
+ #
31
+ # @overload command(*args, options={})
32
+ # If the last element of `args` is a Hash it will be used for
33
+ # options.
34
+ #
35
+ # The :env option specifies the command line environment to
36
+ # use for the command. By default this will be the value of
37
+ # the Ruby process's own `ENV` variable. If running in a
38
+ # context in which the `integration_env` method is defined,
39
+ # its return value will be merged on top of `ENV`. An
40
+ # explicitly provided :env option will again be merged on top.
41
+ #
42
+ # The :cwd option specifies the working directory to start in.
43
+ # It defaults to the value of <tt>Dir.pwd</tt>
44
+ #
45
+ # @param [Array<String>] args
46
+ # @param [Hash] options
47
+ # @option options [Hash] env the shell environment to spawn the command with
48
+ # @option options [String] cwd the directory to execute the command in
49
+ def command *args
50
+ a = args.flatten.compact
51
+ options = (a.last.is_a?(Hash) ? a.pop : {})
52
+
53
+ env = ENV.to_hash.dup
54
+ env.merge!(integration_env) if respond_to?(:integration_env)
55
+ env.merge!(options[:env] || {})
56
+
57
+ cwd = options[:cwd]
58
+ cwd ||= (respond_to?(:integration_cwd) ? integration_cwd : Dir.pwd)
59
+
60
+ IntegrationDriver.new(a, cwd: cwd, env: env)
61
+ end
62
+ end
63
+
64
+ # A driver for running commands in a subprocess.
65
+ class IntegrationDriver
66
+
67
+ # The command to execute
68
+ attr_accessor :cmd
69
+
70
+ # The directory in which to execute the command.
71
+ attr_accessor :cwd
72
+
73
+ # The ID of the spawned subprocess (while it was running).
74
+ attr_accessor :pid
75
+
76
+ # The STDOUT of the spawned process.
77
+ attr_accessor :stdout
78
+
79
+ # The STDERR of the spawned process.
80
+ attr_accessor :stderr
81
+
82
+ # The exit code of the spawned process.
83
+ attr_accessor :exit_code
84
+
85
+ # Run the command and capture its outputs and exit code.
86
+ #
87
+ # @return [true, false]
88
+ def run!
89
+ return false if ran?
90
+ Open3.popen3(env, cmd) do |i, o, e, wait_thr|
91
+ self.pid = wait_thr.pid
92
+
93
+ @inputs.each { |input| i.puts(input) }
94
+ i.close
95
+
96
+ self.stdout = o.read
97
+ self.stderr = e.read
98
+ self.exit_code = wait_thr.value.to_i
99
+ end
100
+ @ran = true
101
+ end
102
+
103
+ # Initialize a new IntegrationDriver to run a given command.
104
+ def initialize args, options
105
+ @args = args
106
+ @env = options[:env]
107
+ @cwd = options[:cwd]
108
+ @inputs = []
109
+ end
110
+
111
+ def cmd
112
+ @args.compact.map(&:to_s).join(' ')
113
+ end
114
+
115
+ def on *events
116
+ @inputs.concat(events)
117
+ self
118
+ end
119
+
120
+ def env
121
+ ENV.to_hash.merge(@env || {})
122
+ end
123
+
124
+ def ran?
125
+ @ran
126
+ end
127
+
128
+ def cmd_summary
129
+ [
130
+ cmd,
131
+ "with env #{env_summary}",
132
+ "in dir #{cwd}"
133
+ ].join("\n")
134
+ end
135
+
136
+ def env_summary
137
+ { "PATH" => env["PATH"], "RUBYLIB" => env["RUBYLIB"] }.inspect
138
+ end
139
+
140
+ end
141
+ end
142
+ end
143
+
144
+
@@ -0,0 +1,219 @@
1
+ module Wukong
2
+ module SpecHelpers
3
+
4
# Matcher factories for STDOUT, STDERR, and exit code, for use in
# integration tests of Wukong's command-line APIs.
module IntegrationMatchers

  # Builds a matcher requiring every `expectation` to appear in the
  # STDOUT of the command.  Order is irrelevant.  A String is checked
  # for inclusion, a Regexp is matched against the output.
  #
  # @param [Array<String,Regexp>] expectations
  # @return [StdoutMatcher]
  def have_stdout(*expectations)
    StdoutMatcher.new(*expectations)
  end

  # Builds a matcher requiring every `expectation` to appear in the
  # STDERR of the command.  Order is irrelevant.  A String is checked
  # for inclusion, a Regexp is matched against the output.
  #
  # @param [Array<String,Regexp>] expectations
  # @return [StderrMatcher]
  def have_stderr(*expectations)
    StderrMatcher.new(*expectations)
  end

  # Builds a matcher requiring the command to exit with the given
  # `code`.
  #
  # @param [Integer] code
  # @return [ExitCodeMatcher]
  def exit_with(code)
    ExitCodeMatcher.new(code)
  end

end
36
+
37
# Base class for matchers which run a command through a driver and
# check what it produced.
#
# This class calls `output` (the text to check) and
# `output_description` (its name in failure messages) but does not
# define them; subclasses must provide both.
class IntegrationMatcher

  # The driver used to run the actual commands.
  attr_accessor :driver

  # An array of expectations about the output of the driver.
  attr_accessor :expectations

  # The expectation which caused failure (nil when every expectation
  # matched).
  attr_accessor :failed_expectation

  # Create a matcher on the given expectations.  Each expectation
  # can be either a String or a Regexp.  Strings will be tested
  # for inclusion in the output, Regexps will be tested for a
  # match against the output.
  #
  # @param [Array<String,Regexp>] expectations
  def initialize(*expectations)
    self.expectations = expectations
  end

  # Return whether or not the given command's output matches
  # expectations.
  #
  # If an expectation fails to match, the `failed_expectation`
  # attribute will be set to it; otherwise the attribute is cleared,
  # so a result from an earlier call never leaks into this one.
  #
  # @param [IntegrationDriver] driver
  # @return [true, false]
  def matches?(driver)
    self.driver = driver
    driver.run!
    self.failed_expectation = expectations.find do |expectation|
      !output.send(match_function(expectation), expectation)
    end
    failed_expectation.nil?
  end

  # :nodoc:
  def failure_message
    "Ran\n\n #{formatted_command}\n\nand expected #{output_description}\n\n#{formatted_output}\n\nto #{match_type}\n\n #{failed_expectation}"
  end

  # :nodoc:
  #
  # A negated expectation fails precisely when every expectation
  # matched, so there is no single failed expectation to report; the
  # message lists all of them instead.
  def negative_failure_message
    unwanted = expectations.map(&:inspect).join(', ')
    "Expected #{output_description} of #{driver.cmd}\n\n#{output}\n\nto NOT #{negative_match_type}\n\n#{unwanted}."
  end

  # :nodoc:
  #
  # Name under which RSpec 3 looks up the negated failure message.
  # Delegates (rather than aliases) so that a subclass overriding
  # `negative_failure_message` is honoured.
  def failure_message_when_negated
    negative_failure_message
  end

  # :nodoc:
  def formatted_output
    output.split("\n").map { |line| ' ' + line }.join("\n")
  end

  # :nodoc:
  def formatted_command
    "$ #{driver.cmd}"
  end

  # :nodoc:
  def match_function(expectation)
    expectation.is_a?(Regexp) ? :match : :include?
  end

  # :nodoc:
  def match_type
    failed_expectation.is_a?(Regexp) ? 'match' : 'include'
  end

  # :nodoc:
  #
  # The verb(s) describing how the expectations were tested, e.g.
  # "include", "match", or "include or match" for a mixed list.
  def negative_match_type
    expectations.map { |expectation| expectation.is_a?(Regexp) ? 'match' : 'include' }.uniq.join(' or ')
  end

end
112
+
113
# Checks expectations against the STDOUT captured by the driver.
class StdoutMatcher < IntegrationMatcher

  # The text under test: the driver's captured STDOUT.
  def output
    driver.stdout
  end

  # :nodoc:
  def output_description
    "STDOUT"
  end

  # :nodoc:
  def description
    "have the correct #{output_description}"
  end

end
131
+
132
# Checks expectations against the STDERR captured by the driver.
class StderrMatcher < IntegrationMatcher

  # The text under test: the driver's captured STDERR.
  def output
    driver.stderr
  end

  # :nodoc:
  def output_description
    "STDERR"
  end

  # :nodoc:
  def description
    "print an appropriate error message on #{output_description}"
  end

end
149
+
150
# A matcher for the exit code of a command.
class ExitCodeMatcher < IntegrationMatcher

  # Initialize this matcher with the given `code`.
  #
  # If `code` is the symbol <tt>:non_zero</tt> then the
  # expectation will be any non-zero exit code.  Anything else is
  # converted with `to_i`.
  #
  # @param [Integer,Symbol] code
  def initialize(code)
    @expected_code = (code == :non_zero ? :non_zero : code.to_i)
  end

  # Return whether or not the given command's exit code matches
  # the expectation.
  #
  # The outcome is recomputed on every call, so a matcher that failed
  # once does not keep failing when it is used again.
  #
  # @param [IntegrationDriver] driver
  # @return [true, false]
  def matches?(driver)
    self.driver = driver
    driver.run!
    actual  = driver.exit_code
    @failed = non_zero_exit_code? ? (actual == 0) : (actual != expected_exit_code)
    !@failed
  end

  # :nodoc:
  def failure_message
    "Ran\n\n #{formatted_command}\n\nexpecting #{expected_exit_code_description}. Got #{driver.exit_code.inspect} instead."
  end

  # :nodoc:
  def negative_failure_message
    "Ran\n\n #{formatted_command}\n\nNOT expecting #{expected_exit_code_description}."
  end

  # :nodoc:
  def non_zero_exit_code?
    @expected_code == :non_zero
  end

  # :nodoc:
  #
  # Only meaningful when a specific code is expected; callers check
  # `non_zero_exit_code?` first.
  def expected_exit_code
    (@expected_code || 0).to_i
  end

  # :nodoc:
  def expected_exit_code_description
    if non_zero_exit_code?
      "a non-zero exit code"
    else
      "an exit code of #{expected_exit_code}"
    end
  end

  # :nodoc:
  def description
    "exit with #{expected_exit_code_description}"
  end

end
218
+ end
219
+ end
@@ -0,0 +1,95 @@
1
+ module Wukong
2
+ module SpecHelpers
3
module ProcessorHelpers

  # Builds a processor, choosing what to build from the arguments.
  #
  # With no arguments, the description of the enclosing example
  # group (`self.class.description`) says what to build:
  #
  #   context :tokenizer do
  #     it "uses whitespace as the default separator between tokens" do
  #       processor.separator.should == /\s+/
  #     end
  #   end
  #
  # It can be used inside RSpec's `subject` and `let`:
  #
  #   context "with no arguments" do
  #     subject { processor }
  #     it "uses whitespace as the default separator between tokens" do
  #       separator.should == /\s+/
  #     end
  #   end
  #
  # With a Hash as the first argument, the Hash supplies the options
  # and the example group's description still says what to build:
  #
  #   context "with arguments" do
  #     subject { processor(separator: ' ') }
  #     it "uses the given separator between tokens" do
  #       separator.should == ' '
  #     end
  #   end
  #
  # Otherwise the first argument says what to build and the optional
  # second argument supplies the options:
  #
  #   context "tokenizers" do
  #     let(:default_tokenizer) { processor(:tokenizer) }
  #     let(:complex_tokenizer) { processor(:complex_tokenizer, stemming: true) }
  #     ...
  #   end
  #
  # A block, if given, is evaluated in the context of the new
  # processor after it has been set up.
  #
  # Also available as `flow`.
  def processor(*args, &block)
    if args.empty?
      create_processor(self.class.description, {}, &block)
    elsif args.first.is_a?(Hash)
      create_processor(self.class.description, args.first, &block)
    else
      name_or_klass, options = args
      create_processor(name_or_klass, (options || {}), &block)
    end
  end
  alias_method :flow, :processor

  # Does building the given `klass` produce a Wukong::Processor?
  #
  # Note that answering this builds an instance via `klass.build`.
  #
  # @param [Class] klass
  # @return [true, false]
  def processor?(klass)
    built = klass.build
    built.is_a?(Processor)
  end

  # :nodoc:
  #
  # Resolves `name_or_klass` (a Class is used directly, anything else
  # is looked up in the Wukong registry by its symbolized name),
  # builds it with freshly booted settings merged with `options`,
  # calls `setup` on it, and evaluates the optional block against it.
  def create_processor(name_or_klass, options = {}, &block)
    klass = name_or_klass
    unless klass.is_a?(Class)
      klass = Wukong.registry.retrieve(name_or_klass.to_s.to_sym)
      raise Error.new("Could not find a Wukong::Processor class named '#{name_or_klass}'") if klass.nil?
    end
    raise Error.new("#{klass} is not a subclass of Wukong::Processor") unless processor?(klass)

    settings = Configliere::Param.new
    Wukong.boot!(settings)

    built = klass.build(settings.merge(options))
    built.setup
    built.instance_eval(&block) if block_given?
    built
  end
end
93
+ end
94
+ end
95
+