wukong 3.0.0.pre → 3.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,12 +1,12 @@
1
1
  require 'spec_helper'
2
- require 'wukong'
2
+ # require 'wukong'
3
3
 
4
- describe :runner, :helpers => true do
4
+ # describe :runner, :helpers => true do
5
5
 
6
- context 'tiny_count example script' do
7
- it 'is shorter than a tweet' do
8
- example_script_contents('tiny_count.rb').length.should < 140
9
- end
6
+ # context 'tiny_count example script' do
7
+ # it 'is shorter than a tweet' do
8
+ # example_script_contents('tiny_count.rb').length.should < 140
9
+ # end
10
10
 
11
- end
12
- end
11
+ # end
12
+ # end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
+ describe "Filters" do
4
+
5
+ context :null do
6
+ it_behaves_like 'a processor', :named => :null
7
+ it "should not pass anything, ever" do
8
+ processor.given('', 3, 'hi', nil).should emit(0).records
9
+ end
10
+ end
11
+
12
+ context :identity do
13
+ it_behaves_like 'a processor', :named => :identity
14
+ it "should pass everything, always" do
15
+ processor.given('', 3, 'hi', nil).should emit('', 3, 'hi', nil)
16
+ end
17
+ end
18
+
19
+ context :regexp do
20
+ it_behaves_like 'a processor', :named => :regexp
21
+ it "should pass everything given no 'match' argument" do
22
+ processor.given('snap', 'crackle', 'pop').should emit('snap', 'crackle', 'pop')
23
+ end
24
+ it "should pass everything its 'match' argument matches" do
25
+ processor(match: /a/).given('snap', 'crackle', 'pop').should emit('snap', 'crackle')
26
+ end
27
+ end
28
+
29
+ context :not_regexp do
30
+ it_behaves_like 'a processor', :named => :not_regexp
31
+ it "should pass everything given no 'match' argument" do
32
+ processor.given('snap', 'crackle', 'pop').should emit('snap', 'crackle', 'pop')
33
+ end
34
+ it "should pass everything its 'match' argument does not match" do
35
+ processor(match: /a/).given('snap', 'crackle', 'pop').should emit('pop')
36
+ end
37
+ end
38
+
39
+ context :limit do
40
+ it_behaves_like 'a processor', :named => :limit
41
+ it "should pass everything given no 'max' argument" do
42
+ processor.given('snap', 'crackle', 'pop').should emit('snap', 'crackle', 'pop')
43
+ end
44
+ it "should pass only as many records as its 'max' argument" do
45
+ processor(max: 2).given('snap', 'crackle', 'pop', 'whoa').should emit('snap', 'crackle')
46
+ end
47
+ end
48
+
49
+ context :sample do
50
+ it_behaves_like 'a processor', :named => :sample
51
+ it "should pass everything given no 'fraction' argument" do
52
+ processor.given('snap', 'crackle', 'pop').should emit('snap', 'crackle', 'pop')
53
+ end
54
+ it "should pass only the records sampled according to its 'fraction' argument" do
55
+ processor(:fraction => 0.5).tap do |proc|
56
+ proc.should_receive(:rand).and_return(0.7, 0.1, 0.6)
57
+ end.given('snap', 'crackle', 'pop').should emit('crackle')
58
+ end
59
+ end
60
+
61
+ end
@@ -0,0 +1,126 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong::Processor do
4
+
5
+ let(:hsh) { { "hi" => "there", "top" => { "lower" => { "lowest" => "value" } } } }
6
+ let(:ary) { ['1', 2, 'three'] }
7
+
8
+ context :logger do
9
+ it_behaves_like "a processor", :named => :logger
10
+
11
+ it "logs each event at the 'info' level by default" do
12
+ log = mock("logger")
13
+ log.should_receive(:info).with('hi there')
14
+ log.should_receive(:info).with('buddy')
15
+ processor(:logger) do
16
+ stub!(:log).and_return(log)
17
+ end.given('hi there', 'buddy').should emit(0).records
18
+ end
19
+
20
+ it "logs each event at a desired level set with an argument" do
21
+ log = mock("logger")
22
+ log.should_receive(:debug).with('hi there')
23
+ log.should_receive(:debug).with('buddy')
24
+ processor(:logger, level: :debug) do
25
+ stub!(:log).and_return(log)
26
+ end.given('hi there', 'buddy').should emit(0).records
27
+ end
28
+ end
29
+
30
+
31
+ context :extract do
32
+ subject { processor(:extract) }
33
+
34
+ it_behaves_like 'a processor', :named => :extract
35
+
36
+ context "on a string" do
37
+ it "emits the string with no arguments" do
38
+ processor(:extract).given('hi there', 'buddy').should emit('hi there', 'buddy')
39
+ end
40
+ end
41
+ context "on a Fixnum" do
42
+ it "emits the number with no arguments" do
43
+ processor(:extract).given(3, 3.0).should emit(3, 3.0)
44
+ end
45
+ end
46
+ context "on a Hash" do
47
+ it "emits the hash with no arguments" do
48
+ processor(:extract).given(hsh).should emit(hsh)
49
+ end
50
+ it "can extract a key" do
51
+ processor(:extract, part: 'hi').given(hsh).should emit('there')
52
+ end
53
+ it "emits nil when the value of the key is nil" do
54
+ processor(:extract, part: 'bye').given(hsh).should emit(nil)
55
+ end
56
+ it "can extract a nested key" do
57
+ processor(:extract, part: 'top.lower.lowest').given(hsh).should emit('value')
58
+ end
59
+ it "emits nil when the value of this nested key is nil" do
60
+ processor(:extract, part: 'foo.bar.baz').given(hsh).should emit(nil)
61
+ end
62
+ end
63
+ context "on an Array" do
64
+ it "emits the array with no arguments" do
65
+ processor(:extract).given(ary).should emit(ary)
66
+ end
67
+ it "can extract the nth value with an integer argument" do
68
+ processor(:extract, part: 2).given(ary).should emit(2)
69
+ end
70
+ it "can extract the nth value with a string argument" do
71
+ processor(:extract, part: '2').given(ary).should emit(2)
72
+ end
73
+ end
74
+ context "on JSON" do
75
+ let(:garbage) { '{"239823:' }
76
+ it "emits the JSON with no arguments" do
77
+ processor(:extract).given_json(hsh).should emit_json(hsh)
78
+ end
79
+ it "will pass badly formed records through unchanged" do
80
+ processor(:extract).given(garbage).should emit(garbage)
81
+ end
82
+ it "can extract a key" do
83
+ processor(:extract, part: 'hi').given_json(hsh).should emit('there')
84
+ end
85
+ it "can extract a nested key" do
86
+ processor(:extract, part: 'top.lower.lowest').given_json(hsh).should emit('value')
87
+ end
88
+ it "emits nil when the record is missing the key" do
89
+ processor(:extract, part: 'foo.bar.baz').given_json(hsh).should emit(nil)
90
+ end
91
+ end
92
+ context "on delimited data" do
93
+ it "emits the row with no arguments" do
94
+ processor(:extract).given_delimited('|', ary).should emit(ary.map(&:to_s).join('|'))
95
+ end
96
+ it "can extract the nth value with an integer argument" do
97
+ processor(:extract, part: 2, separator: '|').given_delimited('|', ary).should emit('2')
98
+ end
99
+ it "can extract nth value with a string argument" do
100
+ processor(:extract, part: '2', separator: '|').given_delimited('|', ary).should emit('2')
101
+ end
102
+ end
103
+ context "on TSV" do
104
+ it "emits the TSV with no arguments" do
105
+ processor(:extract).given_tsv(ary).should emit(ary.map(&:to_s).join("\t"))
106
+ end
107
+ it "can extract the nth value with an integer argument" do
108
+ processor(:extract, part: 2).given_tsv(ary).should emit('2')
109
+ end
110
+ it "can extract the nth value with a string argument" do
111
+ processor(:extract, part: '2').given_tsv(ary).should emit('2')
112
+ end
113
+ end
114
+ context "on CSV" do
115
+ it "emits the CSV with no arguments" do
116
+ processor(:extract).given_csv(ary).should emit(ary.map(&:to_s).join(","))
117
+ end
118
+ it "can extract the nth value with an integer argument" do
119
+ processor(:extract, part: 2, separator: ',').given_csv(ary).should emit('2')
120
+ end
121
+ it "can extract the nth value with a string argument" do
122
+ processor(:extract, part: '2', separator: ',').given_csv(ary).should emit('2')
123
+ end
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,92 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the :bin processor, which sorts numeric input into histogram bins.
#
# `nums` and `json` are not defined in this file -- presumably they are
# provided by the "reducers" shared context; confirm there.
describe Wukong::Processor do
  describe :bin do
    include_context "reducers"
    it_behaves_like 'a processor', :named => :bin

    # Expected TSV rows for `nums` split into 5 bins. The columns appear to be
    # lower edge, upper edge and count (the counts sum to 50) -- TODO confirm
    # the column order against the processor itself.
    let(:bins) {
      [
        ['0.0',   '2.000',  '9.000'],
        ['2.000', '4.000',  '9.000'],
        ['4.000', '6.000',  '8.000'],
        ['6.000', '8.000',  '11.000'],
        ['8.000', '10.000', '13.000']
      ]
    }

    it "raises an error when called with a non-positive-definite number of bins" do
      lambda { processor(num_bins: -1) }.should raise_error(Wukong::Error)
    end

    # min: 10 with max: 0 is an inverted range (the minimum exceeds the maximum).
    it "raises an error when called with a minimum that's greater than or equal to the maximum" do
      lambda { processor(min: 10, max: 0) }.should raise_error(Wukong::Error)
    end

    it "will bin 50 numbers into 7 bins (uses the square root)" do
      processor.given(*nums).should emit(7).records
    end

    # The description names the same bin count the example requests and expects.
    it "will bin 50 numbers into 10 bins if asked" do
      processor(num_bins: 10).given(*nums).should emit(10).records
    end

    it "counts correctly in each bin" do
      processor(num_bins: 5).given(*nums).should emit_tsv(*bins)
    end

    it "can express counts logarithmically" do
      row = processor(num_bins: 5, log_counts: true).given(*nums).tsv_output.first
      row.size.should == 3
      # 2.197 ~= ln(9); the first row of `bins` has a plain count of 9.
      row[2].to_f.should be_within(0.1).of(2.197)
    end

    it "can add a normalized frequency" do
      row = processor(num_bins: 5, normalize: true).given(*nums).tsv_output.first
      # One extra column compared with the plain output.
      row.size.should == 4
      # 0.18 == 9 / 50: the first bin's count over the sum of the counts in `bins`.
      row[3].to_f.should be_within(0.1).of(0.18)
    end

    it "can add a normalized frequency and express counts logarithmically" do
      row = processor(num_bins: 5, normalize: true, log_counts: true).given(*nums).tsv_output.first
      row.size.should == 4
      row[2].to_f.should be_within(0.1).of(2.197)
      # -1.715 ~= ln(0.18), i.e. the log of the normalized frequency above.
      row[3].to_f.should be_within(0.1).of(-1.715)
    end

    it "can bin on the fly given min, max, and num_bins options" do
      # The block sets message expectations; presumably it is evaluated against
      # the processor instance -- confirm in the spec helpers.
      output = processor(min: -30, max: 30, num_bins: 3) do
        # we can bin on the fly: nothing is appended to `values` and no
        # separate bin! call is made
        values.should_not_receive(:<<)
        should_not_receive(:bin!)
      end.given(*nums).tsv_output

      output.size.should == 3
      output.first[0].to_f.should be_within(0.1).of(-30)
      output.last[1].to_f.should  be_within(0.1).of(30)
    end

    it "can bin on the fly given fixed bin edges" do
      output = processor(edges: [0,1,5,10]) do
        # we can bin on the fly: nothing is appended to `values` and no
        # separate bin! call is made
        values.should_not_receive(:<<)
        should_not_receive(:bin!)
      end.given(*nums).tsv_output

      # Four edges delimit three bins: [0, 1], [1, 5] and [5, 10].
      output.size.should == 3
      output[0][0].to_f.should be_within(0.1).of(0.0)
      output[0][1].to_f.should be_within(0.1).of(1.0)
      output[1][0].to_f.should be_within(0.1).of(1.0)
      output[1][1].to_f.should be_within(0.1).of(5.0)
      output[2][0].to_f.should be_within(0.1).of(5.0)
      output[2][1].to_f.should be_within(0.1).of(10.0)
    end

    it "can extract the value to bin by from an object" do
      # 'data.n' looks like a dotted path into each record of `json` -- the
      # record layout is not visible here; confirm in the shared context.
      output = processor(by: 'data.n', min: 0).given(*json).tsv_output
      output.size.should == 2
      output.first[0].to_f.should be_within(0.1).of(0.0)
      output.last[1].to_f.should  be_within(0.1).of(100.0)
    end

  end
end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
# Spec for the :count processor.
describe Wukong::Processor do
  describe :count do
    include_context 'reducers'
    it_behaves_like 'a processor', named: :count

    # `strings` is not defined in this file (presumably supplied by the
    # 'reducers' shared context); the example expects one emitted value, 4.
    it 'should emit the total count of records' do
      counted = processor.given(*strings)
      counted.should emit(4)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the :group processor, which is expected to emit one
# [group, count] TSV row per distinct value in already-sorted input.
#
# `strings`, `json_sorted_n` and `tsv_sorted` are not defined in this file --
# presumably they come from the "reducers" shared context; confirm there.
describe Wukong::Processor do
  describe :group do
    include_context "reducers"
    it_behaves_like 'a processor', :named => :group

    let(:grouped_strings) { [['apple', '2'], ['banana', '1'], ['cookie', '1']] }
    # The leading ['', '2'] row presumably covers records without a value for
    # the grouping field -- TODO confirm against the fixtures.
    let(:grouped_nums)    { [['', '2'], ['1', '1'], ['5', '1'], ['10', '1'], ['100', '1']] }

    it "will group single values" do
      processor(:group).given(*strings.sort).should emit_tsv(*grouped_strings)
    end

    # The expectation's return value is not needed, so it is no longer stored
    # in a local (the old local was unused and shadowed Kernel#proc).
    it "can group from within a JSON hash" do
      processor(:group, by: 'data.n').given(*json_sorted_n).should emit_tsv(*grouped_nums)
    end

    it "can group from within a TSV row" do
      processor(:group, by: '3').given(*tsv_sorted).should emit_tsv(*grouped_nums)
    end
  end
end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the :moments processor, which is expected to emit one hash per
# group holding the group's record count and, for each requested field, its
# count / mean / standard deviation.
#
# `strings` and `json_sorted_outer` are not defined in this file --
# presumably they come from the "reducers" shared context; confirm there.
describe Wukong::Processor do
  describe :moments do
    include_context "reducers"
    it_behaves_like 'a processor', :named => :moments

    # Without an `of:` option there are no fields to summarise, so `results`
    # is expected to be empty for every group.
    it "behaves like group when not called with any arguments" do
      processor(:moments).given(*strings.sort).should emit(
        {group: 'apple',  count: 2, results: {}},
        {group: 'banana', count: 1, results: {}},
        {group: 'cookie', count: 1, results: {}}
      )
    end

    it "calculates the moments of numeric fields" do
      processor(:moments, group_by: 'outer', of: 'data.n').given(*json_sorted_outer).should emit(
        # The nil group is expected to carry no moments for "data.n".
        {group: nil,      count: 2, results: {"data.n" => {}}},
        {group: 'apple',  count: 2, results: {"data.n"=>{:count=>2, :mean=>3.0,   :std_dev=>2.0}}},
        {group: 'banana', count: 1, results: {"data.n"=>{:count=>1, :mean=>100.0, :std_dev=>0.0}}},
        {group: 'cookie', count: 1, results: {"data.n"=>{:count=>1, :mean=>10.0,  :std_dev=>0.0}}}
      )
    end

    # With `std_dev: false` the :std_dev key is expected to be absent.
    it "will leave off the standard deviation if desired" do
      processor(:moments, group_by: 'outer', of: 'data.n', std_dev: false).given(*json_sorted_outer).should emit(
        {group: nil,      count: 2, results: {"data.n" => {}}},
        {group: 'apple',  count: 2, results: {"data.n"=>{:count=>2, :mean=>3.0   }}},
        {group: 'banana', count: 1, results: {"data.n"=>{:count=>1, :mean=>100.0 }}},
        {group: 'cookie', count: 1, results: {"data.n"=>{:count=>1, :mean=>10.0  }}}
      )
    end

  end
end
36
+
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the :sort processor.
#
# `strings`, `nums`, `json`, `json_sorted_n`, `tsv` and `tsv_sorted` are not
# defined in this file -- presumably they come from the "reducers" shared
# context; confirm there.
describe Wukong::Processor do
  describe :sort do
    include_context "reducers"
    it_behaves_like 'a processor', :named => :sort

    it "will use ascending order by default" do
      processor(:sort).given(*strings).should emit(*strings.sort)
    end

    it "can sort in reversed (descending) order" do
      processor(:sort, reverse: true).given(*strings).should emit(*strings.sort.reverse)
    end

    # Compared with plain Array#sort on `nums`, i.e. no numeric conversion.
    it "will use lexical order by default" do
      processor(:sort).given(*nums).should emit(*nums.sort)
    end

    # The expected order is built by sorting the integer values and converting
    # them back to strings.
    it "can sort in numerical order" do
      processor(:sort, numeric: true).given(*nums).should emit(*nums.map(&:to_i).sort.map(&:to_s))
    end

    # The expectation's return value is not needed, so it is no longer stored
    # in a local (the old local was unused and shadowed Kernel#proc).
    it "can sort from within a JSON hash" do
      processor(:sort, numeric: true, on: 'data.n').given(*json).should emit(*json_sorted_n)
    end

    it "can sort from within a TSV row" do
      processor(:sort, numeric: true, on: '3').given(*tsv).should emit(*tsv_sorted)
    end
  end
end
@@ -0,0 +1,92 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the serializing processors (:to_json and :to_tsv). In each context
# one record is expected to be emitted in serialized form and one is expected
# to produce no output at all.
describe 'Serializing' do

  context :to_json do
    let(:emittable) do
      { 'hi' => 'there' }
    end
    let(:not_emittable) do
      { 'n' => Float::INFINITY }
    end

    it_behaves_like 'a processor', named: :to_json

    it 'should handle valid records' do
      serialized = processor.given(emittable)
      serialized.should emit_json(emittable)
    end

    it 'should skip bad records' do
      serialized = processor.given(not_emittable)
      serialized.should emit(0).records
    end
  end

  context :to_tsv do
    let(:emittable) do
      ['foo', 2, :a]
    end
    let(:not_emittable) do
      nil
    end

    it_behaves_like 'a processor', named: :to_tsv

    it 'should handle valid records' do
      # The expected row is the record with every field converted to a String.
      stringified = emittable.map { |field| field.to_s }
      processor.given(emittable).should emit_tsv(stringified)
    end

    it 'should skip bad records' do
      serialized = processor.given(not_emittable)
      serialized.should emit(0).records
    end
  end
end
37
+
38
# Specs for the deserializing processors (:from_json and :from_tsv). In each
# context one input is expected to be emitted as a parsed structure and one
# is expected to produce no output at all.
describe 'Deserializing' do

  context :from_json do
    let(:parseable) do
      '{"hi": "there"}'
    end
    # Truncated JSON text.
    let(:not_parseable) do
      '{"832323:'
    end

    it_behaves_like 'a processor', named: :from_json

    it 'should handle valid records' do
      parsed = processor.given(parseable)
      parsed.should emit({ 'hi' => 'there' })
    end

    it 'should skip bad records' do
      parsed = processor.given(not_parseable)
      parsed.should emit(0).records
    end
  end

  context :from_tsv do
    let(:parseable) do
      "foo\t2\ta"
    end
    let(:not_parseable) do
      nil
    end

    it_behaves_like 'a processor', named: :from_tsv

    it 'should handle valid records' do
      # The expected fields are the input line split on tab characters.
      fields = parseable.split("\t")
      processor.given(parseable).should emit(fields)
    end

    it 'should skip bad records' do
      parsed = processor.given(not_parseable)
      parsed.should emit(0).records
    end
  end
end
71
+
72
# Specs for the :pretty processor: parseable JSON is expected to come out as
# JSON, while any other input is expected to be emitted unchanged.
describe 'Pretty printing' do

  context 'JSON' do
    let(:parseable) do
      '{"hi": "there"}'
    end
    # Truncated JSON text.
    let(:not_parseable) do
      '{"832323:'
    end

    it_behaves_like 'a processor', named: :pretty

    it 'should prettify parseable records' do
      printer = processor(:pretty)
      printer.given(parseable).should emit_json({ 'hi' => 'there' })
    end

    it 'should pass on non parseable records' do
      printer = processor(:pretty)
      printer.given(not_parseable).should emit(not_parseable)
    end
  end

  it 'should pass on everything else' do
    printer = processor(:pretty)
    printer.given('foobar').should emit('foobar')
  end
end