wukong 3.0.0.pre → 3.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -0,0 +1,156 @@
1
+ require_relative('utils')
2
+
3
+ module Wukong
4
+ class Processor
5
+
6
+ # A widget that will log all incoming records.
7
+ #
8
+ # @example Logging records from the command line
9
+ #
10
+ # $ cat input
11
+ # 1
12
+ # 2
13
+ # 3
14
+ # $ cat input | wu-local logger
15
+ # 2012-11-28 18:20:46 [INFO] Logger: 1
16
+ # 2012-11-28 18:20:46 [INFO] Logger: 2
17
+ # 2012-11-28 18:20:46 [INFO] Logger: 3
18
+ #
19
+ # @example Logging records within a dataflow
20
+ #
21
+ # Wukong.dataflow(:uses_logger) do
22
+ # ... | logger
23
+ # end
24
+ class Logger < Processor
25
+ # The level to use for logging.
26
+ field :level, Symbol, :default => :info
27
+
28
+ # Process a given `record` by logging it.
29
+ #
30
+ # @param [Object] record
31
+ def process(record)
32
+ log.send(level, record)
33
+ end
34
+ register
35
+ end
36
+
37
+ # A widget that extracts parts of incoming records.
38
+ #
39
+ # This widget can extract part of the following kinds of objects:
40
+ #
41
+ # - Hash
42
+ # - Array
43
+ # - JSON string
44
+ # - delimited string ("\t" or "," or other)
45
+ # - models
46
+ #
47
+ # In each case it will attempt to appropriately parse its
48
+ # <tt>:part</tt> argument.
49
+ #
50
+ # @example Extracting a column from an input TSV record on the command-line
51
+ #
52
+ # $ cat input
53
+ # snap crackle pop
54
+ # 1 2 3
55
+ # $ cat input | wu-local extract --part=2
56
+ # crackle
57
+ # 2
58
+ #
59
+ # @example Extracting a column from delimited data with a different delimiter
60
+ #
61
+ # $ cat input
62
+ # snap,crackle,pop
63
+ # 1,2,3
64
+ # $ cat input | wu-local extract --part=2 --delimiter=,
65
+ # crackle
66
+ # 2
67
+ #
68
+ # @example Extracting a field from within some JSON record on the command-line
69
+ #
70
+ # $ cat input
71
+ # {"id": 1, "text": "hi there"}
72
+ # {"id": 2, "text": "goodbye"}
73
+ # $ cat input | wu-local extract --part="text"
74
+ # hi there
75
+ # goodbye
76
+ #
77
+ # This even works on nested keys using a dot ('.') to separate the
78
+ # keys:
79
+ #
80
+ # @example Extracting a nested field from within some JSON record on the command-line
81
+ #
82
+ # $ cat input
83
+ # {"id": 1, "data": {"text": "hi there"}}
84
+ # {"id": 2, "data": {"text": "goodbye"}}
85
+ # $ cat input | wu-local extract --part="data.text"
86
+ # hi there
87
+ # goodbye
88
+ #
89
+ # Objects like Hashes, Arrays, and models, which would have to be
90
+ # serialized within a command-line flow, can also be extracted from
91
+ # within a dataflow:
92
+ #
93
+ # @example Extracting a field from within a Hash in a dataflow
94
+ #
95
+ # Wukong.dataflow(:uses_extract) do
96
+ # ... | extract(part: 'data.text') | ...
97
+ # end
98
+ #
99
+ # @see DynamicGet
100
+ class Extract < Processor
101
+ include DynamicGet
102
+
103
+ # The part to extract.
104
+ field :part, Whatever, :default => nil
105
+
106
+ # Extract a `part` of a `record`.
107
+ #
108
+ # @param [Object] record
109
+ # @yield [part]
110
+ # @yieldparam [Object] part the part extracted from the record
111
+ def process record
112
+ yield get(self.part, record)
113
+ end
114
+ register
115
+ end
116
+
117
+ class Topic < Processor
118
+ field :topic, Symbol
119
+ def process(record)
120
+ yield perform_action(record)
121
+ end
122
+
123
+ def perform_action(record)
124
+ assign_topic(record, topic)
125
+ end
126
+
127
+ def assign_topic(record, topic_name)
128
+ record.define_singleton_method(:topic){ topic_name }
129
+ record
130
+ end
131
+ register
132
+ end
133
+
134
+ # Until further notice, this processor is unusable due to the invocation of yield
135
+ # class Foreach < Processor
136
+ # def process(record, &blk)
137
+ # perform_action(record, &blk)
138
+ # end
139
+ # register
140
+ # end
141
+
142
+ class Map < Processor
143
+ def process(record)
144
+ yield perform_action(record)
145
+ end
146
+ register
147
+ end
148
+
149
+ class Flatten < Processor
150
+ def process(records)
151
+ records.respond_to?(:each) ? records.each{ |record| yield(record) } : yield(records)
152
+ end
153
+ register
154
+ end
155
+ end
156
+ end
@@ -0,0 +1,7 @@
1
+ require_relative("reducers/accumulator")
2
+ require_relative("reducers/sort")
3
+ require_relative("reducers/count")
4
+ require_relative("reducers/group")
5
+ require_relative("reducers/group_concat")
6
+ require_relative("reducers/moments")
7
+ require_relative("reducers/bin")
@@ -0,0 +1,73 @@
1
+ module Wukong
2
+ class Processor
3
+
4
+ # A base widget for building more complex accumulative widgets.
5
+ class Accumulator < Processor
6
+
7
+ # The current key used to define the current group being
8
+ # accumulated.
9
+ attr_accessor :key
10
+
11
+ # The current group of records.
12
+ attr_accessor :group
13
+
14
+ # Sets up this accumulator by defining an initial key (with a
15
+ # value that is unlikely to be found in real data) and calling
16
+ # `#start` with no record.
17
+ def setup
18
+ @key = :__first_group__
19
+ start(nil)
20
+ end
21
+
22
+ # Processes the `record`.
23
+ #
24
+ # If the record is part of the current group (has a key that is
25
+ # the same as the current key), it is simply passed to
26
+ # `accumulate`.
27
+ #
28
+ # If the record has a different key, `finalize` is called first
29
+ # (except before the first group), then `start`, then `accumulate`.
30
+ #
31
+ # @param [Object] record
32
+ # @yield [finalized_record] each record yielded by `finalize`
33
+ # @yieldparam [Object] finalized_record
34
+ # @see #accumulate
35
+ # @see #finalize
36
+ # @see #get_key
37
+ # @see #start
38
+ def process(record)
39
+ this_key = get_key(record)
40
+ if this_key != self.key
41
+ finalize { |record| yield record } unless self.key == :__first_group__
42
+ self.key = this_key
43
+ start record
44
+ end
45
+ accumulate(record)
46
+ end
47
+
48
+ # Starts accumulation for a new group of records with a new key.
49
+ # This is where you can reset counters, clear caches, &c.
50
+ #
51
+ # @param [Object] record
52
+ def start record
53
+ end
54
+
55
+ # Gets the key from the given +record+. By default a record's
56
+ # key is just the record itself.
57
+ #
58
+ # @param [Object] record
59
+ # @return [Object] the record's key
60
+ def get_key record
61
+ record
62
+ end
63
+
64
+ # Accumulates another +record+.
65
+ #
66
+ # Does nothing by default, intended for you to override.
67
+ #
68
+ # @param [Object] record
69
+ def accumulate record
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,318 @@
1
+ module Wukong
2
+ class Processor
3
+
4
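+ # A widget for binning input data. Will emit one tab-separated line per bin: its lower edge, its upper edge and its count (plus the normalized count if requested).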
5
+ #
6
+ #
7
+ #
8
+ # @example Binning some input data on the command-line
9
+ #
10
+ # $ cat input
11
+ # 0.94628
12
+ # 0.03480
13
+ # 0.74418
14
+ # ...
15
+ # $ cat input | wu-local bin
16
+ #
17
+ # 0.02935 0.12638500000000003 7
18
+ # 0.12638500000000003 0.22342000000000004 11
19
+ # 0.22342000000000004 0.32045500000000005 15
20
+ #
21
+ # @example Control how the bins are defined and displayed
22
+ #
23
+ # $ cat input | wu-local bin --min=0.0 --max=1.0 --num_bins=10 --precision=1
24
+ # 0.0 0.1 10.0
25
+ # 0.1 0.2 12.0
26
+ # 0.2 0.3 8.0
27
+ # ...
28
+ #
29
+ # @example Include an additional column of normalized (fractional) counts
30
+ #
31
+ # $ cat input | wu-local bin --min=0.0 --max=1.0 --num_bins=10 --precision=1 --normalize
32
+ # 0.0 0.1 10.0 0.3
33
+ # 0.1 0.2 12.0 0.36
34
+ # 0.2 0.3 8.0 0.24
35
+ # ...
36
+ #
37
+ # @example Make a log-log histogram
38
+ #
39
+ # $ cat input | wu-local bin --log_bins --log_counts
40
+ # 1.000 3.162 1.099
41
+ # 3.162 10.000 1.946
42
+ # 10.000 31.623 3.045
43
+ # 31.623 100.000 4.234
44
+ #
45
+ # This widget works nicely with the Extract widget at the end of a
46
+ # data flow:
47
+ #
48
+ # @example Use the bin at the end of a dataflow
49
+ #
50
+ # Wukong.processor(:bins_at_end) do
51
+ # ... | extract(part: 'age') | bin(num_bins: 10)
52
+ # end
53
+ #
54
+ # @see Accumulator
55
+ # @see Extract
56
+ class Bin < Accumulator
57
+
58
# Bin geometry.  Explicit `edges` take precedence: #setup only derives
# edges from `min`, `max` and `num_bins` when `edges` is nil.
+ field :num_bins, Integer
59
+ field :edges, Array
60
+ field :min, Float
61
+ field :max, Float
62
+
63
# Output formatting, read by #format: an explicit `format_string` is used
# as-is, otherwise `precision` sets both width and digits of the number.
+ field :format_string, String
64
+ field :precision, Integer, :default => 3
65
+
66
# `by` is passed to `get` (from DynamicGet) in #value_from to pull the
# value to bin out of each record.
+ include DynamicGet
67
+ field :by, Whatever
68
+
69
# Logarithm options: `log_bins` is read when edges are computed,
# `log_counts` when counts are emitted; `base` is the logarithm base
# handed to Math.log in #log_if_possible.
+ field :log_bins, :boolean, :default => false
70
+ field :log_counts, :boolean, :default => false
71
+ field :base, Float, :default => Math::E
72
+
73
# When true, #finalize appends count / total_count to each emitted bin.
+ field :normalize, :boolean, :default => false
74
+
75
+ # The accumulated values
76
+ attr_accessor :values
77
+
78
+ # The bins (pairs of edges)
79
+ attr_accessor :bins
80
+
81
+ # The value counts within each bin.
82
+ attr_accessor :counts
83
+
84
+ # The total number of accumulated values.
85
+ attr_accessor :total_count
86
+
87
# Prepares the widget for a fresh run.
#
# Runs the parent's setup, empties the value, bin and count storage and
# zeroes the total count.  Bins are then built up front when possible:
# from explicit `edges` if present, otherwise from `min`, `max` and
# `num_bins` when all three are known.
def setup
  super()
  self.values      = []
  self.bins        = []
  self.counts      = []
  self.total_count = 0
  return set_bins_and_counts_from_edges! unless edges.nil?
  set_edges_from_min_max_and_num_bins! if min && max && num_bins
end
101
+
102
# Returns the same constant key for every record so that, as far as
# the Accumulator is concerned, the whole input is a single group.
#
# @param [Object] record ignored
# @return [:__first__group__]
def get_key(record)
  :__first__group__
end
110
+
111
# Accumulates a single `record`.
#
# The value is extracted with `value_from`; records without a value
# are skipped and do not count towards `total_count`.  When bins
# already exist the value is counted straight away.  Otherwise it is
# stored for later binning, widening `min` and `max` to include it.
#
# @param [Object] record
def accumulate(record)
  value = value_from(record)
  return unless value

  self.total_count += 1
  return add_to_some_bin(value) if bins?

  self.min = value if !min || value < min
  self.max = value if !max || value > max
  values << value
end
132
+
133
# Emits each bin with its edges and count.  Adds the normalized
# count if requested.
#
# Bins the stored values first if that has not happened on the fly.
#
# Each bin is emitted as ONE tab-separated String whose columns are the
# lower edge, the upper edge, the (logarithmic if requested) count and,
# when `normalize` is set and at least one value was counted, the
# (logarithmic if requested) fraction count / total_count.
#
# The stored `bins` are left untouched, so calling this method again
# emits the same lines.
#
# @yield [line]
# @yieldparam [String] line the formatted, tab-joined columns of one bin
def finalize
  bin! unless bins?
  counts.each_with_index do |count, index|
    lower, upper = bins[index]
    # Build a fresh row instead of appending to the stored [lower, upper] pair.
    row = [lower, upper, log_count_if_necessary(count)]
    if normalize && total_count > 0
      row << log_count_if_necessary(count.to_f / total_count.to_f)
    end
    yield row.map { |n| format(n) }.join("\t")
  end
end
154
+
155
# Formats `n` so it's readable and compact.
#
# If this widget is given an explicit `format_string` then it
# will be used here (the value of `format_string` should have a
# slot for a float).
#
# Otherwise zero is rendered as "0.0", large (|n| > 1000) or small
# (|n| < 0.001) numbers are formatted in scientific notation, and
# everything in between is printed as a plain decimal, using
# `precision` for both the field width and the number of digits.
#
# @param [Numeric] n
# @return [String] always a String, including for zero
def format(n)
  if format_string
    format_string % n
  elsif n == 0.0
    '0.0'
  elsif n.abs > 1000 || n.abs < 0.001
    "%#{precision}.#{precision}E" % n
  else
    "%#{precision}.#{precision}f" % n
  end
end
179
+
180
# Bins the accumulated values.
#
# Chooses a number of bins from the total count unless one was given,
# computes the edges from `min`, `max` and `num_bins`, then drains
# `values` into the bins.
#
# Does nothing when `min` or `max` is still unknown (nothing has been
# accumulated and no range was configured), because edges cannot be
# computed without a range.
#
# @see #bins?
def bin!
  return unless min && max

  set_num_bins_from_total_count! unless num_bins
  set_edges_from_min_max_and_num_bins!
  until values.empty?
    value = values.shift
    add_to_some_bin(value.to_f) if value
  end
end
191
+
192
# Reports whether any bins have been built yet.
#
# @return [true, false, nil] truthy only when `bins` is a non-empty
#   list; falsy when `bins` is unset or empty
def bins?
  current = bins
  return current unless current
  !current.empty?
end
198
+
199
# Extracts the number to bin from a given `record`.
#
# The raw value is looked up with `get` using the `by` setting and
# converted with `to_f`.  A missing (nil/false) raw value, or one whose
# conversion raises, yields nil.
#
# @param [Object] record
# @return [Float, nil]
def value_from(record)
  raw = get(by, record)
  return unless raw

  begin
    raw.to_f
  rescue StandardError
    nil
  end
end
208
+
209
# Passes `val` through unchanged unless `log_counts` is set, in which
# case its logarithm (via `log_if_possible`) is returned instead.
#
# @param [Float] val
# @return [Float] the original value or its logarithm if required
def log_count_if_necessary(val)
  return val unless log_counts
  log_if_possible(val)
end
217
+
218
# Returns the logarithm of `val` to the configured `base` when `val`
# is strictly positive.
#
# Zero and negative values have no logarithm and are returned
# unchanged.
#
# @param [Float] val
# @return [Float]
def log_if_possible(val)
  return val unless val > 0
  Math.log(val, base)
end
227
+
228
+ private
229
+
230
# :nodoc:
# Stores `new_min` (converted with `to_f`) as the minimum, refusing a
# value that is not strictly below an already-known maximum.
def receive_min(new_min)
  candidate = new_min.to_f
  if max && candidate >= max
    raise Error.new("The minimum value must be strictly less than the maximum value")
  end
  @min = candidate
end
235
+
236
# :nodoc:
# Stores `new_max` (converted with `to_f`) as the maximum, refusing a
# value that is not strictly above an already-known minimum.
def receive_max(new_max)
  candidate = new_max.to_f
  if min && candidate <= min
    raise Error.new("The maximum value must be strictly greater than the minimum value")
  end
  @max = candidate
end
241
+
242
# :nodoc:
# Stores `n` (converted with `to_i`) as the number of bins, refusing
# anything that is not greater than zero.
def receive_num_bins(n)
  requested = n.to_i
  raise Error.new("The number of bins must be a postive-definite integer") unless requested > 0
  @num_bins = requested
end
247
+
248
# :nodoc:
# Stores the bin edges and rebuilds the bins and counts from them.
#
# Accepts a comma-separated String or an Array; the entries are
# converted with `to_f` and sorted ascending.  nil clears the edges
# without touching the bins.  Any other input raises an Error instead
# of failing with a NoMethodError on nil.
#
# Returns the stored edges (an Array of Floats, or nil).
def receive_edges(es)
  raw = case es
        when nil    then nil
        when String then es.split(',')
        when Array  then es
        else
          raise Error.new("Bin edges must be a comma-separated String or an Array, got #{es.class}")
        end
  @edges = raw && raw.map(&:to_f).sort
  set_bins_and_counts_from_edges! if @edges
  @edges
end
257
+
258
# :nodoc:
# Picks a number of bins from the amount of data seen: the integer
# part of the square root of `total_count`, but never fewer than one
# bin so that the bin width stays finite even when nothing was counted.
def set_num_bins_from_total_count!
  self.num_bins = [Math.sqrt(total_count).to_i, 1].max
end
262
+
263
# :nodoc:
# Rebuilds `@bins` as the list of [lower, upper] pairs formed by each
# two neighbouring edges, and resets `@counts` to one zero per bin.
def set_bins_and_counts_from_edges!
  @bins   = edges.each_cons(2).map { |lower, upper| [lower, upper] }
  @counts = Array.new(bins.length, 0)
end
272
+
273
# :nodoc:
# Computes `num_bins` equally wide bins between `min` and `max` (equally
# wide in logarithmic space when `log_bins` is set), stores the edges
# and rebuilds the bins and counts.
#
# Interior edges are computed as `lower + i * width` rather than by
# repeatedly adding the width, so rounding error cannot create an extra
# sliver of a bin just below `max`.  The outermost edges are always
# exactly `min` and `max`.
#
# With `log_bins`, interior edges are mapped back with `base ** n`,
# the inverse of the `Math.log(val, base)` used by `log_if_possible`.
# NOTE(review): log-spaced bins are only meaningful for min > 0;
# `log_if_possible` returns non-positive values unchanged.
#
# A degenerate range (min == max) or a single bin gives the one bin
# [min, max].
def set_edges_from_min_max_and_num_bins!
  lower, upper = log_bins ? [log_if_possible(min), log_if_possible(max)] : [min, max]

  interior = []
  if num_bins > 1 && upper > lower
    width    = (upper - lower).to_f / num_bins
    interior = (1...num_bins).map { |i| lower + i * width }
  end
  interior = interior.map { |n| base**n } if log_bins

  self.edges = [min] + interior + [max]
  set_bins_and_counts_from_edges!
end
301
+
302
# :nodoc:
# Increments the count of the bin whose half-open interval
# [lower, upper) contains `value`.
#
# A value outside every bin is clamped to the nearest end: below the
# lowest edge it is counted in the first bin, otherwise (at or above
# the highest edge, i.e. the maximal element) in the last bin.
#
# FIXME optimize this O(n) algorithm...
def add_to_some_bin(value)
  index = bins.index { |lower, upper| value >= lower && value < upper }
  index ||= value < bins.first.first ? 0 : counts.length - 1
  counts[index] += 1
end
314
+
315
+ register
316
+ end
317
+ end
318
+ end