wukong 3.0.0.pre → 3.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,140 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
- require 'wukong/streamer/count_keys'
5
-
6
- #
7
- # Ch3ck out dis moist azz code bitches!!
8
- #
9
- #
10
-
11
- #
12
- # Do nothing more than bin users here, arbitrary and probably bad
13
- #
14
- class Mapper < Wukong::Streamer::RecordStreamer
15
- def process rank, followers
16
- followers = followers.to_i
17
- if followers > 100
18
- yield [9,rank]
19
- elsif followers > 75
20
- yield [8,rank]
21
- elsif followers > 50
22
- yield [7,rank]
23
- elsif followers > 25
24
- yield [6,rank]
25
- elsif followers > 15
26
- yield [5,rank]
27
- elsif followers > 10
28
- yield [4,rank]
29
- elsif followers > 5
30
- yield [3,rank]
31
- elsif followers > 4
32
- yield [2,rank]
33
- elsif followers > 1
34
- yield [1,rank]
35
- else
36
- yield [0,rank]
37
- end
38
- end
39
- end
40
-
41
-
42
- #
43
- # Calculate percentile rank for every pr value in a given follower bracket
44
- #
45
- class Reducer < Wukong::Streamer::AccumulatingReducer
46
- attr_accessor :count_bin
47
- def start! bin, rank
48
- self.count_bin ||= {}
49
- self.count_bin[bin] ||= {}
50
- end
51
-
52
- def accumulate bin, rank
53
- rank = (rank.to_f*10.0).round.to_f/10.0
54
- self.count_bin[bin][rank] ||= 0
55
- self.count_bin[bin][rank] += 1
56
- end
57
-
58
- def finalize
59
- count_bin[key] = generate_all_pairs(key).inject({}){|h,pair| h[pair.first] = pair.last; h}
60
- yield [key, count_bin[key].values.sort.join(",")]
61
- end
62
-
63
- #
64
- # Write the final table to disk as a ruby hash
65
- #
66
- def after_stream
67
- table = File.open("trstrank_table.rb", 'w')
68
- table << "TRSTRANK_TABLE = " << count_bin.inspect
69
- table.close
70
- end
71
-
72
- #
73
- # Return percentile of a given trstrank for a given follower bracket
74
- #
75
- def percentile bin, rank
76
- ((count_less_than(bin,rank) + 0.5*frequency_of(bin,rank))/ total_num(bin) )*100.0
77
- end
78
-
79
- #
80
- # Return the count of values less than rank
81
- #
82
- def count_less_than bin, rank
83
- count_bin[bin].keys.inject(0){|count,key| count += count_bin[bin][key] if key.to_f < rank; count}
84
- end
85
-
86
- #
87
- # Return the count of rank
88
- #
89
- def frequency_of bin, rank
90
- count_bin[bin].keys.inject(0){|count,key| count += count_bin[bin][key] if key.to_f == rank; count}
91
- end
92
-
93
- #
94
- # Return the total number in sample
95
- #
96
- def total_num bin
97
- count_bin[bin].values.inject(0){|count,v| count += v; count}
98
- end
99
-
100
- #
101
- # Generate a list of all pairs {trstrank => percentile}, interpolate when necessary
102
- #
103
- def generate_all_pairs bin
104
- h = {}
105
- count_bin[bin].keys.each do |rank|
106
- h[rank.to_f] = percentile(bin, rank.to_f)
107
- end
108
- h[0.0] ||= 0.0
109
- h[10.0] ||= 100.0
110
- arr = h.to_a.sort!{|x,y| x.first <=> y.first}
111
- list = arr.zip(arr[1..-1])
112
- big_list = []
113
- big_list << [0.0,0.0]
114
- list.each do |pairs|
115
- interpolate(pairs.first, pairs.last, 0.1).each{|pair| big_list << pair}
116
- end
117
- big_list.uniq.sort{|x,y| x.first <=> y.first}
118
- end
119
-
120
-
121
- #
122
- # Nothing to see here, move along
123
- #
124
- def interpolate pair1, pair2, dx
125
- return [pair1] if pair2.blank?
126
- m = (pair2.last - pair1.last)/(pair2.first - pair1.first) # slope
127
- b = pair2.last - m*pair2.first # y intercept
128
- num = ((pair2.first - pair1.first)/dx).abs.round # number of points to interpolate
129
- points = []
130
- num.times do |i|
131
- x = pair1.first + (i+1).to_f*dx
132
- y = m*x + b
133
- points << [x,y]
134
- end
135
- points # return an array of pairs
136
- end
137
-
138
- end
139
-
140
- Wukong::Script.new(Mapper,Reducer).run
@@ -1,3 +0,0 @@
1
- 1 15 30 25
2
- 2 10 10 20
3
- 3 50 30 30
@@ -1,173 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
- require 'wukong/streamer/rank_and_bin_reducer'
5
-
6
- #
7
- # This example uses the classes from http://github.com/mrflip/twitter_friends
8
- # (That's sloppy, and I apologize. I'm building this script for that, but it
9
- # seems broadly useful and I'm not maintaining two copies. Once this script is
10
- # more worky we'll make it standalone. Anyway you should get the picture.)
11
- #
12
- $: << File.dirname(__FILE__)+'/../../projects/twitter_friends/lib'
13
- require 'twitter_friends';
14
- require 'twitter_friends/struct_model' ; include TwitterFriends::StructModel
15
-
16
-
17
- #
18
- # attrs to bin
19
- #
20
- BINNABLE_ATTRS = {
21
- :twitter_user => [
22
- [:followers_count, :fo ],
23
- [:friends_count, :fr ],
24
- [:statuses_count, :st ],
25
- [:favourites_count, :fv ],
26
- [:created_at, :crat ]
27
- ]
28
-
29
- }
30
- RESOURCE_ALIASES = {
31
- :twitter_user => :u,
32
- :user_metrics => :um,
33
- }
34
- #
35
- # KLUDGE This is not DRY at all but let's get it working first
36
- #
37
- BinUserMetrics = TypedStruct.new(
38
- [:id, Integer],
39
- *BINNABLE_ATTRS[:user_metrics].map{|attr, attr_abbr| [attr_abbr, Integer] }
40
- )
41
- BINNED_RESOURCE_ALIASES = {
42
- :u => BinTwitterUser,
43
- }
44
-
45
- module RankAndBinAttrs
46
- class ExplodeResourceMapper < Wukong::Streamer::StructStreamer
47
- def get_and_format_attr thing, attr
48
- val = thing.send(attr)
49
- case thing.members_types[attr].to_s.to_sym
50
- when :Integer then "%010d" % val.to_i
51
- when :Float then "%020.7f" % val.to_f
52
- when :Bignum then "%020d" % val.to_i
53
- else
54
- raise [val, thing.members_types[attr].to_s.to_sym].inspect
55
- end
56
- end
57
-
58
- #
59
- # The data expansion of this mapper is large enough that it makes sense to
60
- # be a little responsible with what we emit. We'll use the RESOURCE_ALIASES
61
- # and BINNABLE_ATTRS hashes, above, to dump a more parsimonious
62
- # representation.
63
- #
64
- def process thing, *args, &block
65
- attr_abbrs = BINNABLE_ATTRS[thing.class.resource_name]
66
- return unless attr_abbrs
67
- attr_abbrs.each do |attr, abbr|
68
- yield [
69
- RESOURCE_ALIASES[thing.class.resource_name],
70
- abbr,
71
- get_and_format_attr(thing, attr),
72
- thing.id.to_i
73
- ]
74
- end
75
- end
76
- end
77
-
78
- class BinAttrReducer < Wukong::Streamer::RankAndBinReducer
79
- attr_accessor :last_rsrc_attr
80
- #
81
- # Note that we might get several different resources at the same reducer
82
- #
83
- def get_key rsrc, attr, val, *args
84
- if [rsrc, attr] != self.last_rsrc_attr
85
- # Note: since each partition has the same cardinality, we don't need to
86
- # fiddle around with the bin_size, etc -- just reset the order
87
- # parameters' state.
88
- reset_order_params!
89
- self.last_rsrc_attr = [rsrc, attr]
90
- end
91
- val
92
- end
93
-
94
- #
95
- # Note well -- we are rearranging the field order to
96
- #
97
- # resource_abbr id attr_abbr bin
98
- #
99
- # for proper sorting to the re-assembler
100
- #
101
- def emit record
102
- rsrc, attr, val, id, numbering, rank, bin = record
103
- super [rsrc, id, attr, bin]
104
- end
105
- end
106
-
107
- class ReassembleObjectReducer < Wukong::Streamer::AccumulatingReducer
108
- attr_accessor :thing
109
- def klass_from_abbr rsrc_abbr
110
- BINNED_RESOURCE_ALIASES[rsrc_abbr.to_sym]
111
- end
112
- def get_key rsrc_abbr, id, *args
113
- [rsrc_abbr, id.to_i]
114
- end
115
-
116
- def start! rsrc_abbr, id, *args
117
- klass = klass_from_abbr(rsrc_abbr)
118
- self.thing = klass.new id.to_i
119
- end
120
-
121
- def accumulate rsrc, id, attr, bin
122
- thing.send("#{attr}=", bin)
123
- end
124
-
125
- def finalize
126
- yield thing
127
- end
128
- end
129
-
130
- #
131
- # Two-phase script
132
- #
133
- # FIXME -- We need a runner class to manage this.
134
- #
135
- class Script < Wukong::Script
136
- attr_accessor :phase
137
- # KLUDGE !!
138
- def initialize
139
- case
140
- when ARGV.detect{|arg| arg =~ /--phase=1/}
141
- # Phase 1 -- Steal underpants. Also, disassemble each object, and find
142
- # the bin for each binnable attribute's value
143
- self.phase = 1
144
- self.mapper_klass, self.reducer_klass = [ExplodeResourceMapper, BinAttrReducer]
145
- when ARGV.detect{|arg| arg =~ /--phase=2/}
146
- # Phase 2 -- ????
147
- raise "Phase 2 : ????"
148
- when ARGV.detect{|arg| arg =~ /--phase=3/}
149
- # Phase 3 -- profit. In this case, put records back together.
150
- self.phase = 3
151
- self.mapper_klass, self.reducer_klass = [nil, ReassembleObjectReducer]
152
- else
153
- raise "Please run me with a --phase= option"
154
- end
155
- super mapper_klass, reducer_klass
156
- end
157
-
158
- def default_options
159
- extra_options =
160
- case self.phase
161
- # partition on [rsrc, attr]; sort on [rsrc, attr, val]
162
- when 1 then { :sort_fields => 3, :partition_fields => 2 }
163
- # sort on [rsrc, id]
164
- when 3 then { :sort_fields => 2 }
165
- else { }
166
- end
167
- super.merge extra_options
168
- end
169
- end
170
-
171
- # execute script
172
- Script.new.run
173
- end
@@ -1,40 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
-
5
- # Run as (local mode)
6
- #
7
- # ./examples/stupidly_simple_filter.rb --run=local input.tsv output.tsv
8
- #
9
- # for hadoop mode,
10
- #
11
- # ./examples/stupidly_simple_filter.rb --run=hadoop input.tsv output.tsv
12
- #
13
- # For debugging, run
14
- #
15
- # cat input.tsv | ./examples/stupidly_simple_filter.rb --map input.tsv | more
16
- #
17
-
18
- class Mapper < LineStreamer
19
- include Filter
20
- MATCHER = %r{(ford|mercury|saab|mazda|isuzu)}
21
-
22
- #
23
- # A very simple mapper -- looks for a regex match in one field,
24
- # and emits the whole record if the field matches
25
- #
26
- #
27
- # Given a series of records like:
28
- #
29
- # tweet 123456789 20100102030405 @frank: I'm having a bacon sandwich
30
- # tweet 123456789 20100102030405 @jerry, I'm having your baby
31
- #
32
- # emits only the lines matching that regex
33
- #
34
- def emit? line
35
- MATCHER.match line
36
- end
37
- end
38
-
39
- # Execute the script
40
- Wukong.run(Mapper)
@@ -1,75 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
-
5
- module WordCount
6
- class Mapper < Wukong::Streamer::LineStreamer
7
- #
8
- # Split a string into its constituent words.
9
- #
10
- # This is pretty simpleminded:
11
- # * downcase the word
12
- # * Split at any non-alphanumeric boundary, including '_'
13
- # * However, preserve the special cases of 's, 'd or 't at the end of a
14
- # word.
15
- #
16
- # tokenize("Ability is a poor man's wealth #johnwoodenquote")
17
- # # => ["ability", "is", "a", "poor", "man's", "wealth", "johnwoodenquote"]
18
- #
19
- def tokenize str
20
- return [] if str.blank?
21
- str = str.downcase;
22
- # kill off all punctuation except [stuff]'s or [stuff]'t
23
- # this includes hyphens (words are split)
24
- str = str.
25
- gsub(/[^a-zA-Z0-9\']+/, ' ').
26
- gsub(/(\w)\'([stdm]|re|ve|ll)\b/, '\1!\2').gsub(/\'/, ' ').gsub(/!/, "'")
27
- # Busticate at whitespace
28
- words = str.split(/\s+/)
29
- words.reject!{|w| w.length < 3 }
30
- words
31
- end
32
-
33
- #
34
- # Emit each word in each line.
35
- #
36
- def process line
37
- tokenize(line).each{|word| yield [word, 1] }
38
- end
39
- end
40
-
41
- #
42
- # You can stack up all the values in a list then sum them at once.
43
- #
44
- # This isn't good style, as it means the whole list is held in memory
45
- #
46
- class Reducer1 < Wukong::Streamer::ListReducer
47
- def finalize
48
- yield [ values.map(&:last).map(&:to_i).inject(0){|x,tot| x+tot }, key ]
49
- end
50
- end
51
-
52
- #
53
- # A bit kinder to your memory manager: accumulate the sum record-by-record:
54
- #
55
- class Reducer2 < Wukong::Streamer::AccumulatingReducer
56
- def start!(*args) @key_count = 0 end
57
- def accumulate(*args) @key_count += 1 end
58
- def finalize
59
- yield [ @key_count, key ]
60
- end
61
- end
62
-
63
- #
64
- # ... easiest of all, though: this is common enough that it's already included
65
- #
66
- require 'wukong/streamer/count_keys'
67
- class Reducer3 < Wukong::Streamer::CountKeys
68
- end
69
- end
70
-
71
- # Execute the script
72
- Wukong.run(
73
- WordCount::Mapper,
74
- WordCount::Reducer2
75
- )
@@ -1,580 +0,0 @@
1
- #!/usr/local/bin/ruby -w
2
-
3
- require "enumerator"
4
-
5
- ##
6
- # Graph models directed graphs and subgraphs and outputs in graphviz's
7
- # dot format.
8
-
9
- module Hanuman
10
-
11
- class GraphvizBuilder
12
- VERSION = "2.5.0" # :nodoc:
13
-
14
- LIGHT_COLORS = %w(gray lightblue lightcyan lightgray lightpink
15
- lightslategray lightsteelblue white)
16
-
17
- # WTF -- can't be %w() because of a bug in rcov
18
- BOLD_COLORS = [:black, :brown, :mediumblue, :blueviolet,
19
- :orange, :magenta, :darkgreen, :maroon,
20
- :violetred, :purple, :greenyellow, :deeppink,
21
- :midnightblue, :firebrick, :darkturquoise,
22
- :mediumspringgreen, :chartreuse, :navy,
23
- :lightseagreen, :chocolate, :lawngreen, :green,
24
- :indigo, :darkgoldenrod, :darkviolet, :red,
25
- :springgreen, :saddlebrown, :mediumvioletred,
26
- :goldenrod, :tomato, :cyan, :forestgreen,
27
- :darkorchid, :crimson, :coral, :deepskyblue,
28
- :seagreen, :peru, :turquoise, :orangered,
29
- :dodgerblue, :sienna, :limegreen, :royalblue,
30
- :darkorange, :blue]
31
-
32
- ##
33
- # Defines the brewer color schemes and the maximum number of colors
34
- # in each set.
35
-
36
- COLOR_SCHEME_MAX = {
37
- :accent => 8, :blues => 9, :brbg => 11, :bugn => 9,
38
- :dark2 => 8, :gnbu => 9, :greens => 9, :greys => 9,
39
- :oranges => 9, :orrd => 9, :paired => 12, :pastel1 => 9,
40
- :pastel2 => 8, :piyg => 11, :prgn => 11, :pubu => 9,
41
- :pubugn => 9, :puor => 11, :purd => 9, :purples => 9,
42
- :rdbu => 11, :rdgy => 11, :rdylbu => 11, :rdylgn => 11,
43
- :reds => 9, :set1 => 9, :set2 => 8, :set3 => 12,
44
- :spectral => 11, :ylgn => 9, :ylgnbu => 9, :ylorbr => 9,
45
- :ylorrd => 9
46
- }
47
-
48
- SHAPES = %w[
49
- Mcircle Mdiamond Msquare box box3d circle component
50
- diamond doublecircle doubleoctagon egg ellipse folder
51
- hexagon house invhouse invtrapezium invtriangle none
52
- note octagon parallelogram pentagon plaintext point
53
- polygon rect rectangle septagon square tab trapezium
54
- triangle tripleoctagon
55
- ].map(&:to_sym)
56
-
57
- STYLES = %w(dashed dotted solid invis bold filled diagonals rounded).map(&:to_sym)
58
-
59
- ARROW_RE = /(?:o?[lr]?(?:box|crow|diamond|dot|inv|none|normal|tee|vee)){1,4}/
60
-
61
- ARROWS = %w(box crow diamond dot inv none normal tee vee).map(&:to_sym)
62
-
63
- STYLES.each do |name|
64
- define_method(name) { style name }
65
- end
66
-
67
- (BOLD_COLORS + LIGHT_COLORS).each do |name|
68
- define_method(name) { color name }
69
- end
70
-
71
- SHAPES.each do |name|
72
- method_name = name.downcase.to_s.sub(/none/, 'shape_none')
73
- define_method(method_name) { shape name }
74
- end
75
-
76
- ARROWS.each do |name|
77
- method_name = {
78
- :none => "none_arrow",
79
- :box => "box_arrow",
80
- :diamond => "diamond_arrow",
81
- }[name] || name
82
-
83
- define_method(method_name) { arrowhead name }
84
- end
85
-
86
- ENGINES = %w[ circo dot fdp neato osage sfdp twopi ].map(&:to_sym)
87
-
88
- ##
89
- # A parent graph, if any. Only used for subgraphs.
90
-
91
- attr_accessor :graph
92
-
93
- ##
94
- # The name of the graph. Optional for graphs and subgraphs. Prefix
95
- # the name of a subgraph with "cluster" for subgraph that is boxed.
96
-
97
- attr_accessor :name
98
-
99
- ##
100
- # Global attributes for edges in this graph.
101
-
102
- attr_reader :edge_attribs
103
-
104
- ##
105
- # The hash of hashes of edges in this graph. Use #[] or #node to create edges.
106
-
107
- attr_reader :edges
108
-
109
- ##
110
- # Global attributes for this graph.
111
-
112
- attr_reader :graph_attribs
113
-
114
- ##
115
- # Global attributes for nodes in this graph.
116
-
117
- attr_reader :node_attribs
118
-
119
- ##
120
- # The hash of nodes in this graph. Use #[] or #node to create nodes.
121
-
122
- attr_reader :nodes
123
-
124
- ##
125
- # An array of subgraphs.
126
-
127
- attr_reader :subgraphs
128
-
129
- ##
130
- # Creates a new graph object. Optional name and parent graph are
131
- # available. Also takes an optional block for DSL-like use.
132
-
133
- def initialize name = nil, graph = nil, &block
134
- @name = name.to_sym
135
- @graph = graph
136
- graph << self if graph
137
- @nodes = Hash.new { |h,k| h[k] = Node.new self, k }
138
- @edges = Hash.new { |h,k|
139
- h[k] = Hash.new { |h2, k2| h2[k2] = Edge.new self, self[k], self[k2] }
140
- }
141
- @graph_attribs = []
142
- @node_attribs = []
143
- @edge_attribs = []
144
- @subgraphs = []
145
-
146
- engine(:dot)
147
-
148
- self.scheme = graph.scheme if graph
149
- node_attribs << scheme if scheme
150
- configurate(&block) if block
151
- end
152
-
153
- def depth
154
- graph.nil? ? 0 : graph.depth + 1
155
- end
156
-
157
- def configurate(&block)
158
- (block.arity == 0) ? instance_eval(&block) : block.call(self)
159
- self
160
- end
161
-
162
- def engine(engine_name=nil)
163
- return @engine unless engine_name
164
- raise ArgumentError, "Don't have engine #{engine_name} listed -- should be one of #{ENGINES}" unless ENGINES.include?(engine_name.to_sym)
165
- @engine = engine_name
166
- end
167
-
168
- ##
169
- # Push a subgraph into the current graph. Sets the subgraph's graph to self.
170
-
171
- def << subgraph
172
- subgraphs << subgraph
173
- subgraph.graph = self
174
- end
175
-
176
- ##
177
- # Access a node by name
178
-
179
- def [] name
180
- nodes[name]
181
- end
182
-
183
- def arrowhead shape
184
- raise ArgumentError, "Bad arrow shape: #{shape}" unless shape =~ ARROW_RE
185
- Attribute.new "arrowhead = #{shape}"
186
- end
187
-
188
- def arrowtail shape
189
- raise ArgumentError, "Bad arrow shape: #{shape}" unless shape =~ ARROW_RE
190
- Attribute.new "arrowtail = #{shape}"
191
- end
192
-
193
- def arrowsize size
194
- Attribute.new "arrowsize = #{size}"
195
- end
196
-
197
- ##
198
- # A convenience method to set the global node attributes to use boxes.
199
-
200
- def boxes
201
- node_attribs << shape(:box)
202
- end
203
-
204
- ##
205
- # Shortcut method to create a new color Attribute instance.
206
-
207
- def color color
208
- Attribute.new "color = #{color}"
209
- end
210
-
211
- ##
212
- # Shortcut method to create a new colorscheme Attribute instance. If
213
- # passed +n+, +name+ must match one of the brewer color scheme names
214
- # and it will generate accessors for each fillcolor as well as push
215
- # the colorscheme onto the node_attribs.
216
-
217
- attr_accessor :scheme
218
-
219
- def colorscheme name, n = nil
220
- self.scheme = Attribute.new "colorscheme = #{name}#{n}"
221
- max = COLOR_SCHEME_MAX[name.to_sym]
222
-
223
- node_attribs << scheme if max
224
-
225
- scheme
226
- end
227
-
228
- (1..COLOR_SCHEME_MAX.values.max).map { |m|
229
- define_method "c#{m}" do
230
- GraphvizBuilder::Attribute.new("fillcolor = #{m}")
231
- end
232
- }
233
-
234
- ##
235
- # Define one or more edges.
236
- #
237
- # edge :a, :b, :c, ...
238
- #
239
- # is equivalent to:
240
- #
241
- # edge :a, :b
242
- # edge :b, :c
243
- # ...
244
-
245
- def edge(*names)
246
- last = nil
247
- names.each_cons(2) do |from, to|
248
- last = self[from][to]
249
- end
250
- last
251
- end
252
-
253
- ##
254
- # Creates a new Graph whose edges point the other direction.
255
-
256
- def invert(new_name=nil)
257
- result = self.class.new(new_name || "#{name}_inverted")
258
- edges.each do |from, h|
259
- h.each do |to, edge|
260
- result[to][from]
261
- end
262
- end
263
- result
264
- end
265
-
266
- ##
267
- # Shortcut method to create a new fillcolor Attribute instance.
268
-
269
- def fillcolor n
270
- Attribute.new "fillcolor = #{n}"
271
- end
272
-
273
- ##
274
- # Shortcut method to create a new font Attribute instance. You can
275
- # pass in both the name and an optional font size.
276
-
277
- def font name
278
- Attribute.new "fontname = #{name.inspect}"
279
- end
280
-
281
- def fontsize size
282
- Attribute.new "fontsize = #{size}"
283
- end
284
-
285
- ##
286
- # Shortcut method to set the graph's label. Usually used with subgraphs.
287
-
288
- def label name
289
- graph_attribs << %Q{label = "#{name.to_s.gsub(/\n/, '\n')}"} # ""
290
- end
291
-
292
- ##
293
- # Access a node by name, supplying an optional label
294
-
295
- def node name, label = nil
296
- n = nodes[name]
297
- n.label label if label
298
- n
299
- end
300
-
301
- ##
302
- # Shortcut method to specify the orientation of the graph. Defaults
303
- # to the graphviz default "TB".
304
-
305
- def orient dir = :TB
306
- graph_attribs << "rankdir = #{dir}"
307
- end
308
-
309
- ##
310
- # Shortcut method to specify the orientation of the graph. Defaults to :LR.
311
-
312
- def rotate dir = :LR
313
- orient dir
314
- end
315
-
316
- ##
317
- # Saves out both a dot file to path and an image for the specified type.
318
- # Specify type as nil to skip exporting an image.
319
-
320
- def save(path, type=nil)
321
- File.open "#{path}.dot", "w" do |f|
322
- f.puts self.to_s
323
- end
324
- system "#{engine} -T#{type} #{path}.dot > #{path}.#{type}" if type
325
- end
326
-
327
- ##
328
- # Shortcut method to create a new shape Attribute instance.
329
-
330
- def shape shape
331
- Attribute.new "shape = #{shape}"
332
- end
333
-
334
- ##
335
- # Shortcut method to create a new style Attribute instance.
336
-
337
- def style name
338
- Attribute.new "style = #{name}"
339
- end
340
-
341
- ##
342
- # Shortcut method to create a subgraph in the current graph. Use
343
- # with the top-level +digraph+ method in block form for a graph DSL.
344
-
345
- def subgraph name = nil, &block
346
- GraphvizBuilder.new name, self, &block
347
- end
348
-
349
- ##
350
- # Shortcut method to create a clustered subgraph in the current
351
- # graph. Use with the top-level +digraph+ method in block form for a
352
- # graph DSL.
353
-
354
- def cluster name, &block
355
- subgraph "cluster_#{name}", &block
356
- end
357
-
358
- ##
359
- # Outputs a graphviz graph.
360
-
361
- def to_s
362
- result = []
363
-
364
- type = graph ? "subgraph " : "digraph "
365
- type << "\"#{name}\"" if name and !name.empty?
366
- result << "#{type} {"
367
-
368
- graph_attribs.each do |line|
369
- result << " #{line};"
370
- end
371
-
372
- unless node_attribs.empty? then
373
- result << " node [ #{node_attribs.join(", ")} ];"
374
- end
375
-
376
- unless edge_attribs.empty? then
377
- result << " edge [ #{edge_attribs.join(", ")} ];"
378
- end
379
-
380
- subgraphs.each do |line|
381
- result << " #{line.to_s.rstrip};"
382
- end
383
-
384
- nodes.each do |name, node|
385
- result << " #{node.to_s.rstrip};" if graph or node.attributes? or node.orphan?
386
- end
387
-
388
- edges.each do |from, deps|
389
- deps.each do |to, edge|
390
- result << " #{edge.to_s.rstrip};"
391
- end
392
- end
393
-
394
- result << "}"
395
- result.join "\n#{" "*self.depth}"
396
- end
397
-
398
- ##
399
- # An attribute for a graph, node, or edge. Really just a composable
400
- # string (via #+) with a convenience method #<< that allows you to
401
- # "paint" nodes and edges with this attribute.
402
-
403
- class Attribute < Struct.new :attr
404
- ##
405
- # "Paint" graphs, nodes, and edges with this attribute.
406
- #
407
- # red << node1 << node2 << node3
408
- #
409
- # is the same as:
410
- #
411
- # node1.attributes << red
412
- # node2.attributes << red
413
- # node3.attributes << red
414
-
415
- def << thing
416
- thing.attributes << self
417
- thing.attributes.uniq!
418
- self
419
- end
420
-
421
- ##
422
- # Returns the attribute in string form.
423
-
424
- alias :to_s :attr
425
-
426
- ##
427
- # Compose a new attribute from two existing attributes:
428
- #
429
- # bad_nodes = red + filled + diamond
430
-
431
- def + style
432
- c = CompoundAttribute.new
433
- c.push self
434
- c.push style
435
- c
436
- end
437
- end
438
-
439
- class CompoundAttribute < Attribute
440
- def initialize attr = []
441
- super
442
- end
443
-
444
- def push attrib
445
- attr.push attrib
446
- end
447
-
448
- def << thing
449
- attr.each do |subattr|
450
- subattr << thing # allows for recursive compound attributes
451
- end
452
- self
453
- end
454
-
455
- def to_s
456
- attr.join ", "
457
- end
458
- end
459
-
460
- class Thingy < Struct.new :graph, :attributes
461
- def initialize graph
462
- super graph, []
463
- end
464
-
465
- def quote(str)
466
- %Q{"#{str}"}
467
- end
468
-
469
- def pad_with_attributes(text)
470
- width = 40 - (2 * graph.depth)
471
- if self.attributes? then
472
- "%-#{width}s [ %s ]" % [text, attributes.join(',')]
473
- else
474
- text
475
- end
476
- end
477
-
478
- def initialize_copy other # :nodoc:
479
- super
480
- self.attributes = other.attributes.dup
481
- end
482
-
483
- ##
484
- # Shortcut method to set the label attribute.
485
-
486
- def label name
487
- attributes.reject! { |s| s =~ /^label =/ }
488
- attributes << "label = \"#{name.to_s.gsub(/\n/, '\n')}\""
489
- self
490
- end
491
-
492
- ##
493
- # Does this thing have attributes?
494
-
495
- def attributes?
496
- not self.attributes.empty?
497
- end
498
- end
499
-
500
- ##
501
- # An edge in a graph.
502
-
503
- class Edge < Thingy
504
-
505
- attr_accessor :from, :to, :from_slot, :to_slot
506
-
507
- ##
508
- # Create a new edge in +graph+ from +from+ to +to+.
509
-
510
- def initialize graph, from, to, from_slot=nil, to_slot=nil
511
- super graph
512
- self.from = from
513
- self.to = to
514
- self.from_slot = from_slot
515
- self.to_slot = to_slot
516
- end
517
-
518
- ##
519
- # Returns the edge in dot syntax.
520
-
521
- def to_s
522
- from_name = quote(from.name)
523
- to_name = quote(to.name)
524
- fromto = "%-18s -> %s" % [from_name, to_name]
525
- pad_with_attributes(fromto)
526
- end
527
- end
528
-
529
- ##
530
- # Nodes in the graph.
531
-
532
- class Node < Thingy
533
-
534
- attr_accessor :name
535
-
536
- def connected?
537
- edges = graph.edges
538
-
539
- edges.include?(name) or edges.any? { |from, deps| deps.include? name }
540
- end
541
-
542
- def orphan?
543
- not connected?
544
- end
545
-
546
- ##
547
- # Create a new Node. Takes a parent graph and a name.
548
-
549
- def initialize graph, name
550
- super graph
551
- self.name = name
552
- end
553
-
554
- ##
555
- # Create a new node with +name+ and an edge between them pointing
556
- # from self to the new node.
557
-
558
- def >> name
559
- self[name] # creates node and edge
560
- self
561
- end
562
-
563
- alias :"<<" :">>"
564
-
565
- ##
566
- # Returns the edge between self and +dep_name+.
567
-
568
- def [] dep_name
569
- graph.edges[name][dep_name]
570
- end
571
-
572
- ##
573
- # Returns the node in dot syntax.
574
-
575
- def to_s
576
- pad_with_attributes(quote(name))
577
- end
578
- end
579
- end
580
- end