wukong 3.0.0.pre → 3.0.0.pre2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,140 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
- require 'wukong/streamer/count_keys'
5
-
6
- #
7
- # Ch3ck out dis moist azz code bitches!!
8
- #
9
- #
10
-
11
- #
12
- # Do nothing more than bin users here, arbitrary and probably bad
13
- #
14
- class Mapper < Wukong::Streamer::RecordStreamer
15
- def process rank, followers
16
- followers = followers.to_i
17
- if followers > 100
18
- yield [9,rank]
19
- elsif followers > 75
20
- yield [8,rank]
21
- elsif followers > 50
22
- yield [7,rank]
23
- elsif followers > 25
24
- yield [6,rank]
25
- elsif followers > 15
26
- yield [5,rank]
27
- elsif followers > 10
28
- yield [4,rank]
29
- elsif followers > 5
30
- yield [3,rank]
31
- elsif followers > 4
32
- yield [2,rank]
33
- elsif followers > 1
34
- yield [1,rank]
35
- else
36
- yield [0,rank]
37
- end
38
- end
39
- end
40
-
41
-
42
- #
43
- # Calculate percentile rank for every pr value in a given follower bracket
44
- #
45
- class Reducer < Wukong::Streamer::AccumulatingReducer
46
- attr_accessor :count_bin
47
- def start! bin, rank
48
- self.count_bin ||= {}
49
- self.count_bin[bin] ||= {}
50
- end
51
-
52
- def accumulate bin, rank
53
- rank = (rank.to_f*10.0).round.to_f/10.0
54
- self.count_bin[bin][rank] ||= 0
55
- self.count_bin[bin][rank] += 1
56
- end
57
-
58
- def finalize
59
- count_bin[key] = generate_all_pairs(key).inject({}){|h,pair| h[pair.first] = pair.last; h}
60
- yield [key, count_bin[key].values.sort.join(",")]
61
- end
62
-
63
- #
64
- # Write the final table to disk as a ruby hash
65
- #
66
- def after_stream
67
- table = File.open("trstrank_table.rb", 'w')
68
- table << "TRSTRANK_TABLE = " << count_bin.inspect
69
- table.close
70
- end
71
-
72
- #
73
- # Return percentile of a given trstrank for a given follower bracket
74
- #
75
- def percentile bin, rank
76
- ((count_less_than(bin,rank) + 0.5*frequency_of(bin,rank))/ total_num(bin) )*100.0
77
- end
78
-
79
- #
80
- # Return the count of values less than rank
81
- #
82
- def count_less_than bin, rank
83
- count_bin[bin].keys.inject(0){|count,key| count += count_bin[bin][key] if key.to_f < rank; count}
84
- end
85
-
86
- #
87
- # Return the count of rank
88
- #
89
- def frequency_of bin, rank
90
- count_bin[bin].keys.inject(0){|count,key| count += count_bin[bin][key] if key.to_f == rank; count}
91
- end
92
-
93
- #
94
- # Return the total number in sample
95
- #
96
- def total_num bin
97
- count_bin[bin].values.inject(0){|count,v| count += v; count}
98
- end
99
-
100
- #
101
- # Generate a list of all pairs {trstrank => percentile}, interpolate when necessary
102
- #
103
- def generate_all_pairs bin
104
- h = {}
105
- count_bin[bin].keys.each do |rank|
106
- h[rank.to_f] = percentile(bin, rank.to_f)
107
- end
108
- h[0.0] ||= 0.0
109
- h[10.0] ||= 100.0
110
- arr = h.to_a.sort!{|x,y| x.first <=> y.first}
111
- list = arr.zip(arr[1..-1])
112
- big_list = []
113
- big_list << [0.0,0.0]
114
- list.each do |pairs|
115
- interpolate(pairs.first, pairs.last, 0.1).each{|pair| big_list << pair}
116
- end
117
- big_list.uniq.sort{|x,y| x.first <=> y.first}
118
- end
119
-
120
-
121
- #
122
- # Nothing to see here, move along
123
- #
124
- def interpolate pair1, pair2, dx
125
- return [pair1] if pair2.blank?
126
- m = (pair2.last - pair1.last)/(pair2.first - pair1.first) # slope
127
- b = pair2.last - m*pair2.first # y intercept
128
- num = ((pair2.first - pair1.first)/dx).abs.round # number of points to interpolate
129
- points = []
130
- num.times do |i|
131
- x = pair1.first + (i+1).to_f*dx
132
- y = m*x + b
133
- points << [x,y]
134
- end
135
- points # return an array of pairs
136
- end
137
-
138
- end
139
-
140
- Wukong::Script.new(Mapper,Reducer).run
@@ -1,3 +0,0 @@
1
- 1 15 30 25
2
- 2 10 10 20
3
- 3 50 30 30
@@ -1,173 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
- require 'wukong/streamer/rank_and_bin_reducer'
5
-
6
- #
7
- # This example uses the classes from http://github.com/mrflip/twitter_friends
8
- # (That's sloppy, and I apologize. I'm building this script for that, but it
9
- # seems broadly useful and I'm not maintaining two copies. Once this script is
10
- # more worky we'll make it standalone. Anyway you should get the picture.)
11
- #
12
- $: << File.dirname(__FILE__)+'/../../projects/twitter_friends/lib'
13
- require 'twitter_friends';
14
- require 'twitter_friends/struct_model' ; include TwitterFriends::StructModel
15
-
16
-
17
- #
18
- # attrs to bin
19
- #
20
- BINNABLE_ATTRS = {
21
- :twitter_user => [
22
- [:followers_count, :fo ],
23
- [:friends_count, :fr ],
24
- [:statuses_count, :st ],
25
- [:favourites_count, :fv ],
26
- [:created_at, :crat ]
27
- ]
28
-
29
- }
30
- RESOURCE_ALIASES = {
31
- :twitter_user => :u,
32
- :user_metrics => :um,
33
- }
34
- #
35
- # KLUDGE This is not DRY at all but let's get it working first
36
- #
37
# Struct holding an id plus one Integer bin per binnable twitter_user
# attribute, keyed by the attribute's abbreviation.
#
# NOTE(review): the original built this from BINNABLE_ATTRS[:user_metrics],
# which is not a key of BINNABLE_ATTRS (so `.map` was called on nil), and
# then referenced BinTwitterUser, which this file never defines. Building
# from :twitter_user and naming the struct BinTwitterUser is the presumed
# intent -- confirm against the twitter_friends library.
BinTwitterUser = TypedStruct.new(
  [:id, Integer],
  *BINNABLE_ATTRS.fetch(:twitter_user).map { |_attr, attr_abbr| [attr_abbr, Integer] }
)
# Old constant name kept as an alias so existing references still resolve.
BinUserMetrics = BinTwitterUser
BINNED_RESOURCE_ALIASES = {
  :u => BinTwitterUser,
}
44
-
45
- module RankAndBinAttrs
46
class ExplodeResourceMapper < Wukong::Streamer::StructStreamer
  #
  # Read +attr+ from +thing+ and render it as a zero-padded, fixed-width
  # string chosen by the attribute's declared type in thing.members_types.
  # Raises with the value and type name for any type other than Integer,
  # Float or Bignum.
  #
  def get_and_format_attr thing, attr
    val = thing.send(attr)
    type_name = thing.members_types[attr].to_s.to_sym
    if type_name == :Integer
      "%010d" % val.to_i
    elsif type_name == :Float
      "%020.7f" % val.to_f
    elsif type_name == :Bignum
      "%020d" % val.to_i
    else
      raise [val, type_name].inspect
    end
  end

  #
  # Emit one compact record per binnable attribute of +thing+:
  #
  #   [resource_alias, attr_abbr, formatted_value, id]
  #
  # The aliases and abbreviations come from RESOURCE_ALIASES and
  # BINNABLE_ATTRS, keeping the (large) expanded output small. Resources
  # without an entry in BINNABLE_ATTRS emit nothing.
  #
  def process thing, *args, &block
    resource = thing.class.resource_name
    attr_abbrs = BINNABLE_ATTRS[resource]
    return unless attr_abbrs
    attr_abbrs.each do |attr, abbr|
      record = [
        RESOURCE_ALIASES[resource],
        abbr,
        get_and_format_attr(thing, attr),
        thing.id.to_i
      ]
      yield record
    end
  end
end
77
-
78
class BinAttrReducer < Wukong::Streamer::RankAndBinReducer
  attr_accessor :last_rsrc_attr

  #
  # The key is the attribute value. One reducer can see several
  # [resource, attribute] groups; whenever the group changes, the ordering
  # state is reset (per the original note, every partition has the same
  # cardinality, so bin_size and friends are left alone).
  #
  def get_key rsrc, attr, val, *args
    current = [rsrc, attr]
    unless current == last_rsrc_attr
      reset_order_params!
      self.last_rsrc_attr = current
    end
    val
  end

  #
  # Reorder the outgoing fields to
  #
  #   resource_abbr  id  attr_abbr  bin
  #
  # so the records sort correctly for the re-assembler.
  #
  def emit record
    rsrc, attr, _val, id, _numbering, _rank, bin = record
    super([rsrc, id, attr, bin])
  end
end
106
-
107
class ReassembleObjectReducer < Wukong::Streamer::AccumulatingReducer
  attr_accessor :thing

  # Look up the binned struct class for a resource abbreviation.
  def klass_from_abbr rsrc_abbr
    BINNED_RESOURCE_ALIASES[rsrc_abbr.to_sym]
  end

  # Records are grouped by resource abbreviation and integer id.
  def get_key rsrc_abbr, id, *args
    [rsrc_abbr, id.to_i]
  end

  # Begin a new group: instantiate the binned struct with just its id.
  def start! rsrc_abbr, id, *args
    self.thing = klass_from_abbr(rsrc_abbr).new(id.to_i)
  end

  # Store one binned attribute on the struct being assembled.
  def accumulate rsrc, id, attr, bin
    setter = "#{attr}="
    thing.send(setter, bin)
  end

  # Emit the assembled struct for the finished group.
  def finalize
    yield thing
  end
end
129
-
130
- #
131
- # Two-phase script
132
- #
133
- # FIXME -- We need a runner class to manage this.
134
- #
135
class Script < Wukong::Script
  attr_accessor :phase

  #
  # KLUDGE: pick the mapper/reducer pair from a --phase=N flag in ARGV.
  #
  # * Phase 1 disassembles each object and finds the bin for each binnable
  #   attribute's value.
  # * Phase 2 is not implemented and raises.
  # * Phase 3 puts the records back together.
  #
  # Raises when no --phase= flag is present.
  #
  def initialize
    phase_given = lambda { |number| ARGV.any? { |arg| arg =~ /--phase=#{number}/ } }
    if phase_given.call(1)
      self.phase = 1
      self.mapper_klass = ExplodeResourceMapper
      self.reducer_klass = BinAttrReducer
    elsif phase_given.call(2)
      raise "Phase 2 : ????"
    elsif phase_given.call(3)
      self.phase = 3
      self.mapper_klass = nil
      self.reducer_klass = ReassembleObjectReducer
    else
      raise "Please run me with a --phase= option"
    end
    super(mapper_klass, reducer_klass)
  end

  #
  # Inherited defaults merged with the sort/partition settings of the
  # current phase.
  #
  def default_options
    phase_options =
      case phase
      when 1
        # partition on [rsrc, attr]; sort on [rsrc, attr, val]
        { :sort_fields => 3, :partition_fields => 2 }
      when 3
        # sort on [rsrc, id]
        { :sort_fields => 2 }
      else
        {}
      end
    super.merge(phase_options)
  end
end
170
-
171
- # execute script
172
- Script.new.run
173
- end
@@ -1,40 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
-
5
- # Run as (local mode)
6
- #
7
- # ./examples/stupidly_simple_filter.rb --run=local input.tsv output.tsv
8
- #
9
- # for hadoop mode,
10
- #
11
- # ./examples/stupidly_simple_filter.rb --run=hadoop input.tsv output.tsv
12
- #
13
- # For debugging, run
14
- #
15
- # cat input.tsv | ./examples/stupidly_simple_filter.rb --map input.tsv | more
16
- #
17
-
18
class Mapper < LineStreamer
  include Filter

  # Car makes to look for anywhere in a line.
  MATCHER = /(ford|mercury|saab|mazda|isuzu)/

  #
  # A very simple filter: a line is emitted, whole, only when MATCHER
  # matches somewhere in it. Returns the MatchData on a hit and nil
  # otherwise.
  #
  def emit? line
    MATCHER.match(line)
  end
end
38
-
39
- # Execute the script
40
- Wukong.run(Mapper)
@@ -1,75 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
-
5
- module WordCount
6
- class Mapper < Wukong::Streamer::LineStreamer
7
- #
8
- # Split a string into its constituent words.
9
- #
10
- # This is pretty simpleminded:
11
- # * downcase the word
12
- # * Split at any non-alphanumeric boundary, including '_'
13
- # * However, preserve the special cases of 's, 'd or 't at the end of a
14
- # word.
15
- #
16
- # tokenize("Ability is a poor man's wealth #johnwoodenquote")
17
- # # => ["ability", "poor", "man's", "wealth", "johnwoodenquote"] (words shorter than 3 characters are dropped)
18
- #
19
# Split +str+ into lowercase word tokens.
#
# Anything other than ASCII letters, digits and apostrophes separates words
# (so hyphenated words are split). An apostrophe is kept only when it
# introduces one of the endings s, t, d, m, re, ve or ll; every other
# apostrophe is a separator. Tokens shorter than 3 characters are dropped.
#
# Returns [] for nil, empty or whitespace-only input.
def tokenize str
  return [] if str.nil? || str.strip.empty?

  text = str.downcase
  # Collapse every run of punctuation (anything but [a-z0-9']) to one space.
  text = text.gsub(/[^a-z0-9']+/, ' ')
  # Protect the apostrophes worth keeping behind a '!' placeholder; the
  # previous step has already removed any literal '!' from the text.
  text = text.gsub(/(\w)'([stdm]|re|ve|ll)\b/, '\1!\2')
  # Remaining apostrophes become separators, placeholders become apostrophes.
  text = text.tr("'!", " '")

  text.split(/\s+/).reject { |word| word.length < 3 }
end
32
-
33
- #
34
- # Emit each word in each line.
35
- #
36
# Yield a [word, 1] pair for every token found in +line+.
def process line
  words = tokenize(line)
  words.each do |word|
    pair = [word, 1]
    yield pair
  end
end
39
- end
40
-
41
- #
42
- # You can stack up all the values in a list then sum them at once.
43
- #
44
- # This isn't good style, as it means the whole list is held in memory
45
- #
46
class Reducer1 < Wukong::Streamer::ListReducer
  # Add up the last field (as an integer) of every stored record and emit
  # [total, key].
  def finalize
    total = values.inject(0) { |sum, record| sum + record.last.to_i }
    yield [total, key]
  end
end
51
-
52
- #
53
- # A bit kinder to your memory manager: accumulate the sum record-by-record:
54
- #
55
class Reducer2 < Wukong::Streamer::AccumulatingReducer
  # Reset the running count when a new key starts.
  def start!(*args)
    @key_count = 0
  end

  # Count one more record for the current key.
  def accumulate(*args)
    @key_count += 1
  end

  # Emit [count, key] for the finished key.
  def finalize
    yield [@key_count, key]
  end
end
62
-
63
- #
64
- # ... easiest of all, though: this is common enough that it's already included
65
- #
66
- require 'wukong/streamer/count_keys'
67
- class Reducer3 < Wukong::Streamer::CountKeys
68
- end
69
- end
70
-
71
- # Execute the script
72
- Wukong.run(
73
- WordCount::Mapper,
74
- WordCount::Reducer2
75
- )
@@ -1,580 +0,0 @@
1
- #!/usr/local/bin/ruby -w
2
-
3
- require "enumerator"
4
-
5
- ##
6
- # Graph models directed graphs and subgraphs and outputs in graphviz's
7
- # dot format.
8
-
9
- module Hanuman
10
-
11
- class GraphvizBuilder
12
- VERSION = "2.5.0" # :nodoc:
13
-
14
- LIGHT_COLORS = %w(gray lightblue lightcyan lightgray lightpink
15
- lightslategray lightsteelblue white)
16
-
17
- # WTF -- can't be %w() because of a bug in rcov
18
- BOLD_COLORS = [:black, :brown, :mediumblue, :blueviolet,
19
- :orange, :magenta, :darkgreen, :maroon,
20
- :violetred, :purple, :greenyellow, :deeppink,
21
- :midnightblue, :firebrick, :darkturquoise,
22
- :mediumspringgreen, :chartreuse, :navy,
23
- :lightseagreen, :chocolate, :lawngreen, :green,
24
- :indigo, :darkgoldenrod, :darkviolet, :red,
25
- :springgreen, :saddlebrown, :mediumvioletred,
26
- :goldenrod, :tomato, :cyan, :forestgreen,
27
- :darkorchid, :crimson, :coral, :deepskyblue,
28
- :seagreen, :peru, :turquoise, :orangered,
29
- :dodgerblue, :sienna, :limegreen, :royalblue,
30
- :darkorange, :blue]
31
-
32
- ##
33
- # Defines the brewer color schemes and the maximum number of colors
34
- # in each set.
35
-
36
- COLOR_SCHEME_MAX = {
37
- :accent => 8, :blues => 9, :brbg => 11, :bugn => 9,
38
- :dark2 => 8, :gnbu => 9, :greens => 9, :greys => 9,
39
- :oranges => 9, :orrd => 9, :paired => 12, :pastel1 => 9,
40
- :pastel2 => 8, :piyg => 11, :prgn => 11, :pubu => 9,
41
- :pubugn => 9, :puor => 11, :purd => 9, :purples => 9,
42
- :rdbu => 11, :rdgy => 11, :rdylbu => 11, :rdylgn => 11,
43
- :reds => 9, :set1 => 9, :set2 => 8, :set3 => 12,
44
- :spectral => 11, :ylgn => 9, :ylgnbu => 9, :ylorbr => 9,
45
- :ylorrd => 9
46
- }
47
-
48
- SHAPES = %w[
49
- Mcircle Mdiamond Msquare box box3d circle component
50
- diamond doublecircle doubleoctagon egg ellipse folder
51
- hexagon house invhouse invtrapezium invtriangle none
52
- note octagon parallelogram pentagon plaintext point
53
- polygon rect rectangle septagon square tab trapezium
54
- triangle tripleoctagon
55
- ].map(&:to_sym)
56
-
57
- STYLES = %w(dashed dotted solid invis bold filled diagonals rounded).map(&:to_sym)
58
-
59
- ARROW_RE = /(?:o?[lr]?(?:box|crow|diamond|dot|inv|none|normal|tee|vee)){1,4}/
60
-
61
- ARROWS = %w(box crow diamond dot inv none normal tee vee).map(&:to_sym)
62
-
63
- STYLES.each do |name|
64
- define_method(name) { style name }
65
- end
66
-
67
- (BOLD_COLORS + LIGHT_COLORS).each do |name|
68
- define_method(name) { color name }
69
- end
70
-
71
- SHAPES.each do |name|
72
- method_name = name.downcase.to_s.sub(/none/, 'shape_none')
73
- define_method(method_name) { shape name }
74
- end
75
-
76
- ARROWS.each do |name|
77
- method_name = {
78
- :none => "none_arrow",
79
- :box => "box_arrow",
80
- :diamond => "diamond_arrow",
81
- }[name] || name
82
-
83
- define_method(method_name) { arrowhead name }
84
- end
85
-
86
- ENGINES = %w[ circo dot fdp neato osage sfdp twopi ].map(&:to_sym)
87
-
88
- ##
89
- # A parent graph, if any. Only used for subgraphs.
90
-
91
- attr_accessor :graph
92
-
93
- ##
94
- # The name of the graph. Optional for graphs and subgraphs. Prefix
95
- # the name of a subgraph with "cluster" for subgraph that is boxed.
96
-
97
- attr_accessor :name
98
-
99
- ##
100
- # Global attributes for edges in this graph.
101
-
102
- attr_reader :edge_attribs
103
-
104
- ##
105
- # The hash of hashes of edges in this graph. Use #[] or #node to create edges.
106
-
107
- attr_reader :edges
108
-
109
- ##
110
- # Global attributes for this graph.
111
-
112
- attr_reader :graph_attribs
113
-
114
- ##
115
- # Global attributes for nodes in this graph.
116
-
117
- attr_reader :node_attribs
118
-
119
- ##
120
- # The hash of nodes in this graph. Use #[] or #node to create nodes.
121
-
122
- attr_reader :nodes
123
-
124
- ##
125
- # An array of subgraphs.
126
-
127
- attr_reader :subgraphs
128
-
129
- ##
130
- # Creates a new graph object. Optional name and parent graph are
131
- # available. Also takes an optional block for DSL-like use.
132
-
133
# Creates a new graph object.
#
# +name+  -- optional graph name; stored as a symbol, or nil when omitted.
# +graph+ -- optional parent graph; when given, this graph is pushed onto
#            the parent as a subgraph and inherits the parent's scheme.
# +block+ -- optional DSL block, handed to #configurate.
#
# The layout engine starts out as :dot.
def initialize name = nil, graph = nil, &block
  # name defaults to nil, and nil has no #to_sym, so guard the conversion.
  @name = name && name.to_sym
  @graph = graph
  graph << self if graph

  # Nodes and edges are created lazily on first access.
  @nodes = Hash.new { |h, k| h[k] = Node.new self, k }
  @edges = Hash.new { |h, k|
    h[k] = Hash.new { |h2, k2| h2[k2] = Edge.new self, self[k], self[k2] }
  }
  @graph_attribs = []
  @node_attribs = []
  @edge_attribs = []
  @subgraphs = []

  engine(:dot)

  self.scheme = graph.scheme if graph
  node_attribs << scheme if scheme
  configurate(&block) if block
end
152
-
153
- def depth
154
- graph.nil? ? 0 : graph.depth + 1
155
- end
156
-
157
- def configurate(&block)
158
- (block.arity == 0) ? instance_eval(&block) : block.call(self)
159
- self
160
- end
161
-
162
# Reads or sets the layout engine.
#
# Without an argument, returns the current engine. With one, checks that it
# (as a symbol) is listed in ENGINES, stores it as given and returns it.
# Raises ArgumentError for an unlisted engine.
def engine(engine_name=nil)
  if engine_name
    unless ENGINES.include?(engine_name.to_sym)
      raise ArgumentError, "Don't have engine #{engine_name} listed -- should be one of #{ENGINES}"
    end
    @engine = engine_name
  else
    @engine
  end
end
167
-
168
- ##
169
- # Push a subgraph into the current graph. Sets the subgraph's graph to self.
170
-
171
- def << subgraph
172
- subgraphs << subgraph
173
- subgraph.graph = self
174
- end
175
-
176
- ##
177
- # Access a node by name
178
-
179
- def [] name
180
- nodes[name]
181
- end
182
-
183
- def arrowhead shape
184
- raise ArgumentError, "Bad arrow shape: #{shape}" unless shape =~ ARROW_RE
185
- Attribute.new "arrowhead = #{shape}"
186
- end
187
-
188
- def arrowtail shape
189
- raise ArgumentError, "Bad arrow shape: #{shape}" unless shape =~ ARROW_RE
190
- Attribute.new "arrowtail = #{shape}"
191
- end
192
-
193
- def arrowsize size
194
- Attribute.new "arrowsize = #{size}"
195
- end
196
-
197
- ##
198
- # A convenience method to set the global node attributes to use boxes.
199
-
200
- def boxes
201
- node_attribs << shape(:box)
202
- end
203
-
204
- ##
205
- # Shortcut method to create a new color Attribute instance.
206
-
207
- def color color
208
- Attribute.new "color = #{color}"
209
- end
210
-
211
- ##
212
- # Shortcut method to create a new colorscheme Attribute instance. If
213
- # passed +n+, +name+ must match one of the brewer color scheme names
214
- # and it will generate accessors for each fillcolor as well as push
215
- # the colorscheme onto the node_attribs.
216
-
217
- attr_accessor :scheme
218
-
219
- def colorscheme name, n = nil
220
- self.scheme = Attribute.new "colorscheme = #{name}#{n}"
221
- max = COLOR_SCHEME_MAX[name.to_sym]
222
-
223
- node_attribs << scheme if max
224
-
225
- scheme
226
- end
227
-
228
- (1..COLOR_SCHEME_MAX.values.max).map { |m|
229
- define_method "c#{m}" do
230
- GraphvizBuilder::Attribute.new("fillcolor = #{m}")
231
- end
232
- }
233
-
234
- ##
235
- # Define one or more edges.
236
- #
237
- # edge :a, :b, :c, ...
238
- #
239
- # is equivalent to:
240
- #
241
- # edge :a, :b
242
- # edge :b, :c
243
- # ...
244
-
245
# Creates an edge between each consecutive pair of +names+ and returns the
# last edge created, or nil when fewer than two names are given.
def edge(*names)
  names.each_cons(2).inject(nil) do |_previous, (from, to)|
    self[from][to]
  end
end
252
-
253
- ##
254
- # Creates a new Graph whose edges point the other direction.
255
-
256
- def invert(new_name=nil)
257
- result = self.class.new(new_name || "#{name}_inverted")
258
- edges.each do |from, h|
259
- h.each do |to, edge|
260
- result[to][from]
261
- end
262
- end
263
- result
264
- end
265
-
266
- ##
267
- # Shortcut method to create a new fillcolor Attribute instance.
268
-
269
- def fillcolor n
270
- Attribute.new "fillcolor = #{n}"
271
- end
272
-
273
- ##
274
- # Shortcut method to create a new font Attribute instance. You can
275
- # pass in both the name and an optional font size.
276
-
277
# Shortcut method to create a new font Attribute instance.
#
# +name+ -- font name; emitted quoted via #inspect.
# +size+ -- optional font size; when given, a fontsize setting is appended
#           to the same attribute.
def font name, size = nil
  spec = "fontname = #{name.inspect}"
  spec += ", fontsize = #{size}" if size
  Attribute.new spec
end
280
-
281
- def fontsize size
282
- Attribute.new "fontsize = #{size}"
283
- end
284
-
285
- ##
286
- # Shortcut method to set the graph's label. Usually used with subgraphs.
287
-
288
- def label name
289
- graph_attribs << %Q{label = "#{name.to_s.gsub(/\n/, '\n')}"} # ""
290
- end
291
-
292
- ##
293
- # Access a node by name, supplying an optional label
294
-
295
- def node name, label = nil
296
- n = nodes[name]
297
- n.label label if label
298
- n
299
- end
300
-
301
- ##
302
- # Shortcut method to specify the orientation of the graph. Defaults
303
- # to the graphviz default "TB".
304
-
305
- def orient dir = :TB
306
- graph_attribs << "rankdir = #{dir}"
307
- end
308
-
309
- ##
310
- # Shortcut method to specify the orientation of the graph. Defaults to :LR.
311
-
312
- def rotate dir = :LR
313
- orient dir
314
- end
315
-
316
- ##
317
- # Saves out both a dot file to path and an image for the specified type.
318
- # Specify type as nil to skip exporting an image.
319
-
320
# Writes the graph to "<path>.dot" and, when +type+ is given, runs the
# layout engine to render "<path>.<type>".
#
# Returns nil when +type+ is nil; otherwise returns the result of
# Kernel#system (true on success, false on a non-zero exit, nil when the
# engine could not be executed).
def save(path, type=nil)
  dot_path = "#{path}.dot"
  File.open(dot_path, "w") { |f| f.puts to_s }
  return unless type

  # Argument-vector form: no shell is involved, so paths containing spaces
  # or shell metacharacters are passed through verbatim. :out redirects the
  # engine's stdout into the image file, as `>` did.
  system(engine.to_s, "-T#{type}", dot_path, :out => "#{path}.#{type}")
end
326
-
327
- ##
328
- # Shortcut method to create a new shape Attribute instance.
329
-
330
- def shape shape
331
- Attribute.new "shape = #{shape}"
332
- end
333
-
334
- ##
335
- # Shortcut method to create a new style Attribute instance.
336
-
337
- def style name
338
- Attribute.new "style = #{name}"
339
- end
340
-
341
- ##
342
- # Shortcut method to create a subgraph in the current graph. Use
343
- # with the top-level +digraph+ method in block form for a graph DSL.
344
-
345
- def subgraph name = nil, &block
346
- GraphvizBuilder.new name, self, &block
347
- end
348
-
349
- ##
350
- # Shortcut method to create a clustered subgraph in the current
351
- # graph. Use with the top-level +digraph+ method in block form for a
352
- # graph DSL.
353
-
354
- def cluster name, &block
355
- subgraph "cluster_#{name}", &block
356
- end
357
-
358
- ##
359
- # Outputs a graphviz graph.
360
-
361
# Outputs a graphviz graph.
#
# The header is "subgraph" when this graph has a parent and "digraph"
# otherwise, followed by the quoted name if there is one. Then come the
# graph attributes, the global node and edge attribute lists (when
# non-empty), the subgraphs, the nodes and finally the edges. In a
# top-level graph a node is listed only when it has attributes or is an
# orphan; in a subgraph every node is listed. Lines are joined with a
# newline followed by one space per level of #depth.
def to_s
  keyword = graph ? "subgraph " : "digraph "
  # Build a fresh string rather than appending to the literal, so this
  # also works when string literals are frozen.
  header = (name && !name.empty?) ? "#{keyword}\"#{name}\"" : keyword
  lines = ["#{header} {"]

  graph_attribs.each { |attrib| lines << " #{attrib};" }

  lines << " node [ #{node_attribs.join(", ")} ];" unless node_attribs.empty?
  lines << " edge [ #{edge_attribs.join(", ")} ];" unless edge_attribs.empty?

  subgraphs.each { |sub| lines << " #{sub.to_s.rstrip};" }

  nodes.each do |_node_name, node|
    lines << " #{node.to_s.rstrip};" if graph || node.attributes? || node.orphan?
  end

  edges.each do |_from, deps|
    deps.each { |_to, dep_edge| lines << " #{dep_edge.to_s.rstrip};" }
  end

  lines << "}"
  lines.join("\n#{" " * depth}")
end
397
-
398
- ##
399
- # An attribute for a graph, node, or edge. Really just a composable
400
- # string (via #+) with a convenience method #<< that allows you to
401
- # "paint" nodes and edges with this attribute.
402
-
403
class Attribute < Struct.new :attr
  ##
  # "Paint" a graph, node or edge with this attribute: append self to the
  # thing's attribute list, drop duplicates, and return self so paints can
  # be chained:
  #
  #   red << node1 << node2 << node3

  def << thing
    painted = thing.attributes
    painted << self
    painted.uniq!
    self
  end

  ##
  # The string form of an attribute is its +attr+ value.

  alias_method :to_s, :attr

  ##
  # Combine this attribute with +style+ into a new CompoundAttribute:
  #
  #   bad_nodes = red + filled + diamond

  def + style
    CompoundAttribute.new.tap do |compound|
      compound.push self
      compound.push style
    end
  end
end
438
-
439
class CompoundAttribute < Attribute
  # +attr+ holds the list of member attributes; it starts out empty.
  def initialize attr = []
    super(attr)
  end

  # Append +attrib+ to the member list.
  def push attrib
    attr.push(attrib)
  end

  # Paint +thing+ with every member in turn (members may themselves be
  # compound), then return self.
  def << thing
    attr.each { |member| member << thing }
    self
  end

  # The members joined with ", ".
  def to_s
    attr.join(", ")
  end
end
459
-
460
##
# Common base for things that live in a graph and carry a list of
# attributes (strings or Attribute objects).

class Thingy < Struct.new :graph, :attributes
  # A new thing belongs to +graph+ and starts with no attributes.
  def initialize graph
    super graph, []
  end

  # Wrap +str+ in double quotes.
  def quote(str)
    %Q{"#{str}"}
  end

  # Returns +text+ followed by the bracketed attribute list, with +text+
  # left-justified in a column that narrows as the graph gets deeper.
  # Without attributes, +text+ is returned unchanged.
  def pad_with_attributes(text)
    width = 40 - (2 * graph.depth)
    if attributes?
      "%-#{width}s [ %s ]" % [text, attributes.join(',')]
    else
      text
    end
  end

  # Copies get their own attribute list instead of sharing the original's.
  def initialize_copy other # :nodoc:
    super
    self.attributes = other.attributes.dup
  end

  ##
  # Shortcut method to set the label attribute. Any earlier label string is
  # replaced; newlines in +name+ are written as a literal \n. Returns self.

  def label name
    # Only String entries can be label strings. Attribute objects are
    # skipped explicitly because Object#=~ no longer exists on Ruby 3.2+.
    attributes.reject! { |s| s.is_a?(String) && s =~ /^label =/ }
    attributes << "label = \"#{name.to_s.gsub(/\n/, '\n')}\""
    self
  end

  ##
  # Does this thing have attributes?

  def attributes?
    !attributes.empty?
  end
end
499
-
500
- ##
501
- # An edge in a graph.
502
-
503
class Edge < Thingy

  attr_accessor :from, :to, :from_slot, :to_slot

  ##
  # Create a new edge in +graph+ running from +from+ to +to+, optionally
  # recording a slot for each end.

  def initialize graph, from, to, from_slot=nil, to_slot=nil
    super(graph)
    self.from, self.to = from, to
    self.from_slot, self.to_slot = from_slot, to_slot
  end

  ##
  # The edge in dot syntax: the quoted endpoint names joined by "->",
  # followed by any attributes.

  def to_s
    endpoints = [quote(from.name), quote(to.name)]
    pad_with_attributes("%-18s -> %s" % endpoints)
  end
end
528
-
529
- ##
530
- # Nodes in the graph.
531
-
532
class Node < Thingy

  attr_accessor :name

  # True when this node is the source or the target of any edge in its graph.
  def connected?
    edges = graph.edges
    edges.key?(name) || edges.each_value.any? { |deps| deps.key?(name) }
  end

  # True when no edge in the graph touches this node.
  def orphan?
    !connected?
  end

  ##
  # Create a new Node belonging to +graph+, called +name+.

  def initialize graph, name
    super(graph)
    self.name = name
  end

  ##
  # Make sure an edge exists from self to the node called +name+ (creating
  # the node and edge on first use) and return self so calls can be chained.

  def >> name
    self[name] # creates node and edge
    self
  end

  alias_method :<<, :>>

  ##
  # Returns the edge from self to +dep_name+.

  def [] dep_name
    outgoing = graph.edges[name]
    outgoing[dep_name]
  end

  ##
  # The node in dot syntax: its quoted name followed by any attributes.

  def to_s
    quoted = quote(name)
    pad_with_attributes(quoted)
  end
end
579
- end
580
- end