wukong 3.0.0.pre → 3.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,68 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
-
5
# A directed edge running from +src+ to +dest+.
class Edge < Struct.new(:src, :dest); end
7
-
8
# Combined record for a (src, dest) pair: the two endpoints followed by one
# member per relation flavor, in both the a->b and b->a directions.
class MultiEdge < Struct.new(
    *%w[
      src           dest
      a_follows_b   b_follows_a
      a_replies_b   b_replies_a
      a_favorites_b b_favorites_a
    ].map(&:to_sym)
  )
end
15
-
16
#
# Builds two-hop paths (in -> mid -> out) from a list of directed edges.
#
module Gen1HoodEdges
  #
  # Emits every edge twice, keyed on each endpoint in turn:
  #
  #   dest  'i'  src     (src is an inbound neighbor of dest)
  #   src   'o'  dest    (dest is an outbound neighbor of src)
  #
  class Mapper < Wukong::Streamer::Base
    def process(rsrc, src, dest)
      # next if (src.to_i == 0) || (dest.to_i == 0)
      inbound  = [dest, 'i', src]
      outbound = [src, 'o', dest]
      yield inbound
      yield outbound
    end
  end

  #
  # Accumulate ( !!in memory!! ) all inbound links onto the middle node.
  #
  # Then, for each outbound link, loop over those inbound links and emit the
  # triple (in, mid, out), tagged 'path_2'.
  #
  # NOTE(review): this only works if every 'i' record for a key arrives before
  # its 'o' records -- presumably guaranteed by Script#default_options sorting
  # on the first two fields; confirm against the runner.
  #
  class Reducer < Wukong::Streamer::AccumulatingReducer
    attr_accessor :ins

    # Start with an empty list of inbound nodes.
    def start!(*args)
      self.ins = []
    end

    # 'i' records are stashed; 'o' records are crossed with everything stashed.
    def accumulate(mid, dir, node)
      if dir == 'i'
        ins << node
        seen = ins.length
        # Progress report every 1000 records once the list is large.
        if (seen % 1000 == 0) && (seen > 10000)
          $stderr.puts ["Accumulating:", mid, seen].join("\t")
        end
      elsif dir == 'o'
        ins.each { |inn| yield ['path_2', inn, mid, node] }
      end
    end

    # Nothing is emitted at the end of a group.
    def finalize; end

    # Records are grouped on their first field, the middle node.
    def get_key(mid, *_)
      mid
    end
  end

  # Job wrapper: sort on two fields, partition on one.
  class Script < Wukong::Script
    def default_options
      overrides = { :sort_fields => 2, :partition_fields => 1 }
      super.merge(overrides)
    end
  end
end
63
-
64
- # Execute the script
65
# Hand the mapper and reducer classes to the job wrapper and run it.
mapper_class  = Gen1HoodEdges::Mapper
reducer_class = Gen1HoodEdges::Reducer
Gen1HoodEdges::Script.new(mapper_class, reducer_class).run
@@ -1,112 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong'
4
-
5
- #
6
- # Takes any number of flavors of directed edge with the form
7
- #
8
- # a_relatesto_b src_id dest_id [optional fields]
9
- #
10
- # and prepares a combined adjacency list. You need to supply a model named
11
- # "MultiEdge" with members for each edge type.
12
- #
13
- # For instance, suppose you have a social network with edges like
14
- #
15
- # a_follows_b user_a_id user_b_id
16
- # a_messages_b user_a_id user_b_id message_id date
17
- # a_favorites_b user_a_id user_b_id message_id date
18
- #
19
- # Your MultiEdge class might look like
20
- #
21
- # class MultiEdge < Struct.new(
22
- # :src, :dest,
23
- # :a_follows_b, :b_follows_a,
24
- # :a_messages_b, :b_messages_a,
25
- # :a_favorites_b, :b_favorites_a
26
- # )
27
- # end
28
- #
29
- # The row for a user pair who follows each other; with user_a #24601 messaging b
30
- # 57 times and favoriting 5 of user_b's messages; and user_b #8675309 messaging
31
- # 62 times and favoriting none, will emerge as (tab separated, with [blank]
32
- # indicating there is no text in that slot):
33
- #
34
- # ...
35
- # 24601 8675309 1 1 57 62 5 [blank]
36
- # ...
37
- #
38
module GenMultiEdge
  #
  # Emit each relation twice, once from each endpoint's point of view:
  #
  #   src   dest  a_<rel>_b
  #   dest  src   b_<rel>_a
  #
  # The src and dest ids are canonicalized with +to_i+, so they are emitted as
  # plain integers (no zero-padding). Records whose src or dest converts to 0
  # are dropped, as are records whose first field does not look like
  # "a_<rel>_b...". Everything except +src+, +dest+ and +rel+ is discarded.
  #
  class Mapper < Wukong::Streamer::Base
    def process(rsrc, src, dest, *_)
      # note that a_retweets_b_id matches here
      match = /^a_([a-z]+)_b.*/.match(rsrc)
      return unless match

      rel  = match.captures.first
      src  = src.to_i
      dest = dest.to_i
      return if (src == 0) || (dest == 0)

      yield [src, dest, "a_#{rel}_b"]
      yield [dest, src, "b_#{rel}_a"]
    end
  end

  #
  # Aggregate all sightings of relations for each (src, dest) pair into a
  # single combined MultiEdge record, counting how often each relation occurs.
  #
  # Note that [a,b] and [b,a] /each/ have a listing, with the a->b and b<-a
  # relations repeated for each. That is, if there is an "a_messages_b"
  # relation, you'll have edges
  #
  #   x  y  ... a_messages_b(x,y) b_messages_a(y,x) ...
  #   y  x  ... a_messages_b(y,x) b_messages_a(x,y) ...
  #
  # NOTE(review): +rel+ is used directly as a MultiEdge member name, so a
  # relation with no matching member presumably raises -- confirm against
  # the MultiEdge model supplied to this script.
  #
  class Reducer < Wukong::Streamer::AccumulatingReducer
    attr_accessor :multi_edge

    # Records are grouped on the (src, dest) pair.
    def get_key(src, dest, rel)
      [src, dest]
    end

    # Begin with a blank MultiEdge.
    def start!(*args)
      self.multi_edge = MultiEdge.new
    end

    # Bump the counter for +rel+, treating an unset slot as zero.
    def accumulate(src, dest, rel)
      multi_edge[rel] = (multi_edge[rel] || 0) + 1
    end

    # Stamp the pair onto the record and emit it.
    def finalize
      multi_edge.src, multi_edge.dest = key
      yield multi_edge
    end
  end
end
90
-
91
# Typed model for a single directed edge; both endpoints are declared Integer.
Edge = TypedStruct.new([:src, Integer], [:dest, Integer])
95
-
96
# Typed model for the combined edge record: the two endpoint ids followed by
# one Integer slot per relation, in each direction.
MultiEdge = TypedStruct.new(
  *[
    :src,           :dest,
    :a_follows_b,   :b_follows_a,
    :a_replies_b,   :b_replies_a,
    :a_atsigns_b,   :b_atsigns_a,
    :a_retweets_b,  :b_retweets_a,
    :a_favorites_b, :b_favorites_a
  ].map { |field| [field, Integer] }
)
110
-
111
- # Execute the script
112
# Mapper and Reducer are defined inside GenMultiEdge, and Script inside Wukong,
# so from the top level they have to be referenced by their qualified names.
Wukong::Script.new(
  GenMultiEdge::Mapper,
  GenMultiEdge::Reducer,
  :sort_fields => 2
).run
@@ -1,64 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- $: << File.dirname(__FILE__)+'/../../lib'
4
- require 'wukong'
5
-
6
# A directed edge running from +src+ to +dest+.
class Edge < Struct.new(:src, :dest); end
8
-
9
# Edge subtype used for the records this script reads and emits.
class ASymmetricB < Edge; end
11
-
12
module Wukong::Streamer
  #
  # Streamer whose records are ASymmetricB edges.
  #
  class EdgeStreamer < Wukong::Streamer::Base
    # Lets the parent class split +line+, ignores the first field, and wraps
    # the second and third (coerced with to_i) in a one-element array holding
    # an ASymmetricB. Any further fields are dropped.
    def recordize(line)
      _, src, dest = super(line)
      edge = ASymmetricB.new(src.to_i, dest.to_i)
      [edge]
    end
  end
end
20
-
21
- #
22
- # Find symmetric links
23
- #
24
- # Takes adjacency list for a directed graph and emits only edges where
25
- # A->B and B->A
26
- #
27
- # The output will list each such symmetric edge exactly once as
28
- # a_symmetric_b node1 node2
29
- # where node1 is numerically less than node2 (ids are compared with to_i).
30
- #
31
module FindSymmetricLinks
  #
  # Emits every edge in both orientations. If both A->B and B->A are present
  # in the input, the flattened line for (A, B) is therefore produced twice
  # (assuming each input edge appears only once).
  #
  class Mapper < Wukong::Streamer::EdgeStreamer
    def process(edge)
      yield edge.to_flat(false)
      yield ASymmetricB.new(edge.dest, edge.src).to_flat(false)
    end
  end

  #
  # Counts identical adjacent lines by shelling out to `uniq -c` (the child
  # process reads this process's standard input) and keeps the lines that
  # occurred exactly twice, emitting each symmetric pair once, with the
  # numerically smaller id first.
  #
  # NOTE(review): `uniq -c` only merges adjacent duplicates, so the input is
  # presumably expected to arrive sorted on all three fields -- see
  # Script#default_options.
  #
  class Reducer < Wukong::Streamer::Base
    def stream
      %x{/usr/bin/uniq -c}.split("\n").each do |line|
        # Five targets need a limit of 5; with a smaller limit +data+ could
        # never be filled and +dest+ would swallow any trailing fields.
        key_count, _rsrc, src, dest, data = line.strip.split(/\s+/, 5)
        next unless key_count.to_i == 2
        next unless src.to_i < dest.to_i
        emit [src, dest, data].compact
      end
    end
  end

  # Job wrapper: sort on three fields.
  class Script < Wukong::Script
    def default_options
      super.merge :sort_fields => 3
    end
  end
end
59
-
60
- # Execute the script
61
# Run through FindSymmetricLinks::Script rather than plain Wukong::Script, so
# that the subclass's default_options (:sort_fields => 3) actually apply.
FindSymmetricLinks::Script.new(
  FindSymmetricLinks::Mapper,
  FindSymmetricLinks::Reducer
).run
@@ -1,6 +0,0 @@
1
-
2
- Calculate pagerank for a tab-separated adjacency list.
3
-
4
- See
5
- * Heretrix pagerank util
6
- * http://www.umiacs.umd.edu/~jimmylin/cloud9/docs/exercises/pagerank.html
@@ -1,57 +0,0 @@
1
-
2
- -- ===========================================================================
3
- --
4
- -- Load Graph
5
- --
6
- AFollowsB = LOAD 'twnew/all/a_follows_b' AS (rsrc: chararray, user_a_id: int, user_b_id: int) ;
7
- FollEdges_0 = FOREACH AFollowsB GENERATE user_a_id AS src, user_b_id AS dest ;
8
-
9
- InitPagerankFoll_0 = GROUP FollEdges_0 BY src ;
10
- InitPagerankFoll_1 = FOREACH InitPagerankFoll_0 GENERATE
11
- group AS src,
12
- 1.0F AS pagerank:float,
13
- FollEdges_0.(dest) AS dests
14
- ;
15
- rmf twnew/pagerank-foll/pagerank_graph_000 ;
16
- STORE InitPagerankFoll_1 INTO 'twnew/pagerank-foll/pagerank_graph_000';
17
-
18
-
19
- -- MultiEdge = LOAD 'twnew/all/multi_edge' AS (
20
- -- rsrc: chararray, src: int, dest: int,
21
- -- fo: int, fr: int,
22
- -- re_out: int, re_in: int,
23
- -- at_out: int, at_in: int,
24
- -- rt_out: int, rt_in: int,
25
- -- fv_out: int, fv_in: int) ;
26
- --
27
- -- SymmEdges_0 = FOREACH MultiEdge GENERATE src, dest, fo, fr ;
28
- -- SymmEdges_1 = FILTER SymmEdges_0 BY (fo >= 1.0) AND (fr >= 1.0) ;
29
- -- SymmEdges = FOREACH SymmEdges_1 GENERATE src, dest ;
30
- -- -- rm twnew/graphs/symm_edges; STORE SymmEdges INTO 'twnew/graphs/symm_edges' ;
31
- -- SymmEdges = LOAD 'twnew/graphs/symm_edges' AS (src:int , dest:int);
32
- --
33
- -- AnyoutEdges_0 = FOREACH MultiEdge GENERATE src, dest, fo, re_out, fv_out ;
34
- -- AnyoutEdges_1 = FILTER AnyoutEdges_0 BY (fo >= 1.0) OR (re_out >= 1.0) OR (fv_out >= 1.0) ;
35
- -- AnyoutEdges = FOREACH AnyoutEdges_1 GENERATE src, dest ;
36
- -- -- rm twnew/graphs/anyout_edges; STORE AnyoutEdges INTO 'twnew/graphs/anyout_edges' ;
37
- -- AnyoutEdges = LOAD 'twnew/graphs/anyout_edges' AS (src:int , dest:int);
38
- --
39
- --
40
- -- InitPagerankSymm_0 = GROUP SymmEdges BY src ;
41
- -- InitPagerankSymm_1 = FOREACH InitPagerankSymm_0 GENERATE
42
- -- group AS src,
43
- -- 1.0F AS pagerank:float,
44
- -- SymmEdges.(dest) AS dests
45
- -- ;
46
- -- rm twnew/pagerank-symm/pagerank_graph_000 ;
47
- -- STORE InitPagerankSymm_1 INTO 'twnew/pagerank-symm/pagerank_graph_000';
48
- --
49
- --
50
- -- InitPagerankAnyout_0 = GROUP AnyoutEdges BY src ;
51
- -- InitPagerankAnyout_1 = FOREACH InitPagerankAnyout_0 GENERATE
52
- -- group AS src,
53
- -- 1.0F AS pagerank:float,
54
- -- AnyoutEdges.(dest) AS dests
55
- -- ;
56
- -- rm twnew/pagerank-anyout/pagerank_graph_000 ;
57
- -- STORE InitPagerankAnyout_1 INTO 'twnew/pagerank-anyout/pagerank_graph_000';
@@ -1,72 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
-
5
- module PageRank
6
- #
7
- # Damping factor (prob. of a 'random' jump)
8
- # 0.85 works well in practice. See http://en.wikipedia.org/wiki/Pagerank
9
- #
10
- DAMPING_FACTOR = 0.85
11
-
12
- # Each user's line looks like
13
- # user_a pagerank id1,id2,...,idN
14
- # we need to disperse this user's pagerank to each of id1..idN, and
15
- # rendezvous the list of outbound links at user_a's reducer as well.
16
- module Iterating
17
- class PagerankMapper < Wukong::Streamer::Base
18
- #
19
- # Send pagerank to each page, and send the dests list back to self
20
- #
21
- def process src, pagerank, dests_str, &block
22
- # This lets us use Pig to generate the input
23
- dests_str = dests_str.gsub(/[\(\{\}\)]/, '')
24
- dests = dests_str.split(",")
25
- yield_pagerank_shares src, pagerank, dests, &block
26
- yield_own_dest_list src, dests_str, &block
27
- end
28
-
29
- # Take the source node's pagerank and distribute it among all the out-nodes
30
- def yield_pagerank_shares src, pagerank, dests
31
- pagerank_share = pagerank.to_f / dests.length
32
- dests.each do |dest|
33
- yield [dest, 'p', pagerank_share]
34
- end
35
- end
36
-
37
- # Dispatch this user's out-node list to rendezvous with itself.
38
- def yield_own_dest_list src, dests_str
39
- yield [src, 'd', dests_str]
40
- end
41
- end
42
-
43
- class PagerankReducer < Wukong::Streamer::AccumulatingReducer
44
- attr_accessor :node_id, :pagerank, :dests_str
45
- # Begin reduction with 0 accumulated pagerank and no dests as yet
46
- def start! node_id, *args
47
- self.node_id = node_id
48
- self.pagerank = 0.0
49
- self.dests_str = nil
50
- end
51
- # We'll receive fractional pagerank from all incoming edges,
52
- # and the destination list from this node's map stage
53
- def accumulate node_id, what, val
54
- case what
55
- when 'p' then self.pagerank += val.to_f
56
- when 'd' then self.dests_str = val
57
- else raise "Don't know how to accumulate #{[node_id, what, val].inspect}"
58
- end
59
- end
60
- # To finalize, dump the damped pagerank and dest list
61
- # in a form that can be fed back into this script
62
- def finalize
63
- damped_pagerank = (self.pagerank * DAMPING_FACTOR) + (1 - DAMPING_FACTOR)
64
- self.dests_str = 'dummy' if self.dests_str.blank?
65
- yield [node_id, damped_pagerank, dests_str]
66
- end
67
- end
68
-
69
- Wukong.run(PagerankMapper, PagerankReducer,
70
- :extra_args => ' -jobconf io.sort.record.percent=0.25 ')
71
- end
72
- end
@@ -1,42 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'rubygems'
3
- require 'wukong/script'
4
- require 'wukong/streamer/list_reducer'
5
-
6
- module PageRank
7
- class Script < Wukong::Script
8
- #
9
- # Input format is
10
- #
11
- # rsrc src_id dest_id [... junk ...]
12
- #
13
- # All we want from the line are its src and dest IDs.
14
- #
15
- def map_command
16
- %Q{/usr/bin/cut -d"\t" -f2,3}
17
- end
18
- end
19
-
20
- #
21
- # Accumulate the dests list in memory, dump as a whole. Multiple edges between
22
- # any two nodes are permitted, and will accumulate pagerank according to the
23
- # edge's multiplicity.
24
- #
25
- class Reducer < Wukong::Streamer::ListReducer
26
- def accumulate src, dest
27
- @values << dest
28
- end
29
-
30
- # Emit src, initial pagerank, and flattened dests list
31
- def finalize
32
- @values = ['dummy'] if @values.blank?
33
- yield [key, 1.0, @values.to_a.join(",")]
34
- end
35
- end
36
-
37
- # Execute the script
38
- Script.new(nil, PageRank::Reducer, :io_sort_record_percent => 0.25).run
39
- end
40
-
41
-
42
-
@@ -1,21 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # Directory to pagerank on.
4
- work_dir=$1 ; shift
5
- if [ "$work_dir" == '' ] ; then echo "Please specify the parent of the directory made by gen_initial_pagerank: $0 initial_dir [number_of_iterations] [start_iteration]" ; exit ; fi
6
- # How many rounds to run: default 10
7
- n_iters=${1-10} ; shift
8
- # the iteration to start with: default 0
9
- start_i=${1-0} ; shift
10
-
11
- # this directory
12
- script_dir="`dirname $0`"
13
-
14
- for (( iter=0 ; "$iter" < "$n_iters" ; iter++ )) ; do
15
- curr_str=`printf "%03d" $(( $start_i + $iter ))`
16
- next_str=`printf "%03d" $(( $start_i + $iter + 1 ))`
17
- curr_dir=$work_dir/pagerank_graph_${curr_str}
18
- next_dir=$work_dir/pagerank_graph_${next_str}
19
- echo -e "Iteration $(( $iter + 1 )) / $n_iters:\t `basename $curr_dir` => `basename $next_dir`"
20
- $script_dir/pagerank.rb --rm --run $curr_dir $next_dir
21
- done