wukong 3.0.0.pre → 3.0.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -0,0 +1,54 @@
1
+ # -*- coding: utf-8 -*-
2
+ module Wukong
3
+ module Data
4
+ class GeoJson ; include Gorillib::Model ; end
5
+ class GeoJson::Geometry ; include Gorillib::Model ; end
6
+
7
+ class GeoJson
8
+ include Gorillib::Model::LoadFromJson
9
+ include Gorillib::Model::Indexable
10
+ field :type, String
11
+ field :id, String
12
+ field :geometry, GeoJson::Geometry
13
+ field :properties, GenericModel
14
+
15
+ def self.load(*args)
16
+ load_json(*args) do |val|
17
+ p val.properties
18
+ p val.properties.to_place
19
+ end
20
+ end
21
+
22
+ end
23
+
24
+ class GeoJson::Geometry
25
+ field :type, String
26
+ field :coordinates, Array
27
+
28
+ def point?
29
+ type == 'Point'
30
+ end
31
+
32
+ def longitude
33
+ return nil if coordinates.blank?
34
+ raise "Longitude only available for Point objects" unless point?
35
+ coordinates[0]
36
+ end
37
+ def latitude
38
+ return nil if coordinates.blank?
39
+ raise "Latitude only available for Point objects" unless point?
40
+ coordinates[1]
41
+ end
42
+ end
43
+
44
+ class GeonamesGeoJson < GeoJson
45
+ def receive_properties(hsh)
46
+ if hsh.respond_to?(:merge)
47
+ super(hsh.merge(geo_json_id: id, longitude: geometry.longitude, latitude: geometry.latitude))
48
+ else
49
+ super
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,69 @@
1
+ module Geo
2
+
3
+ class Place
4
+ include Gorillib::Model
5
+ include Gorillib::Model::Indexable
6
+
7
+ field :geonames_id, String
8
+ field :country_id, String, doc: "ISO 3166 2-letter alphanumeric id ('us', 'mx', etc). Must be lowercase"
9
+ field :admin1_id, String
10
+ field :feature_cat, String
11
+ field :feature_subcat, String
12
+ #
13
+ field :name, String
14
+ #
15
+ field :timezone, String
16
+ field :elevation, Float
17
+ field :longitude, Float
18
+ field :latitude, Float
19
+ #
20
+ field :alternate_names, String, default: ""
21
+
22
+ def names
23
+ ([name] + alternate_names.split("|")).compact_blank
24
+ end
25
+
26
+ def coordinates
27
+ { longitude: longitude, latitude: latitude, elevation: elevation }.compact
28
+ end
29
+
30
+ def self.slugify_name(val)
31
+ val.downcase.
32
+ gsub(/(?:\s+and\s+|\s+-\s+|[^[:alpha:]\-]+)/, '-').
33
+ gsub(/\A-*(.+?)-*\z/, '\1')
34
+ end
35
+ end
36
+
37
+ class AdministrativeArea < Place
38
+ field :population, Integer
39
+ field :official_name, String
40
+ def names ; super.tap{|arr| arr.insert(1, official_name) }.uniq.compact_blank ; end
41
+ end
42
+
43
+ class Country < AdministrativeArea
44
+ field :country_al3id, String, identifier: true, doc: "ISO 3166 3-letter alphanumeric id ('usa', 'mex', etc). Must be lowercase."
45
+ field :country_numid, Integer, identifier: true, doc: "ISO 3166 numeric identifier ('usa' = 840)"
46
+ field :tld_id, String, doc: "TLD (top-level domain) identifier"
47
+ end
48
+
49
+ class CountryNameLookup
50
+ include Gorillib::Model
51
+ include Gorillib::Model::Indexable
52
+ include Gorillib::Model::LoadFromTsv
53
+ index_on :slug
54
+
55
+ field :country_id, String
56
+ field :country_al3id, String
57
+ field :country_numid, Integer
58
+ field :tld_id, String
59
+ field :geonames_id, String
60
+ field :name, String
61
+ field :slug, String
62
+ field :alt_name, String
63
+
64
+ def self.load(filename=nil)
65
+ filename ||= :country_name_lookup
66
+ @values = load_tsv(filename)
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,78 @@
1
+ # -*- coding: utf-8 -*-
2
+ # {"type":"Feature",
3
+ # "id":"3cc54602f2d69c1111dc35f0aaa92240",
4
+ # "geometry":{"type":"Point","coordinates":[42.5,11.5]},
5
+ # "properties":{
6
+ # "geonameid":"223816","country_code":"DJ","admin1_code":"00",
7
+ # "feature_code":"PCLI","feature_class":"A",
8
+ # "asciiname":"Republic of Djibouti","name":"Republic of Djibouti","alternatenames":"Cîbûtî,...",
9
+ # "modification_date":"2011-07-09",
10
+ # "timezone":"Africa/Djibouti","gtopo30":"668","population":"740528"}}
11
+
12
+
13
+ # {"type":"Feature","id":"5b66ac7270763facfe1e9ab9c1bf99f8",
14
+ # "geometry":{"type":"Point","coordinates":[-98.5,39.76]},
15
+ # "properties":{
16
+ # "modification_date":"2011-04-27","_type":"geo/geonames_country",
17
+ # "asciiname":"United States","name":"United States","gtopo30":"537","geonameid":"6252001",
18
+ # "feature_code":"PCLI","country_code":"US","feature_class":"A",
19
+ # "alternatenames":"...","admin1_code":"00","population":"310232863"}}
20
+
21
+ module Geo
22
+
23
+ class GeonamesPlace
24
+ include Gorillib::Model
25
+ class_attribute :place_klass ; self.place_klass = ::Geo::Place
26
+
27
+ field :name, String
28
+ field :asciiname, String
29
+ field :geonameid, String
30
+ field :country_code, String
31
+ field :admin1_code, String, blankish: [0, "0", "00", nil, ""]
32
+ field :feature_code, String
33
+ field :feature_class, String
34
+ #
35
+ field :modification_date, String
36
+ field :timezone, String
37
+ #
38
+ field :gtopo30, Float, blankish: ["-9999", -9999, nil, ""], doc: "Elevation in the [GTOPO30](http://en.wikipedia.org/wiki/GTOPO30) model"
39
+ field :longitude, Float
40
+ field :latitude, Float
41
+ #
42
+ field :population, Integer, blankish: [0, "0", nil, ""]
43
+ field :alternatenames, String
44
+
45
+ # Needed because 'Saint Helena, Ascension and Tristan da Cunha' is an official
46
+ # country name (as are others like it), so a ', ' inside a name must not be treated as a separator.
47
+ def alternate_names_with_pipes
48
+ # A comma ',' with no following space separates names; a comma plus space ', ' is internal to a name.
49
+ an = alternatenames.gsub(/,/, '|').gsub(/\| /, ', ')
50
+ ([name, asciiname] + an.split('|')).uniq.join("|")
51
+ end
52
+
53
+ def to_place
54
+ attrs = {
55
+ name: asciiname,
56
+ official_name: name,
57
+ geonames_id: "gn:#{geonameid}",
58
+ country_id: country_code.downcase,
59
+ admin1_id: admin1_code,
60
+ feature_cat: feature_class,
61
+ feature_subcat: feature_code,
62
+ alternate_names: alternate_names_with_pipes,
63
+ updated_at: modification_date,
64
+ timezone: timezone,
65
+ elevation: gtopo30,
66
+ longitude: longitude,
67
+ latitude: latitude,
68
+ population: population,
69
+ }
70
+ place_klass.receive(attrs)
71
+ end
72
+ end
73
+
74
+ # Stub class: Geonames JSON elements have :_type = geo/geonames_country; their #to_place builds a Geo::Country.
75
+ class GeonamesCountry < GeonamesPlace
76
+ self.place_klass = Geo::Country
77
+ end
78
+ end
@@ -0,0 +1,172 @@
1
+ require 'active_support/lazy_load_hooks'
2
+ require 'active_support/i18n'
3
+ require 'active_support/inflector/transliterate'
4
+
5
+ module Wukong
6
+
7
+ module Data
8
+
9
+ # These classes use data from the
10
+ # [isocodes](http://pkg-isocodes.alioth.debian.org/) debian project. That
11
+ # package provides lists of various ISO standards (e.g. country, language,
12
+ # language scripts, and currency names) in one place, rather than repeated in
13
+ # many programs throughout the system.
14
+ #
15
class IsoCode
  include Gorillib::Model
  include Gorillib::Model::LoadFromTsv
  include Gorillib::Model::Indexable

  # Symbol naming the ISO table a subclass represents (readable on instances,
  # writable only on the class).
  class_attribute :handle, instance_writer: false

  # Reads records from a TSV file and stores the result in @values.
  #
  # NOTE(review): the default path is always "iso_3166.tsv" and num_fields is
  # fixed at 4..6, regardless of `handle` -- confirm whether subclasses are
  # expected to pass their own filename.
  #
  # @param filename [Object, nil] location handed to load_tsv; when nil the
  #   iso_3166 table under :geo_data/iso_codes is used
  # @return [Object] whatever load_tsv returns
  def self.load(filename=nil)
    tsv_location = filename || [:geo_data, 'iso_codes', "iso_3166.tsv"]
    @values = load_tsv(tsv_location, num_fields: 4..6)
  end
end
26
+
27
+ #
28
+ # ISO 3166 Country code
29
+ #
30
+ # Lists the 2-letter country code and "short" country name. The official ISO
31
+ # 3166 maintenance agency is ISO. The gettext domain is
32
+ # "iso_3166". [origin](http://www.iso.org/iso/country_codes)
33
+ #
34
class CountryCode < IsoCode
  include ActiveSupport::Inflector

  self.handle = :iso_3166
  index_on :alpha_2_code, :alpha_3_code, :country_numid, :name, :common_name, :official_name
  field :alpha_2_code,  String,  identifier: true
  field :alpha_3_code,  String,  identifier: true
  field :country_numid, Integer, identifier: true
  field :name,          String
  field :official_name, String,  blankish: ["", nil]
  field :common_name,   String,  blankish: ["", nil]

  # @return [Array] common_name, name and official_name (in that order) after
  #   compact_blank has been applied
  def names
    [common_name, name, official_name].compact_blank
  end

  # Looks a country up by name, then common name, then official name.
  # Each inner lookup is supplied as the block of the outer one -- presumably
  # the block is the fallback used on a miss; confirm against Indexable.
  def self.for_any_name(val)
    for_name(val) do
      for_common_name(val) do
        for_official_name(val)
      end
    end
  end

  # Converts this record into a Geo::Country.
  #
  # @return [Object] the result of Geo::Country.receive
  def to_place
    known_names = names
    place_attrs = {
      name:            transliterate(known_names.first),
      official_name:   known_names.last,
      country_id:      alpha_2_code.downcase,
      alternate_names: known_names.join('|'),
      country_al3id:   alpha_3_code.downcase,
      country_numid:   country_numid,
    }
    Geo::Country.receive(place_attrs.compact_blank)
  end
end
65
+
66
#
# ISO 3166-3 Formerly Used Country Code
#
# Holds the :iso_3166_3 table (four-letter codes plus withdrawal date).
# It has its own class name so that its handle and fields do not overwrite
# those of CountryCode, the :iso_3166 table.
#
class FormerCountryCode < IsoCode
  self.handle = :iso_3166_3
  field :alpha_3_code,   String,  identifier: true
  field :alpha_4_code,   String,  identifier: true
  field :country_numid,  Integer, identifier: true
  field :country_names,  String
  field :comment,        String
  field :date_withdrawn, String
end
75
+
76
+ #
77
+ # ISO 3166-2 Country Subdivision (Admin 1: state, region, etc) Code
78
+ #
79
+ # The ISO 3166 standard includes a "Country Subdivision Code", giving a code
80
+ # for the names of the principal administrative subdivisions of the
81
+ # countries coded in ISO 3166. The official ISO 3166-2 maintenance agency is
82
+ # ISO. The gettext domain is "iso_3166_2".
83
+ # <http://www.iso.org/iso/country_codes/background_on_iso_3166/iso_3166-2.htm>
84
+ #
85
class RegionCode < IsoCode
  self.handle = :iso_3166_2

  field :region_code,   String, identifier: true
  field :country_code,  String
  field :parent_region, String
  field :region_kind,   String
  field :name,          String

  # state_code is a second name for the region_code reader.
  alias_method :state_code, :region_code
end
94
+
95
+ #
96
+ # ISO 639 Language Code
97
+ #
98
+ # This lists the 2-letter and 3-letter language codes and language
99
+ # names. The official ISO 639 maintenance agency is the Library of
100
+ # Congress. The gettext domain is "iso_639".
101
+ # [origin](http://www.loc.gov/standards/iso639-2/)
102
+ #
103
class BasicLanguageCode < IsoCode
  self.handle = :iso_639

  # Every column of this table is declared as an identifier.
  field :iso_639_1_code,  String, identifier: true
  field :iso_639_2B_code, String, identifier: true
  field :iso_639_2T_code, String, identifier: true
  field :name,            String, identifier: true
end
110
+
111
+ # ISO 639-3
112
+ #
113
+ # This is a further development of ISO 639-2, see above. All codes of ISO
114
+ # 639-2 are included in ISO 639-3. ISO 639-3 attempts to provide as complete
115
+ # an enumeration of languages as possible, including living, extinct,
116
+ # ancient, and constructed languages, whether major or minor, written or
117
+ # unwritten. The gettext domain is "iso_639_3". The official ISO 639-3
118
+ # maintenance agency is SIL International.
119
+ # [origin](http://www.sil.org/iso639-3/)
120
+ #
121
class LanguageCode < BasicLanguageCode
  self.handle = :iso_639_3

  # NOTE(review): subclassing BasicLanguageCode means the iso_639_* fields
  # declared there are presumably inherited too, and :name is declared again
  # here without identifier: true -- confirm that is intended.
  field :language_id,    String, identifier: true
  field :part1_code,     String
  field :part2_code,     String
  field :scope,          String
  field :status,         String
  field :language_kind,  String
  field :name,           String
  field :inverted_name,  String
  field :reference_name, String
end
133
+
134
+ #
135
+ # ISO 15924 Language Scripts (alphabet) names
136
+ #
137
+ # This lists the language scripts names. The official ISO 15924 maintenance
138
+ # agency is the Unicode Consortium. The gettext domain is "iso_15924".
139
+ # [origin](http://unicode.org/iso15924/)
140
+ #
141
class LanguageScriptCode < IsoCode
  self.handle = :iso_15924

  # Scripts are identified both by a four-letter code and by a numeric id.
  field :alpha_4_code, String,  identifier: true
  field :script_numid, Integer, identifier: true
  field :name,         String
end
147
+
148
+ #
149
+ # ISO 4217 Currency Code
150
+ #
151
+ # This lists the currency codes and names. The official ISO 4217 maintenance
152
+ # agency is the British Standards Institution. The gettext domain is
153
+ # "iso_4217".
154
+ # [origin](http://www.bsi-global.com/en/Standards-and-Publications/Industry-Sectors/Services/BSI-Currency-Code-Service/)
155
+ #
156
class CurrencyCode < IsoCode
  self.handle = :iso_4217

  # Currencies are identified both by their code and by a numeric id.
  field :currency_code,  String,  identifier: true
  field :currency_numid, Integer, identifier: true
  field :name,           String
end
162
+
163
+ #
164
+ # Historic Currency Code
165
+ #
166
class HistoricCurrencyCode < CurrencyCode
  self.handle = :historic_iso_4217

  # Adds the withdrawal date to the fields declared on CurrencyCode.
  field :date_withdrawn, String
end
170
+
171
+ end
172
+ end
@@ -0,0 +1,124 @@
1
+ require 'gorillib/model/reconcilable'
2
+ # require_relative('./geo_models')
3
+ # require_relative('./geo_json')
4
+
5
+ module Geo
6
+
7
Place.class_eval do
  include Gorillib::Model::Reconcilable

  # Merges another record's alternate names into this record's own list.
  # The combined list is de-duplicated, the primary `name` is removed from
  # it, and the result is written back as a '|'-joined string.
  #
  # @param that_val [String, nil] '|'-separated names from the other record
  # @param _ [Object] unused
  # @return [true] always
  def adopt_alternate_names(that_val, _)
    return true if that_val.blank?
    merged = "#{alternate_names}|#{that_val}".split("|").uniq
    merged.delete(name)
    write_attribute :alternate_names, merged.compact_blank.join("|")
    true
  end

  # Answers :pass for disagreements on :name and :official_name; every other
  # attribute is deferred to the inherited implementation.
  def conflicting_attribute!(attr, this_val, that_val)
    return :pass if [:name, :official_name].include?(attr)
    super
  end

end
27
+
28
# Extends Country with a country_id index and an :iso_3166_active flag.
Country.class_eval do
  index_on :country_id
  field    :iso_3166_active, :boolean
end
32
+
33
+
34
# One row of the full ISO 3166 listing, loadable from TSV (6 to 8 columns,
# header row popped) and convertible to a Geo::Country via #to_place.
class FullIso3166
  include Gorillib::Model
  include Gorillib::Model::Reconcilable
  include Gorillib::Model::LoadFromTsv
  self.tsv_options = self.tsv_options.merge(num_fields: 6..8, pop_headers: true)

  field :country_id,      String
  field :tld_id,          String
  field :iso_3166_3,      String
  field :name,            String
  field :code_status,     String
  field :iso_3166_active, :boolean, blankish: ['N', false, nil, '']
  field :year_granted,    String
  field :notes,           String

  # Whether this code is flagged as active.
  #
  # The field is declared :boolean, so the stored value is presumably already
  # coerced to true/nil rather than kept as the raw "Y" -- TODO confirm against
  # the Gorillib boolean factory. Both forms are accepted; the comparison is
  # explicit so that a raw "N" string does not count as active.
  #
  # @return [Boolean]
  def active?
    iso_3166_active == true || iso_3166_active == "Y"
  end

  # Converts this row into a Geo::Country carrying the id, name, TLD and
  # active flag.
  #
  # @return [Object] the result of Geo::Country.receive
  def to_place
    Geo::Country.receive({
      country_id:      country_id,
      name:            name,
      tld_id:          tld_id,
      iso_3166_active: iso_3166_active,
    })
  end
end
62
+
63
+ end
64
+
65
+ # cd Congo (Kinshasa)
66
+ # um Baker Island
67
+ # um Howland Island
68
+ # um Jarvis Island
69
+ # um Johnston Atoll
70
+ # um Kingman Reef
71
+ # um Midway Islands
72
+ # um Navassa Island
73
+ # um Palmyra Atoll
74
+ # um Wake Island
75
+ # mi Midway Islands
76
+ # na Netherlands Antilles
77
+ # gs South Georgia and the Islands
78
+ # sj Svalbard
79
+ # wk Wake Island
80
+ # ps West Bank
81
+ # ps West Bank and the Gaza Strip
82
+ # ps Gaza Strip
83
+
84
# Builds the Geo::Country list by combining three sources.
class CountryReconciler

  # Populates Geo::Country.values and merges the other sources into it:
  #
  # 1. every row of iso_codes/full_iso_3166.tsv is converted with #to_place
  #    and appended to Geo::Country.values;
  # 2. every Wukong::Data::CountryCode record is converted and adopted by the
  #    country with the same country_id;
  # 3. every :geonames_countries feature's properties are converted and
  #    adopted the same way.
  #
  # @return [Array] Geo::Country.values, sorted in place by country_id
  def self.load_reconciled_countries
    full_iso_path = [:geo_data, 'iso_codes/full_iso_3166.tsv']
    Geo::FullIso3166.load_tsv(full_iso_path) do |iso_row|
      Geo::Country.values << iso_row.to_place
    end

    Wukong::Data::CountryCode.load
    Wukong::Data::CountryCode.values.each do |code_record|
      adopt_into_matching_country(code_record.to_place)
    end

    Wukong::Data::GeonamesGeoJson.load_json(:geonames_countries) do |feature|
      adopt_into_matching_country(feature.properties.to_place)
    end

    Geo::Country.values.sort_by!(&:country_id)
  end

  # Finds the country whose country_id matches `candidate` and has it adopt
  # the candidate's attributes.
  #
  # NOTE(review): the fallback block builds a fresh Geo::Country that is not
  # appended to Geo::Country.values here -- confirm whether for_country_id
  # registers it, otherwise data for unknown ids is dropped.
  def self.adopt_into_matching_country(candidate)
    target = Geo::Country.for_country_id(candidate.country_id){ Geo::Country.new }
    target.adopt(candidate)
  end
  private_class_method :adopt_into_matching_country
end
108
+
109
+
110
+
111
+ # {
112
+ # :xx => { :name => 'Iran' },
113
+ # :xx => 'Tanzania, United Republic',
114
+ # :xx => 'Palestinian Territory, Occupied',
115
+ # :xx =>
116
+ # }
117
+ # :kp => "North Korea" Korea, Democratic People's Republic
118
+ # :kr => "South Korea" Korea, Republic of
119
+ # :bn => "Brunei"
120
+ # :bq => "Caribbean Netherlands"
121
+ # Lao People's Democratic Republic
122
+ #
123
+ # :va Holy See (Vatican City State)
124
+ # :vi Virgin Islands, U.S.