wukong 3.0.0.pre → 3.0.0.pre2

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (476) hide show
  1. data/.gitignore +46 -33
  2. data/.gitmodules +3 -0
  3. data/.rspec +1 -1
  4. data/.travis.yml +8 -1
  5. data/.yardopts +0 -13
  6. data/Guardfile +4 -6
  7. data/{LICENSE.textile → LICENSE.md} +43 -55
  8. data/README-old.md +422 -0
  9. data/README.md +279 -418
  10. data/Rakefile +21 -5
  11. data/TODO.md +6 -6
  12. data/bin/wu-clean-encoding +31 -0
  13. data/bin/wu-lign +2 -2
  14. data/bin/wu-local +69 -0
  15. data/bin/wu-server +70 -0
  16. data/examples/Gemfile +38 -0
  17. data/examples/README.md +9 -0
  18. data/examples/dataflow/apache_log_line.rb +64 -25
  19. data/examples/dataflow/fibonacci_series.rb +101 -0
  20. data/examples/dataflow/parse_apache_logs.rb +37 -7
  21. data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
  22. data/examples/dataflow/simple.rb +4 -4
  23. data/examples/geo.rb +4 -0
  24. data/examples/geo/geo_grids.numbers +0 -0
  25. data/examples/geo/geolocated.rb +331 -0
  26. data/examples/geo/quadtile.rb +69 -0
  27. data/examples/geo/spec/geolocated_spec.rb +247 -0
  28. data/examples/geo/tile_fetcher.rb +77 -0
  29. data/examples/graph/minimum_spanning_tree.rb +61 -61
  30. data/examples/jabberwocky.txt +36 -0
  31. data/examples/models/wikipedia.rb +20 -0
  32. data/examples/munging/Gemfile +8 -0
  33. data/examples/munging/airline_flights/airline.rb +57 -0
  34. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  35. data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
  36. data/examples/munging/airline_flights/airport.rb +211 -0
  37. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  38. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  39. data/examples/munging/airline_flights/flight.rb +156 -0
  40. data/examples/munging/airline_flights/models.rb +4 -0
  41. data/examples/munging/airline_flights/parse.rb +26 -0
  42. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  43. data/examples/munging/airline_flights/route.rb +35 -0
  44. data/examples/munging/airline_flights/tasks.rake +83 -0
  45. data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
  46. data/examples/munging/airline_flights/topcities.rb +167 -0
  47. data/examples/munging/airports/40_wbans.txt +40 -0
  48. data/examples/munging/airports/filter_weather_reports.rb +37 -0
  49. data/examples/munging/airports/join.pig +31 -0
  50. data/examples/munging/airports/to_tsv.rb +33 -0
  51. data/examples/munging/airports/usa_wbans.pig +19 -0
  52. data/examples/munging/airports/usa_wbans.txt +2157 -0
  53. data/examples/munging/airports/wbans.pig +19 -0
  54. data/examples/munging/airports/wbans.txt +2310 -0
  55. data/examples/munging/geo/geo_json.rb +54 -0
  56. data/examples/munging/geo/geo_models.rb +69 -0
  57. data/examples/munging/geo/geonames_models.rb +78 -0
  58. data/examples/munging/geo/iso_codes.rb +172 -0
  59. data/examples/munging/geo/reconcile_countries.rb +124 -0
  60. data/examples/munging/geo/tasks.rake +71 -0
  61. data/examples/munging/rake_helper.rb +62 -0
  62. data/examples/munging/weather/.gitignore +1 -0
  63. data/examples/munging/weather/Gemfile +4 -0
  64. data/examples/munging/weather/Rakefile +28 -0
  65. data/examples/munging/weather/extract_ish.rb +13 -0
  66. data/examples/munging/weather/models/weather.rb +119 -0
  67. data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
  68. data/examples/munging/wikipedia/README.md +34 -0
  69. data/examples/munging/wikipedia/Rakefile +193 -0
  70. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  71. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  72. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  73. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  74. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  75. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  76. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  77. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
  78. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  79. data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
  80. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  81. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
  82. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
  83. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
  84. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
  85. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
  86. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
  87. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
  88. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
  89. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
  90. data/examples/munging/wikipedia/pig_style_guide.md +25 -0
  91. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
  92. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
  93. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
  94. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
  95. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
  96. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
  97. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
  98. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
  99. data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
  100. data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
  101. data/examples/munging/wikipedia/utils/namespaces.json +1 -0
  102. data/examples/rake_helper.rb +85 -0
  103. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  104. data/examples/server_logs/logline.rb +95 -0
  105. data/examples/server_logs/models.rb +66 -0
  106. data/examples/server_logs/page_counts.pig +48 -0
  107. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  108. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  109. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  110. data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
  111. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  112. data/examples/string_reverser.rb +26 -0
  113. data/examples/text/pig_latin.rb +2 -2
  114. data/examples/text/regional_flavor/README.md +14 -0
  115. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  116. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  117. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  118. data/examples/word_count/accumulator.rb +26 -0
  119. data/examples/word_count/tokenizer.rb +13 -0
  120. data/examples/word_count/word_count.rb +6 -0
  121. data/examples/workflow/cherry_pie.dot +97 -0
  122. data/examples/workflow/cherry_pie.png +0 -0
  123. data/examples/workflow/cherry_pie.rb +61 -26
  124. data/lib/hanuman.rb +34 -7
  125. data/lib/hanuman/graph.rb +55 -31
  126. data/lib/hanuman/graphvizzer.rb +199 -178
  127. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  128. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  129. data/lib/hanuman/link.rb +35 -0
  130. data/lib/hanuman/registry.rb +46 -0
  131. data/lib/hanuman/stage.rb +76 -32
  132. data/lib/wukong.rb +23 -24
  133. data/lib/wukong/boot.rb +87 -0
  134. data/lib/wukong/configuration.rb +8 -0
  135. data/lib/wukong/dataflow.rb +45 -78
  136. data/lib/wukong/driver.rb +99 -0
  137. data/lib/wukong/emitter.rb +22 -0
  138. data/lib/wukong/model/faker.rb +24 -24
  139. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  140. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  141. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  142. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  143. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  144. data/lib/wukong/processor.rb +60 -114
  145. data/lib/wukong/spec_helpers.rb +81 -0
  146. data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
  147. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
  148. data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
  149. data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
  150. data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
  151. data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
  152. data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
  153. data/lib/wukong/version.rb +2 -1
  154. data/lib/wukong/widget/filters.rb +311 -0
  155. data/lib/wukong/widget/processors.rb +156 -0
  156. data/lib/wukong/widget/reducers.rb +7 -0
  157. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  158. data/lib/wukong/widget/reducers/bin.rb +318 -0
  159. data/lib/wukong/widget/reducers/count.rb +61 -0
  160. data/lib/wukong/widget/reducers/group.rb +85 -0
  161. data/lib/wukong/widget/reducers/group_concat.rb +70 -0
  162. data/lib/wukong/widget/reducers/moments.rb +72 -0
  163. data/lib/wukong/widget/reducers/sort.rb +130 -0
  164. data/lib/wukong/widget/serializers.rb +287 -0
  165. data/lib/wukong/widget/sink.rb +10 -52
  166. data/lib/wukong/widget/source.rb +7 -113
  167. data/lib/wukong/widget/utils.rb +46 -0
  168. data/lib/wukong/widgets.rb +6 -0
  169. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  170. data/spec/examples/dataflow/parsing_spec.rb +12 -11
  171. data/spec/examples/dataflow/simple_spec.rb +32 -6
  172. data/spec/examples/dataflow/telegram_spec.rb +36 -36
  173. data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
  174. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  175. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  176. data/spec/examples/text/pig_latin_spec.rb +13 -16
  177. data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
  178. data/spec/hanuman/graph_spec.rb +27 -2
  179. data/spec/hanuman/hanuman_spec.rb +10 -0
  180. data/spec/hanuman/registry_spec.rb +123 -0
  181. data/spec/hanuman/stage_spec.rb +61 -7
  182. data/spec/spec_helper.rb +29 -19
  183. data/spec/support/hanuman_test_helpers.rb +14 -12
  184. data/spec/support/shared_context_for_reducers.rb +37 -0
  185. data/spec/support/shared_examples_for_builders.rb +101 -0
  186. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  187. data/spec/support/wukong_test_helpers.rb +37 -11
  188. data/spec/wukong/dataflow_spec.rb +77 -55
  189. data/spec/wukong/local_runner_spec.rb +24 -24
  190. data/spec/wukong/model/faker_spec.rb +132 -131
  191. data/spec/wukong/runner_spec.rb +8 -8
  192. data/spec/wukong/widget/filters_spec.rb +61 -0
  193. data/spec/wukong/widget/processors_spec.rb +126 -0
  194. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  195. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  196. data/spec/wukong/widget/reducers/group_spec.rb +20 -0
  197. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  198. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  199. data/spec/wukong/widget/serializers_spec.rb +92 -0
  200. data/spec/wukong/widget/sink_spec.rb +15 -15
  201. data/spec/wukong/widget/source_spec.rb +65 -41
  202. data/spec/wukong/wukong_spec.rb +10 -0
  203. data/wukong.gemspec +17 -10
  204. metadata +359 -335
  205. data/.document +0 -5
  206. data/VERSION +0 -1
  207. data/bin/hdp-bin +0 -44
  208. data/bin/hdp-bzip +0 -23
  209. data/bin/hdp-cat +0 -3
  210. data/bin/hdp-catd +0 -3
  211. data/bin/hdp-cp +0 -3
  212. data/bin/hdp-du +0 -86
  213. data/bin/hdp-get +0 -3
  214. data/bin/hdp-kill +0 -3
  215. data/bin/hdp-kill-task +0 -3
  216. data/bin/hdp-ls +0 -11
  217. data/bin/hdp-mkdir +0 -2
  218. data/bin/hdp-mkdirp +0 -12
  219. data/bin/hdp-mv +0 -3
  220. data/bin/hdp-parts_to_keys.rb +0 -77
  221. data/bin/hdp-ps +0 -3
  222. data/bin/hdp-put +0 -3
  223. data/bin/hdp-rm +0 -32
  224. data/bin/hdp-sort +0 -40
  225. data/bin/hdp-stream +0 -40
  226. data/bin/hdp-stream-flat +0 -22
  227. data/bin/hdp-stream2 +0 -39
  228. data/bin/hdp-sync +0 -17
  229. data/bin/hdp-wc +0 -67
  230. data/bin/wu-flow +0 -10
  231. data/bin/wu-map +0 -17
  232. data/bin/wu-red +0 -17
  233. data/bin/wukong +0 -17
  234. data/data/CREDITS.md +0 -355
  235. data/data/graph/airfares.tsv +0 -2174
  236. data/data/text/gift_of_the_magi.txt +0 -225
  237. data/data/text/jabberwocky.txt +0 -36
  238. data/data/text/rectification_of_names.txt +0 -33
  239. data/data/twitter/a_atsigns_b.tsv +0 -64
  240. data/data/twitter/a_follows_b.tsv +0 -53
  241. data/data/twitter/tweet.tsv +0 -167
  242. data/data/twitter/twitter_user.tsv +0 -55
  243. data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
  244. data/docpages/INSTALL.textile +0 -92
  245. data/docpages/LICENSE.textile +0 -107
  246. data/docpages/README-elastic_map_reduce.textile +0 -377
  247. data/docpages/README-performance.textile +0 -90
  248. data/docpages/README-wulign.textile +0 -65
  249. data/docpages/UsingWukong-part1-get_ready.textile +0 -17
  250. data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
  251. data/docpages/UsingWukong-part3-parsing.textile +0 -138
  252. data/docpages/_config.yml +0 -39
  253. data/docpages/avro/avro_notes.textile +0 -56
  254. data/docpages/avro/performance.textile +0 -36
  255. data/docpages/avro/tethering.textile +0 -19
  256. data/docpages/bigdata-tips.textile +0 -143
  257. data/docpages/code/api_response_example.txt +0 -20
  258. data/docpages/code/parser_skeleton.rb +0 -38
  259. data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
  260. data/docpages/favicon.ico +0 -0
  261. data/docpages/gem.css +0 -16
  262. data/docpages/hadoop-tips.textile +0 -83
  263. data/docpages/index.textile +0 -92
  264. data/docpages/intro.textile +0 -8
  265. data/docpages/moreinfo.textile +0 -174
  266. data/docpages/news.html +0 -24
  267. data/docpages/pig/PigLatinExpressionsList.txt +0 -122
  268. data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
  269. data/docpages/pig/commandline_params.txt +0 -26
  270. data/docpages/pig/cookbook.html +0 -481
  271. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  272. data/docpages/pig/images/instruction_arrow.png +0 -0
  273. data/docpages/pig/images/pig-logo.gif +0 -0
  274. data/docpages/pig/piglatin_ref1.html +0 -1103
  275. data/docpages/pig/piglatin_ref2.html +0 -14340
  276. data/docpages/pig/setup.html +0 -505
  277. data/docpages/pig/skin/basic.css +0 -166
  278. data/docpages/pig/skin/breadcrumbs.js +0 -237
  279. data/docpages/pig/skin/fontsize.js +0 -166
  280. data/docpages/pig/skin/getBlank.js +0 -40
  281. data/docpages/pig/skin/getMenu.js +0 -45
  282. data/docpages/pig/skin/images/chapter.gif +0 -0
  283. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  284. data/docpages/pig/skin/images/current.gif +0 -0
  285. data/docpages/pig/skin/images/external-link.gif +0 -0
  286. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  287. data/docpages/pig/skin/images/page.gif +0 -0
  288. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  289. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  290. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  291. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  292. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  293. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  294. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  295. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  296. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  297. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  298. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  299. data/docpages/pig/skin/print.css +0 -54
  300. data/docpages/pig/skin/profile.css +0 -181
  301. data/docpages/pig/skin/screen.css +0 -587
  302. data/docpages/pig/tutorial.html +0 -1059
  303. data/docpages/pig/udf.html +0 -1509
  304. data/docpages/tutorial.textile +0 -283
  305. data/docpages/usage.textile +0 -195
  306. data/docpages/wutils.textile +0 -263
  307. data/examples/dataflow/complex.rb +0 -11
  308. data/examples/dataflow/donuts.rb +0 -13
  309. data/examples/tiny_count/jabberwocky_output.tsv +0 -92
  310. data/examples/word_count.rb +0 -48
  311. data/examples/workflow/fiddle.rb +0 -24
  312. data/lib/away/escapement.rb +0 -129
  313. data/lib/away/exe.rb +0 -11
  314. data/lib/away/experimental.rb +0 -5
  315. data/lib/away/from_file.rb +0 -52
  316. data/lib/away/job.rb +0 -56
  317. data/lib/away/job/rake_compat.rb +0 -17
  318. data/lib/away/registry.rb +0 -79
  319. data/lib/away/runner.rb +0 -276
  320. data/lib/away/runner/execute.rb +0 -121
  321. data/lib/away/script.rb +0 -161
  322. data/lib/away/script/hadoop_command.rb +0 -240
  323. data/lib/away/source/file_list_source.rb +0 -15
  324. data/lib/away/source/looper.rb +0 -18
  325. data/lib/away/task.rb +0 -219
  326. data/lib/hanuman/action.rb +0 -21
  327. data/lib/hanuman/chain.rb +0 -4
  328. data/lib/hanuman/graphviz.rb +0 -74
  329. data/lib/hanuman/resource.rb +0 -6
  330. data/lib/hanuman/slot.rb +0 -87
  331. data/lib/hanuman/slottable.rb +0 -220
  332. data/lib/wukong/bad_record.rb +0 -15
  333. data/lib/wukong/event.rb +0 -44
  334. data/lib/wukong/local_runner.rb +0 -55
  335. data/lib/wukong/mapred.rb +0 -3
  336. data/lib/wukong/universe.rb +0 -48
  337. data/lib/wukong/widget/filter.rb +0 -81
  338. data/lib/wukong/widget/gibberish.rb +0 -123
  339. data/lib/wukong/widget/monitor.rb +0 -26
  340. data/lib/wukong/widget/reducer.rb +0 -66
  341. data/lib/wukong/widget/stringifier.rb +0 -50
  342. data/lib/wukong/workflow.rb +0 -22
  343. data/lib/wukong/workflow/command.rb +0 -42
  344. data/old/config/emr-example.yaml +0 -48
  345. data/old/examples/README.txt +0 -17
  346. data/old/examples/contrib/jeans/README.markdown +0 -165
  347. data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
  348. data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
  349. data/old/examples/contrib/jeans/data/sizes +0 -3
  350. data/old/examples/contrib/jeans/normalize.rb +0 -20
  351. data/old/examples/contrib/jeans/sizes.rb +0 -55
  352. data/old/examples/corpus/bnc_word_freq.rb +0 -44
  353. data/old/examples/corpus/bucket_counter.rb +0 -47
  354. data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
  355. data/old/examples/corpus/sentence_bigrams.rb +0 -53
  356. data/old/examples/corpus/sentence_coocurrence.rb +0 -66
  357. data/old/examples/corpus/stopwords.rb +0 -138
  358. data/old/examples/corpus/words_to_bigrams.rb +0 -53
  359. data/old/examples/emr/README.textile +0 -110
  360. data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
  361. data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
  362. data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
  363. data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
  364. data/old/examples/network_graph/adjacency_list.rb +0 -74
  365. data/old/examples/network_graph/breadth_first_search.rb +0 -72
  366. data/old/examples/network_graph/gen_2paths.rb +0 -68
  367. data/old/examples/network_graph/gen_multi_edge.rb +0 -112
  368. data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
  369. data/old/examples/pagerank/README.textile +0 -6
  370. data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
  371. data/old/examples/pagerank/pagerank.rb +0 -72
  372. data/old/examples/pagerank/pagerank_initialize.rb +0 -42
  373. data/old/examples/pagerank/run_pagerank.sh +0 -21
  374. data/old/examples/sample_records.rb +0 -33
  375. data/old/examples/server_logs/apache_log_parser.rb +0 -15
  376. data/old/examples/server_logs/nook.rb +0 -48
  377. data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
  378. data/old/examples/server_logs/user_agent.rb +0 -40
  379. data/old/examples/simple_word_count.rb +0 -82
  380. data/old/examples/size.rb +0 -61
  381. data/old/examples/stats/avg_value_frequency.rb +0 -86
  382. data/old/examples/stats/binning_percentile_estimator.rb +0 -140
  383. data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
  384. data/old/examples/stats/rank_and_bin.rb +0 -173
  385. data/old/examples/stupidly_simple_filter.rb +0 -40
  386. data/old/examples/word_count.rb +0 -75
  387. data/old/graph/graphviz_builder.rb +0 -580
  388. data/old/graph_easy/Attributes.pm +0 -4181
  389. data/old/graph_easy/Graphviz.pm +0 -2232
  390. data/old/wukong.rb +0 -18
  391. data/old/wukong/and_pig.rb +0 -38
  392. data/old/wukong/bad_record.rb +0 -18
  393. data/old/wukong/datatypes.rb +0 -24
  394. data/old/wukong/datatypes/enum.rb +0 -127
  395. data/old/wukong/datatypes/fake_types.rb +0 -17
  396. data/old/wukong/decorator.rb +0 -28
  397. data/old/wukong/encoding/asciize.rb +0 -108
  398. data/old/wukong/extensions.rb +0 -16
  399. data/old/wukong/extensions/array.rb +0 -18
  400. data/old/wukong/extensions/blank.rb +0 -93
  401. data/old/wukong/extensions/class.rb +0 -189
  402. data/old/wukong/extensions/date_time.rb +0 -53
  403. data/old/wukong/extensions/emittable.rb +0 -69
  404. data/old/wukong/extensions/enumerable.rb +0 -79
  405. data/old/wukong/extensions/hash.rb +0 -167
  406. data/old/wukong/extensions/hash_keys.rb +0 -16
  407. data/old/wukong/extensions/hash_like.rb +0 -150
  408. data/old/wukong/extensions/hashlike_class.rb +0 -47
  409. data/old/wukong/extensions/module.rb +0 -2
  410. data/old/wukong/extensions/pathname.rb +0 -27
  411. data/old/wukong/extensions/string.rb +0 -65
  412. data/old/wukong/extensions/struct.rb +0 -17
  413. data/old/wukong/extensions/symbol.rb +0 -11
  414. data/old/wukong/filename_pattern.rb +0 -74
  415. data/old/wukong/helper.rb +0 -7
  416. data/old/wukong/helper/stopwords.rb +0 -195
  417. data/old/wukong/helper/tokenize.rb +0 -35
  418. data/old/wukong/logger.rb +0 -38
  419. data/old/wukong/periodic_monitor.rb +0 -72
  420. data/old/wukong/schema.rb +0 -269
  421. data/old/wukong/script.rb +0 -286
  422. data/old/wukong/script/avro_command.rb +0 -5
  423. data/old/wukong/script/cassandra_loader_script.rb +0 -40
  424. data/old/wukong/script/emr_command.rb +0 -168
  425. data/old/wukong/script/hadoop_command.rb +0 -237
  426. data/old/wukong/script/local_command.rb +0 -41
  427. data/old/wukong/store.rb +0 -10
  428. data/old/wukong/store/base.rb +0 -27
  429. data/old/wukong/store/cassandra.rb +0 -10
  430. data/old/wukong/store/cassandra/streaming.rb +0 -75
  431. data/old/wukong/store/cassandra/struct_loader.rb +0 -21
  432. data/old/wukong/store/cassandra_model.rb +0 -91
  433. data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
  434. data/old/wukong/store/chunked_flat_file_store.rb +0 -48
  435. data/old/wukong/store/conditional_store.rb +0 -57
  436. data/old/wukong/store/factory.rb +0 -8
  437. data/old/wukong/store/flat_file_store.rb +0 -89
  438. data/old/wukong/store/key_store.rb +0 -51
  439. data/old/wukong/store/null_store.rb +0 -15
  440. data/old/wukong/store/read_thru_store.rb +0 -22
  441. data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
  442. data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
  443. data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
  444. data/old/wukong/streamer.rb +0 -30
  445. data/old/wukong/streamer/accumulating_reducer.rb +0 -83
  446. data/old/wukong/streamer/base.rb +0 -126
  447. data/old/wukong/streamer/counting_reducer.rb +0 -25
  448. data/old/wukong/streamer/filter.rb +0 -20
  449. data/old/wukong/streamer/instance_streamer.rb +0 -15
  450. data/old/wukong/streamer/json_streamer.rb +0 -21
  451. data/old/wukong/streamer/line_streamer.rb +0 -12
  452. data/old/wukong/streamer/list_reducer.rb +0 -31
  453. data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
  454. data/old/wukong/streamer/record_streamer.rb +0 -14
  455. data/old/wukong/streamer/reducer.rb +0 -11
  456. data/old/wukong/streamer/set_reducer.rb +0 -14
  457. data/old/wukong/streamer/struct_streamer.rb +0 -48
  458. data/old/wukong/streamer/summing_reducer.rb +0 -29
  459. data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
  460. data/old/wukong/typed_struct.rb +0 -12
  461. data/spec/away/encoding_spec.rb +0 -32
  462. data/spec/away/exe_spec.rb +0 -20
  463. data/spec/away/flow_spec.rb +0 -82
  464. data/spec/away/graph_spec.rb +0 -6
  465. data/spec/away/job_spec.rb +0 -15
  466. data/spec/away/rake_compat_spec.rb +0 -9
  467. data/spec/away/script_spec.rb +0 -81
  468. data/spec/hanuman/graphviz_spec.rb +0 -29
  469. data/spec/hanuman/slot_spec.rb +0 -2
  470. data/spec/support/examples_helper.rb +0 -10
  471. data/spec/support/streamer_test_helpers.rb +0 -6
  472. data/spec/support/wukong_widget_helpers.rb +0 -66
  473. data/spec/wukong/processor_spec.rb +0 -109
  474. data/spec/wukong/widget/filter_spec.rb +0 -99
  475. data/spec/wukong/widget/stringifier_spec.rb +0 -51
  476. data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,16 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH.unshift(File.expand_path("../lib", File.realdirpath(File.dirname(__FILE__))))
3
+ require 'wukong'
4
+
5
+ Settings.use(:commandline)
6
+ Settings.define :profiler, :default => nil
7
+ Settings.resolve!
8
+
1
9
  require File.expand_path('../examples_helper', File.dirname(__FILE__))
2
10
  require Pathname.path_to(:examples, 'dataflow/apache_log_line')
3
11
 
4
- ExampleUniverse.dataflow(:parse_apache_logs) do
12
+ Wukong.dataflow(:parse_apache_logs) do
5
13
 
6
14
  doc 'Parses an apache log line into a structured model, emits it as JSON'
7
15
 
8
- input :default, file_source(Pathname.path_to(:data, 'log/sample_apache_log.log'))
9
- output :dump, stdout
16
+ source = ($0 == __FILE__) ? stdin : file_source(Pathname.path_to(:data, 'log/sample_apache_log.log'))
17
+ set_input :default, source
18
+ set_output :dump, stdout
10
19
 
11
- input(:default) >
20
+ input >
12
21
  map{|line| ApacheLogLine.make(line) or bad_record(line) } >
13
- to_json >
14
- output(:dump)
15
-
22
+ to_tsv >
23
+ output
16
24
  end
25
+
26
+ # if ($0 == __FILE__)
27
+ # flow_name = :parse_apache_logs
28
+ # if Settings.profiler
29
+ # require 'perftools'
30
+ # Pathname(Settings.profiler).dirname.mkpath
31
+ # PerfTools::CpuProfiler.start(Settings.profiler) do
32
+ # Wukong::LocalRunner.run(Wukong.dataflow(flow_name), :default)
33
+ # end
34
+ # else
35
+ # Wukong::LocalRunner.run(Wukong.dataflow(flow_name), :default)
36
+ # end
37
+ #
38
+ # # require 'jruby/profiler'
39
+ # # profile_data = JRuby::Profiler.profile do
40
+ # # Wukong::LocalRunner.run(Wukong.dataflow(flow_name), :default)
41
+ # # end
42
+ # # profile_printer = JRuby::Profiler::GraphProfilePrinter.new(profile_data)
43
+ # # profile_printer.printProfile($stderr)
44
+ #
45
+ # # Wukong::LocalRunner.run(Wukong.dataflow(flow_name), :default)
46
+ # end
@@ -1,12 +1,12 @@
1
1
  require File.expand_path('../examples_helper', File.dirname(__FILE__))
2
2
 
3
- ExampleUniverse.dataflow(:simple) do
3
+ Wukong.dataflow(:simple) do
4
4
  doc <<-DOC
5
5
  A stupidly simple dataflow: reverses each input string
6
6
  DOC
7
7
 
8
- input :default, file_source(Pathname.path_to(:data, 'text/jabberwocky.txt'))
9
- output :dump, file_sink(Pathname.path_to(:tmp, 'dataflow/simple_output.rb'))
8
+ file_source(Pathname.path_to(:data, 'text/jabberwocky.txt')) >
9
+ map{|str| str.reverse } >
10
+ file_sink(Pathname.path_to(:tmp, 'dataflow/simple_output.rb'))
10
11
 
11
- input(:default) > map{|str| str.reverse } > output(:dump)
12
12
  end
@@ -0,0 +1,4 @@
1
+
2
+ require 'gorillib/data_munging'
3
+ require_relative 'geo/geolocated'
4
+ require_relative 'geo/quadtile'
@@ -0,0 +1,331 @@
1
+ require 'gorillib/numeric/clamp'
2
+
3
# Degree <-> radian conversion helpers, added to every Numeric.
Numeric.class_eval do
  # @return [Float] the receiver (taken as degrees) expressed in radians
  def to_radians
    to_f * Math::PI / 180.0
  end

  # @return [Float] the receiver (taken as radians) expressed in degrees
  def to_degrees
    to_f * 180.0 / Math::PI
  end
end
7
+
8
+ module Wukong
9
+ #
10
+ # reference: [Bing Maps Tile System](http://msdn.microsoft.com/en-us/library/bb259689.aspx)
11
+ #
12
+ module Geolocated
13
+ module_function # call methods as eg Wukong::Geolocated.tile_xy_to_quadkey or, if included in class, on self as private methods
14
+
15
+ # field :longitude, type: Float, description: "Longitude (X) of a point, in decimal degrees"
16
+ # field :latitude, type: Float, description: "Latitude (Y) of a point, in decimal degrees"
17
+ # field :zoom_level, type: Integer, description: "Zoom level of tile to fetch. An integer between 0 (world) and 16 or so"
18
+ # field :quadkey, type: String, description: "Quadkey of tile, eg 002313012"
19
+ # field :tile_x, type: Integer, description: "Tile X index, an integer between 0 and 2^zoom_level - 1"
20
+ # field :tile_y, type: Integer, description: "Tile Y index, an integer between 0 and 2^zoom_level - 1"
21
+
22
+ module ByCoordinates
23
+ extend Gorillib::Concern
24
+
25
+ # The quadkey is a string of 2-bit tile selectors for a quadtile
26
+ #
27
+ # @example
28
+ # infochimps_hq = Geo::Place.receive("Infochimps HQ", -97.759003, 30.273884)
29
+ # infochimps_hq.quadkey(8) # => "02313012"
30
+ #
31
+ # Interesting quadkey properties:
32
+ #
33
+ # * The quadkey length is its zoom level
34
+ #
35
+ # * To zoom out (lower zoom level, larger quadtile), just truncate the
36
+ # quadkey: austin at ZL=8 has quadkey "02313012"; at ZL=3, "023"
37
+ #
38
+ # * Nearby points typically have "nearby" quadkeys: up to the smallest
39
+ # tile that contains both, their quadkeys will have a common prefix.
40
+ # If you sort your records by quadkey,
41
+ # - Nearby points are nearby-ish on disk. (hello, HBase/Cassandra
42
+ # database owners!) This allows efficient lookup and caching of
43
+ # "popular" regions or repeated queries in an area.
44
+ # - the tiles covering a region can be covered by a limited, enumerable
45
+ # set of range scans. For map-reduce programmers, this leads to very
46
+ # efficient reducers
47
+ #
48
+ # * The quadkey is the bit-interleaved combination of its tile ids:
49
+ #
50
+ # tile_x 58 binary 0 0 1 1 1 0 1 0
51
+ # tile_y 105 binary 0 1 1 0 1 0 0 1
52
+ # interleaved binary 00 10 11 01 11 00 01 10
53
+ # quadkey 0 2 3 1 3 0 1 2 # "02313012"
54
+ #
55
# @param zl [Integer] zoom level
# @return the quadkey computed by Wukong::Geolocated from this object's tile x/y at `zl`
def quadkey(zl)
  col = tile_x(zl)
  row = tile_y(zl)
  Wukong::Geolocated.tile_xy_zl_to_quadkey(col, row, zl)
end
56
+
57
+ # the packed quadkey is the integer formed by interleaving the bits of tile_x with tile_y:
58
+ #
59
+ # tile_x 58 binary 0 0 1 1 1 0 1 0
60
+ # tile_y 105 binary 0 1 1 0 1 0 0 1
61
+ # interleaved binary 00 10 11 01 11 00 01 10
62
+ # quadkey 0 2 3 1 3 0 1 2 # "02313012"
63
+ #
64
+ # (see `quadkey` for more.)
65
+ #
66
+ # At zoom level 15, the packed quadkey is a 30-bit unsigned integer --
67
+ # meaning you can store it in a pig `int`; for languages with an `unsigned
68
+ # int` type, you can go to zoom level 16 before you have to use a
69
+ # less-efficient type. Zoom level 15 has a resolution of about one tile
70
+ # per kilometer (about 1.25 km/tile near the equator; 0.75 km/tile at
71
+ # London's latitude). It takes 1 billion tiles to tile the world at that
72
+ # scale. Ruby's integer type goes up to 60 bits, enough for any practical
73
+ # zoom level.
74
+ #
75
# The original body referenced `zl` without accepting it; the zoom level can
# now be passed explicitly. When omitted, it is read from the receiver's own
# `zl` method, which preserves the old zero-argument behavior for receivers
# that define one.
#
# @param zoom_level [Integer] zoom level (defaults to `self.zl`)
# @return the packed quadkey computed by Wukong::Geolocated for this object's tile
def packed_qk(zoom_level = zl)
  Wukong::Geolocated.tile_xy_zl_to_packed_qk(tile_x(zoom_level), tile_y(zoom_level), zoom_level)
end
76
+
77
# Fractional tile x coordinate of this object's longitude.
#
# @param zl [Integer] zoom level
# @return [Float] x index of the tile this object lies on at given zoom level
def tile_xf(zl)
  Wukong::Geolocated.lng_zl_to_tile_xf(longitude, zl)
end
79
# Fractional tile y coordinate of this object's latitude.
#
# @param zl [Integer] zoom level
# @return [Float] y index of the tile this object lies on at given zoom level
def tile_yf(zl)
  Wukong::Geolocated.lat_zl_to_tile_yf(latitude, zl)
end
81
# Whole-number tile x index: the fractional coordinate rounded down.
#
# @param zl [Integer] zoom level
# @return [Integer] x index of the tile this object lies on at given zoom level
def tile_x(zl)
  fractional = tile_xf(zl)
  fractional.floor
end
83
# Whole-number tile y index: the fractional coordinate rounded down.
#
# @param zl [Integer] zoom level
# @return [Integer] y index of the tile this object lies on at given zoom level
def tile_y(zl)
  fractional = tile_yf(zl)
  fractional.floor
end
85
+
86
# Integer tile coordinates for this object at the given zoom level.
# (The x lookup previously used an undefined name, `xl`, and so raised
# NameError on every call.)
#
# @param zl [Integer] zoom level
# @return [Array<Integer>] tile coordinates `[x, y]` for this object at given zoom level
def tile_xy(zl)
  [tile_x(zl), tile_y(zl)]
end
88
+
89
# @return [Array] a `[longitude, latitude]` pair representing object as a point.
def lng_lat
  [longitude, latitude]
end
91
+
92
# Bounding box around this object's point, delegated to
# Wukong::Geolocated.lng_lat_rad_to_bbox.
#
# @param radius distance passed through to lng_lat_rad_to_bbox
# @return [left, btm, right, top]
def bbox_for_radius(radius)
  Wukong::Geolocated.lng_lat_rad_to_bbox(longitude, latitude, radius)
end
94
+ end
95
+
96
# Radius used for all distance calculations, in meters
EARTH_RADIUS      = 6_371_000
# Longitude band (degrees) accepted by the tile conversions
MIN_LONGITUDE     = -180
MAX_LONGITUDE     = 180
# Latitude band (degrees) accepted by the tile conversions
MIN_LATITUDE      = -85.05112878
MAX_LATITUDE      = 85.05112878
ALLOWED_LONGITUDE = Range.new(MIN_LONGITUDE, MAX_LONGITUDE)
ALLOWED_LATITUDE  = Range.new(MIN_LATITUDE, MAX_LATITUDE)
# Edge length of one tile, in pixels
TILE_PIXEL_SIZE   = 256
104
+
105
# Width (equivalently, height) of the map in number of tiles: 1 shifted
# left by the zoom level.
#
# @param zl [Integer] zoom level
# @return [Integer] tiles per side at that zoom level
def map_tile_size(zl)
  tiles_per_side = 1 << zl
  tiles_per_side
end
109
+
110
+ #
111
+ # Tile coordinates
112
+ #
113
+
114
# Convert longitude in degrees to a _floating-point_ tile x coordinate at the
# given zoom level.
#
# @param longitude [Numeric] degrees; must lie in ALLOWED_LONGITUDE
# @param zl [Integer] zoom level
# @return [Float] fractional tile x coordinate
# @raise [ArgumentError] if longitude is outside ALLOWED_LONGITUDE
def lng_zl_to_tile_xf(longitude, zl)
  unless ALLOWED_LONGITUDE.include?(longitude)
    raise ArgumentError, "longitude must be within bounds ((#{longitude}) vs #{ALLOWED_LONGITUDE})"
  end
  # fraction of the way across the map, from the -180 edge
  fraction = (longitude.to_f + 180.0) / 360.0
  map_tile_size(zl) * fraction
end
120
+
121
# Convert latitude in degrees to a _floating-point_ tile y coordinate at the
# given zoom level.
#
# @param latitude [Numeric] degrees; must lie in ALLOWED_LATITUDE
# @param zl [Integer] zoom level
# @return [Float] fractional tile y coordinate
# @raise [ArgumentError] if latitude is outside ALLOWED_LATITUDE
def lat_zl_to_tile_yf(latitude, zl)
  unless ALLOWED_LATITUDE.include?(latitude)
    raise ArgumentError, "latitude must be within bounds ((#{latitude}) vs #{ALLOWED_LATITUDE})"
  end
  sine = Math.sin(latitude.to_radians)
  # projected offset from the equator, as a fraction of the map height
  offset = Math.log((1 + sine) / (1 - sine)) / (4 * Math::PI)
  map_tile_size(zl) * (0.5 - offset)
end
128
+
129
# Convert longitude and latitude in degrees to integer tile x,y coordinates
# at the given zoom level (fractional coordinates rounded down).
#
# @return [Array<Integer>] `[tile_x, tile_y]`
def lng_lat_zl_to_tile_xy(longitude, latitude, zl)
  col = lng_zl_to_tile_xf(longitude, zl).floor
  row = lat_zl_to_tile_yf(latitude, zl).floor
  [col, row]
end
133
+
134
# Convert from tile_x, tile_y, zoom level to longitude and latitude in
# degrees (slight loss of precision).
#
# Tile coordinates may be floats or integers and must lie within the map:
# anywhere from 0 up to and including the number of tiles per side. The upper
# edge is allowed because it is the far corner of the last tile (it is what
# quadkey_to_bbox asks for when given a tile on the map's right or bottom edge).
#
# @param tile_x [Numeric] tile x coordinate, 0..map_tile_size(zl)
# @param tile_y [Numeric] tile y coordinate, 0..map_tile_size(zl)
# @param zl [Integer] zoom level
# @return [Array<Float>] `[longitude, latitude]` of that tile coordinate
# @raise [ArgumentError] if either coordinate lies outside the map
def tile_xy_zl_to_lng_lat(tile_x, tile_y, zl)
  tile_size = map_tile_size(zl)
  allowed   = (0..tile_size)
  # check both axes (previously tile_x was tested twice and tile_y never)
  unless allowed.include?(tile_x) && allowed.include?(tile_y)
    raise ArgumentError, "tile index must be within bounds ((#{tile_x},#{tile_y}) vs #{tile_size})"
  end
  xx = (tile_x.to_f / tile_size)
  yy = 0.5 - (tile_y.to_f / tile_size)
  lng = 360.0 * xx - 180.0
  lat = 90 - 360 * Math.atan(Math.exp(-yy * 2 * Math::PI)) / Math::PI
  [lng, lat]
end
147
+
148
+ #
149
+ # Quadkey coordinates
150
+ #
151
+
152
# Quadkey character for a tile's [y odd?, x odd?] pair. NOTE: bit order means y, x
BIT_TO_QUADKEY = {
  [false, false] => "0",
  [false, true]  => "1",
  [true,  false] => "2",
  [true,  true]  => "3",
}
# [y bit, x bit] pair for a quadkey character. NOTE: bit order means y, x
QUADKEY_TO_BIT = {
  "0" => [0, 0],
  "1" => [0, 1],
  "2" => [1, 0],
  "3" => [1, 1],
}
156
+
157
# Convert from tile x,y into a quadkey at a specified zoom level.
#
# Works from the least-significant bit upward, emitting one quadkey character
# per zoom level, then reverses so the most-significant character comes first.
#
# @return [String] quadkey, `zl` characters long
def tile_xy_zl_to_quadkey(tile_x, tile_y, zl)
  col = tile_x.to_i
  row = tile_y.to_i
  digits = zl.times.map do
    digit = BIT_TO_QUADKEY[[row.odd?, col.odd?]] # bit order y,x
    col >>= 1
    row >>= 1
    digit
  end
  digits.join.reverse
end
168
+
169
# Convert a quadkey into tile x,y coordinates and level.
#
# @param quadkey [String] characters 0-3 only; its length is the zoom level
# @return [Array<Integer>] `[tile_x, tile_y, zl]`
# @raise [ArgumentError] if the quadkey has any character other than 0, 1, 2 or 3
def quadkey_to_tile_xy_zl(quadkey)
  unless quadkey =~ /\A[0-3]*\z/
    raise ArgumentError, "Quadkey must contain only the characters 0, 1, 2 or 3: #{quadkey}!"
  end
  zoom = quadkey.to_s.length
  col = 0
  row = 0
  quadkey.each_char do |digit|
    ybit, xbit = QUADKEY_TO_BIT[digit] # bit order y, x
    col = (col << 1) + xbit
    row = (row << 1) + ybit
  end
  [col, row, zoom]
end
181
+
182
# Convert from tile x,y into a packed quadkey at a specified zoom level.
#
# Builds the string quadkey and parses it as a base-4 number. (Original
# author's note: don't optimize unless you're sure your way is faster; string
# ops are faster than you think and loops are slower than you think.)
#
# @return [Integer] the quadkey's digits read as a base-4 integer
def tile_xy_zl_to_packed_qk(tile_x, tile_y, zl)
  tile_xy_zl_to_quadkey(tile_x, tile_y, zl).to_i(4)
end
189
+
190
# Convert a packed quadkey (integer) into tile x,y coordinates and level.
#
# The integer is rendered in base 4 and left-padded with "0" to `zl`
# characters, then decoded by quadkey_to_tile_xy_zl. (Original author's note:
# don't "optimize" this without testing... string operations are faster than
# you think in ruby.)
#
# @param packed_qk [Integer] non-negative and less than 4**zl
# @param zl [Integer] zoom level (number of quadkey characters)
# @return whatever quadkey_to_tile_xy_zl returns for the padded quadkey
#   (in this file: `[tile_x, tile_y, zl]`)
# @raise [ArgumentError] if packed_qk is not an Integer within range
def packed_qk_zl_to_tile_xy(packed_qk, zl=16)
  # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and removed in 3.2
  in_range = packed_qk.is_a?(Integer) && packed_qk >= 0 && (packed_qk < 2 ** (zl*2))
  raise ArgumentError, "Quadkey must be an integer in range of the zoom level: #{packed_qk}, #{zl}" unless in_range
  # zero has no base-4 digits of its own; padding alone supplies the characters
  digits  = packed_qk.zero? ? "" : packed_qk.to_s(4)
  quadkey = digits.rjust(zl, "0")
  quadkey_to_tile_xy_zl(quadkey)
end
198
+
199
# Convert a longitude/latitude and zoom level into a quadkey, by way of the
# integer tile coordinates.
#
# @return the quadkey from tile_xy_zl_to_quadkey
def lng_lat_zl_to_quadkey(longitude, latitude, zl)
  col, row = lng_lat_zl_to_tile_xy(longitude, latitude, zl)
  tile_xy_zl_to_quadkey(col, row, zl)
end
204
+
205
+ #
206
+ # Bounding box coordinates
207
+ #
208
+
209
# Convert a quadkey into a bounding box using the adjacent tile: this tile's
# bottom-right corner is the top-left corner of its southeast neighbor.
#
# @return [Array] `[left, btm, right, top]`
def quadkey_to_bbox(quadkey)
  col, row, zoom = quadkey_to_tile_xy_zl(quadkey)
  west, north = tile_xy_zl_to_lng_lat(col,     row,     zoom)
  east, south = tile_xy_zl_to_lng_lat(col + 1, row + 1, zoom)
  [west, south, east, north]
end
217
+
218
# Returns the smallest quadkey containing both corners of the given bounding
# box: the longest prefix shared by the zoom-level-23 quadkeys of the
# top-left and bottom-right corners.
#
# @return [String] the common quadkey prefix (empty if the corners share none)
def quadkey_containing_bbox(left, btm, right, top)
  qk_top_left  = lng_lat_zl_to_quadkey(left,  top, 23)
  qk_btm_right = lng_lat_zl_to_quadkey(right, btm, 23)
  shared = qk_top_left.chars.zip(qk_btm_right.chars).take_while { |tl, br| tl == br }
  shared.map(&:first).join
end
230
+
231
# Returns a bounding box containing the circle created by the lng/lat and
# radius: the longitudes reached going west and east, and the latitudes
# reached going south and north, by `radius`.
#
# @return [Array] `[left, btm, right, top]`
def lng_lat_rad_to_bbox(longitude, latitude, radius)
  west  = point_east( longitude, latitude, -radius)
  south = point_north(longitude, latitude, -radius)
  east  = point_east( longitude, latitude,  radius)
  north = point_north(longitude, latitude,  radius)
  [west[0], south[1], east[0], north[1]]
end
239
+
240
# Returns the centroid of a bounding box, computed as the haversine midpoint
# of its two given corners.
#
# @param [Array<Float, Float>] left_btm  Longitude, Latitude of SW point
# @param [Array<Float, Float>] right_top Longitude, Latitude of NE point
#
# @return [Array<Float, Float>] Longitude, Latitude of centroid
def bbox_centroid(left_btm, right_top)
  corners = [*left_btm, *right_top]
  haversine_midpoint(*corners)
end
249
+
250
# Return the haversine distance in meters between two points,
# (left, btm) and (right, top), each given as longitude, latitude in degrees.
#
# @return [Float] distance along the sphere of radius EARTH_RADIUS
def haversine_distance(left, btm, right, top)
  delta_lng = (right - left).abs.to_radians
  delta_lat = (top   - btm ).abs.to_radians
  sin_half_lat = Math.sin(delta_lat / 2.0)
  sin_half_lng = Math.sin(delta_lng / 2.0)

  aa = sin_half_lat**2 + Math.cos(top.to_radians) * Math.cos(btm.to_radians) * sin_half_lng**2
  # central angle between the two points, in radians
  central_angle = 2.0 * Math.atan2(Math.sqrt(aa), Math.sqrt(1.0 - aa))
  central_angle * EARTH_RADIUS
end
261
+
262
# Return the great-circle midpoint between two points, (left, btm) and
# (right, top), each given as longitude, latitude in degrees.
#
# The second point is expressed relative to the first: `bearing_x` and
# `bearing_y` must therefore be scaled by the cosine of the *second* point's
# latitude (`top`) and added to the cosine of the *first* point's latitude
# (`btm`), since `left` is the longitude the result is offset from. (The two
# cosines were previously swapped, which produced the midpoint longitude of
# the opposite diagonal, (left, top)-(right, btm).)
#
# @return [Array<Float>] `[longitude, latitude]` of the midpoint, in degrees
def haversine_midpoint(left, btm, right, top)
  btm_rad   = btm.to_radians
  top_rad   = top.to_radians
  delta_lng = (right - left).to_radians
  cos_btm   = Math.cos(btm_rad)
  cos_top   = Math.cos(top_rad)
  bearing_x = cos_top * Math.cos(delta_lng)
  bearing_y = cos_top * Math.sin(delta_lng)
  mid_lat   = Math.atan2(
    (Math.sin(btm_rad) + Math.sin(top_rad)),
    (Math.sqrt((cos_btm + bearing_x)**2 + bearing_y**2)))
  mid_lng   = left.to_radians + Math.atan2(bearing_y, (cos_btm + bearing_x))
  [mid_lng.to_degrees, mid_lat.to_degrees]
end
274
+
275
# From a given point, calculate the point directly north a specified distance
# (same units as EARTH_RADIUS). Longitude is passed through unchanged.
#
# @return [Array] `[longitude, latitude_in_degrees]`
def point_north(longitude, latitude, distance)
  angular_offset = distance.to_f / EARTH_RADIUS
  [longitude, (latitude.to_radians + angular_offset).to_degrees]
end
280
+
281
# From a given point, calculate the point directly east a given distance
# (same units as EARTH_RADIUS), moving along the point's circle of latitude.
# Latitude is passed through unchanged.
#
# @return [Array] `[longitude_in_degrees, latitude]`
def point_east(longitude, latitude, distance)
  # radius of the circle of latitude through the point
  parallel_radius = EARTH_RADIUS * Math.sin(((Math::PI / 2.0) - latitude.to_radians.abs))
  angular_offset  = distance.to_f / parallel_radius
  [(longitude.to_radians + angular_offset).to_degrees, latitude]
end
287
+
288
+ #
289
+ # Pixel coordinates
290
+ #
291
+ # Use with a standard (256x256 pixel) grid-based tileserver
292
+ #
293
+
294
# Width or height of grid bitmap in pixels at given zoom level: tiles per
# side times pixels per tile.
#
# @return [Integer]
def map_pixel_size(zl)
  tiles_per_side = map_tile_size(zl)
  TILE_PIXEL_SIZE * tiles_per_side
end
298
+
299
# Return pixel resolution in meters per pixel at a specified latitude and
# zoom level. Latitude is first clamped into MIN_LATITUDE..MAX_LATITUDE.
#
# @return [Float]
def pixel_resolution(latitude, zl)
  clamped = latitude.clamp(MIN_LATITUDE, MAX_LATITUDE)
  pixels  = map_pixel_size(zl).to_f
  Math.cos(clamped.to_radians) * 2 * Math::PI * EARTH_RADIUS / pixels
end
304
+
305
# Map scale at a specified latitude, zoom level, & screen resolution in dpi:
# the pixel resolution times dots-per-inch, divided by 0.0254.
#
# @return [Float]
def map_scale_for_dpi(latitude, zl, screen_dpi)
  resolution = pixel_resolution(latitude, zl)
  resolution * screen_dpi / 0.0254
end
309
+
310
# Convert from x,y pixel pair into tile x,y coordinates, dividing each by
# TILE_PIXEL_SIZE (integer inputs therefore give integer tile indices).
#
# @return [Array] `[tile_x, tile_y]`
def pixel_xy_to_tile_xy(pixel_x, pixel_y)
  [pixel_x, pixel_y].map { |pixel| pixel / TILE_PIXEL_SIZE }
end
314
+
315
# Convert from x,y tile pair into pixel x,y coordinates (top left corner).
#
# @return [Array] `[pixel_x, pixel_y]`
def tile_xy_to_pixel_xy(tile_x, tile_y)
  [tile_x, tile_y].map { |tile| tile * TILE_PIXEL_SIZE }
end
319
+
320
# Convert pixel x,y coordinates at a zoom level into longitude and latitude,
# by way of fractional tile coordinates.
#
# @return whatever tile_xy_zl_to_lng_lat returns for those tile coordinates
def pixel_xy_zl_to_lng_lat(pixel_x, pixel_y, zl)
  tile_xf = pixel_x.to_f / TILE_PIXEL_SIZE
  tile_yf = pixel_y.to_f / TILE_PIXEL_SIZE
  tile_xy_zl_to_lng_lat(tile_xf, tile_yf, zl)
end
323
+
324
# Convert longitude and latitude at a zoom level into integer pixel x,y
# coordinates: fractional tile coordinates scaled by TILE_PIXEL_SIZE, with
# 0.5 added before flooring.
#
# @return [Array<Integer>] `[pixel_x, pixel_y]`
def lng_lat_zl_to_pixel_xy(lng, lat, zl)
  tile_xf = lng_zl_to_tile_xf(lng, zl)
  tile_yf = lat_zl_to_tile_yf(lat, zl)
  [tile_xf, tile_yf].map { |tile| (tile * TILE_PIXEL_SIZE + 0.5).floor }
end
329
+
330
+ end
331
+ end
@@ -0,0 +1,69 @@
1
+ module Wukong
2
+ module Geo
3
+ class Quadtile
4
+ include Gorillib::Model
5
+ #
6
+ field :tile_x, Integer, position: 0, doc: "Tile X index, an integer between 0 and 2^zoom_level - 1"
7
+ field :tile_y, Integer, position: 1, doc: "Tile Y index, an integer between 0 and 2^zoom_level - 1"
8
+ field :zl, Integer, position: 2, doc: "Zoom level of tile to fetch. 0 is the world; 16 is about a kilometer."
9
+ field :slug, String, default: 'tile', doc: "Name, prefixed on saved tiles"
10
+
11
# @return the quadkey Wukong::Geolocated computes from this tile's x, y and zoom level
def quadkey
  Wukong::Geolocated.tile_xy_zl_to_quadkey(tile_x, tile_y, zl)
end
12
# @return the packed quadkey Wukong::Geolocated computes from this tile's x, y and zoom level
def packed_qk
  Wukong::Geolocated.tile_xy_zl_to_packed_qk(tile_x, tile_y, zl)
end
13
+
14
+ # Base of URL for map tile server; anything X/Y/Z.png-addressable works,
15
+ # eg `http://b.tile.openstreetmap.org`. Defaults to `http://a.tile.stamen.com/toner-lite`.
16
+ class_attribute :tileserver_url_base
17
+ self.tileserver_url_base = 'http://a.tile.stamen.com/toner-lite'
18
+
19
# Build a tile from whichever identifying keys the hash carries. Tried in order:
#
# * `:tile_x`, `:tile_y` and `:zl`
# * `:longitude`, `:latitude` and `:zl`
# * `:quadkey` -- with a positive `:zl`, the quadkey is first truncated to
#   that zoom level; the zoom level is then taken from the quadkey's length
#
# @param hsh [Hash] attributes; also passed through to `new` via `to_hash`
# @return a new instance for the resolved tile_x, tile_y and zl
# @raise [ArgumentError] if none of the key combinations is present
def self.from_whatever(hsh)
  zl = hsh[:zl] ? hsh[:zl].to_i : nil
  case
  when hsh[:tile_x].present? && hsh[:tile_y].present? && zl.present?
    tile_x, tile_y = [hsh[:tile_x], hsh[:tile_y]]
  when hsh[:longitude].present? && hsh[:latitude].present? && zl.present?
    tile_x, tile_y = Wukong::Geolocated.lng_lat_zl_to_tile_xy(hsh[:longitude], hsh[:latitude], zl)
  when hsh[:quadkey].present?
    quadkey = hsh[:quadkey]
    # a quadkey's length is its zoom level, so keep exactly `zl` characters
    # (`quadkey[0..zl]` kept one too many)
    quadkey = quadkey[0, zl] if zl.to_i > 0
    tile_x, tile_y, zl = Wukong::Geolocated.quadkey_to_tile_xy_zl(quadkey)
  else
    raise ArgumentError, "You must supply keys for either `:longitude`, `:latitude` and `:zl`; `:tile_x`, `:tile_y` and `:zl`; or `:quadkey`: #{hsh.inspect}"
  end
  new(tile_x, tile_y, zl, hsh.to_hash)
end
35
+
36
# Faraday connection to the tile server, built once from
# `tileserver_url_base` and then reused on later calls. (Previously a new
# connection was created, and stored but never reused, on every call.)
#
# @return the memoized Faraday connection
def self.tileserver_conn
  @tileserver_conn ||= Faraday.new(:url => tileserver_url_base)
end
39
+
40
# URL of this tile's image: `<tileserver_url_base>/<zl>/<tile_x>/<tile_y>.png`
#
# @return [String]
def tile_url
  path_parts = [tileserver_url_base, zl, tile_x, tile_y]
  "#{path_parts.join('/')}.png"
end
43
+
44
# A relative file name for this tile, laid out as
# `<slug>/<zoom level, 2 digits>/<slug><sep><quadkey>.<ext>`.
#
# @example
#   qt = Quadtile.new(58, 105, 8)
#   qt.basename                        # => "tile/08/tile-02313012.png"
#   qt.basename(sep: '_', ext: 'jpg')  # => "tile/08/tile_02313012.jpg"
#
# @param options [Hash]
# @option options [String] :sep ('-') separator between slug and quadkey
# @option options [String] :ext ('png') file extension, without the dot
# @return [String]
def basename(options={})
  # caller-supplied options override the defaults (they used to be discarded)
  options = { sep: '-', ext: 'png' }.merge(options)
  sep = options[:sep]
  # "%s%s%02d%s%04d%s%04d.%s" % [slug, sep, zl, sep, tile_x, sep, tile_y, options[:ext]]
  "%s/%02d/%s%s%s.%s" % [slug, zl, slug, sep, quadkey, options[:ext]]
end
58
+
59
# Fetch the contents of a map tile from a tileserver, by issuing a GET for
# `tile_url` over the class-level tileserver connection.
#
# You are responsible for requiring the faraday library and its adapter
#
# @return the result of the connection's `get` call
def fetch
  connection = self.class.tileserver_conn
  connection.get(tile_url)
end
66
+
67
+ end
68
+ end
69
+ end