wukong 3.0.0.pre → 3.0.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +46 -33
- data/.gitmodules +3 -0
- data/.rspec +1 -1
- data/.travis.yml +8 -1
- data/.yardopts +0 -13
- data/Guardfile +4 -6
- data/{LICENSE.textile → LICENSE.md} +43 -55
- data/README-old.md +422 -0
- data/README.md +279 -418
- data/Rakefile +21 -5
- data/TODO.md +6 -6
- data/bin/wu-clean-encoding +31 -0
- data/bin/wu-lign +2 -2
- data/bin/wu-local +69 -0
- data/bin/wu-server +70 -0
- data/examples/Gemfile +38 -0
- data/examples/README.md +9 -0
- data/examples/dataflow/apache_log_line.rb +64 -25
- data/examples/dataflow/fibonacci_series.rb +101 -0
- data/examples/dataflow/parse_apache_logs.rb +37 -7
- data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
- data/examples/dataflow/simple.rb +4 -4
- data/examples/geo.rb +4 -0
- data/examples/geo/geo_grids.numbers +0 -0
- data/examples/geo/geolocated.rb +331 -0
- data/examples/geo/quadtile.rb +69 -0
- data/examples/geo/spec/geolocated_spec.rb +247 -0
- data/examples/geo/tile_fetcher.rb +77 -0
- data/examples/graph/minimum_spanning_tree.rb +61 -61
- data/examples/jabberwocky.txt +36 -0
- data/examples/models/wikipedia.rb +20 -0
- data/examples/munging/Gemfile +8 -0
- data/examples/munging/airline_flights/airline.rb +57 -0
- data/examples/munging/airline_flights/airline_flights.rake +83 -0
- data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
- data/examples/munging/airline_flights/airport.rb +211 -0
- data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
- data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
- data/examples/munging/airline_flights/flight.rb +156 -0
- data/examples/munging/airline_flights/models.rb +4 -0
- data/examples/munging/airline_flights/parse.rb +26 -0
- data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
- data/examples/munging/airline_flights/route.rb +35 -0
- data/examples/munging/airline_flights/tasks.rake +83 -0
- data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
- data/examples/munging/airline_flights/topcities.rb +167 -0
- data/examples/munging/airports/40_wbans.txt +40 -0
- data/examples/munging/airports/filter_weather_reports.rb +37 -0
- data/examples/munging/airports/join.pig +31 -0
- data/examples/munging/airports/to_tsv.rb +33 -0
- data/examples/munging/airports/usa_wbans.pig +19 -0
- data/examples/munging/airports/usa_wbans.txt +2157 -0
- data/examples/munging/airports/wbans.pig +19 -0
- data/examples/munging/airports/wbans.txt +2310 -0
- data/examples/munging/geo/geo_json.rb +54 -0
- data/examples/munging/geo/geo_models.rb +69 -0
- data/examples/munging/geo/geonames_models.rb +78 -0
- data/examples/munging/geo/iso_codes.rb +172 -0
- data/examples/munging/geo/reconcile_countries.rb +124 -0
- data/examples/munging/geo/tasks.rake +71 -0
- data/examples/munging/rake_helper.rb +62 -0
- data/examples/munging/weather/.gitignore +1 -0
- data/examples/munging/weather/Gemfile +4 -0
- data/examples/munging/weather/Rakefile +28 -0
- data/examples/munging/weather/extract_ish.rb +13 -0
- data/examples/munging/weather/models/weather.rb +119 -0
- data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
- data/examples/munging/wikipedia/README.md +34 -0
- data/examples/munging/wikipedia/Rakefile +193 -0
- data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
- data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
- data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
- data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
- data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
- data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
- data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
- data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
- data/examples/munging/wikipedia/pig_style_guide.md +25 -0
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
- data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
- data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
- data/examples/munging/wikipedia/utils/namespaces.json +1 -0
- data/examples/rake_helper.rb +85 -0
- data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/server_logs/logline.rb +95 -0
- data/examples/server_logs/models.rb +66 -0
- data/examples/server_logs/page_counts.pig +48 -0
- data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
- data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
- data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
- data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
- data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
- data/examples/string_reverser.rb +26 -0
- data/examples/text/pig_latin.rb +2 -2
- data/examples/text/regional_flavor/README.md +14 -0
- data/examples/text/regional_flavor/article_wordbags.pig +39 -0
- data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
- data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
- data/examples/word_count/accumulator.rb +26 -0
- data/examples/word_count/tokenizer.rb +13 -0
- data/examples/word_count/word_count.rb +6 -0
- data/examples/workflow/cherry_pie.dot +97 -0
- data/examples/workflow/cherry_pie.png +0 -0
- data/examples/workflow/cherry_pie.rb +61 -26
- data/lib/hanuman.rb +34 -7
- data/lib/hanuman/graph.rb +55 -31
- data/lib/hanuman/graphvizzer.rb +199 -178
- data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
- data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
- data/lib/hanuman/link.rb +35 -0
- data/lib/hanuman/registry.rb +46 -0
- data/lib/hanuman/stage.rb +76 -32
- data/lib/wukong.rb +23 -24
- data/lib/wukong/boot.rb +87 -0
- data/lib/wukong/configuration.rb +8 -0
- data/lib/wukong/dataflow.rb +45 -78
- data/lib/wukong/driver.rb +99 -0
- data/lib/wukong/emitter.rb +22 -0
- data/lib/wukong/model/faker.rb +24 -24
- data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
- data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
- data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
- data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
- data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
- data/lib/wukong/processor.rb +60 -114
- data/lib/wukong/spec_helpers.rb +81 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
- data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
- data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
- data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
- data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
- data/lib/wukong/version.rb +2 -1
- data/lib/wukong/widget/filters.rb +311 -0
- data/lib/wukong/widget/processors.rb +156 -0
- data/lib/wukong/widget/reducers.rb +7 -0
- data/lib/wukong/widget/reducers/accumulator.rb +73 -0
- data/lib/wukong/widget/reducers/bin.rb +318 -0
- data/lib/wukong/widget/reducers/count.rb +61 -0
- data/lib/wukong/widget/reducers/group.rb +85 -0
- data/lib/wukong/widget/reducers/group_concat.rb +70 -0
- data/lib/wukong/widget/reducers/moments.rb +72 -0
- data/lib/wukong/widget/reducers/sort.rb +130 -0
- data/lib/wukong/widget/serializers.rb +287 -0
- data/lib/wukong/widget/sink.rb +10 -52
- data/lib/wukong/widget/source.rb +7 -113
- data/lib/wukong/widget/utils.rb +46 -0
- data/lib/wukong/widgets.rb +6 -0
- data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
- data/spec/examples/dataflow/parsing_spec.rb +12 -11
- data/spec/examples/dataflow/simple_spec.rb +32 -6
- data/spec/examples/dataflow/telegram_spec.rb +36 -36
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
- data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
- data/spec/examples/munging/airline_flights_spec.rb +202 -0
- data/spec/examples/text/pig_latin_spec.rb +13 -16
- data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
- data/spec/hanuman/graph_spec.rb +27 -2
- data/spec/hanuman/hanuman_spec.rb +10 -0
- data/spec/hanuman/registry_spec.rb +123 -0
- data/spec/hanuman/stage_spec.rb +61 -7
- data/spec/spec_helper.rb +29 -19
- data/spec/support/hanuman_test_helpers.rb +14 -12
- data/spec/support/shared_context_for_reducers.rb +37 -0
- data/spec/support/shared_examples_for_builders.rb +101 -0
- data/spec/support/shared_examples_for_shortcuts.rb +57 -0
- data/spec/support/wukong_test_helpers.rb +37 -11
- data/spec/wukong/dataflow_spec.rb +77 -55
- data/spec/wukong/local_runner_spec.rb +24 -24
- data/spec/wukong/model/faker_spec.rb +132 -131
- data/spec/wukong/runner_spec.rb +8 -8
- data/spec/wukong/widget/filters_spec.rb +61 -0
- data/spec/wukong/widget/processors_spec.rb +126 -0
- data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
- data/spec/wukong/widget/reducers/count_spec.rb +11 -0
- data/spec/wukong/widget/reducers/group_spec.rb +20 -0
- data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
- data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
- data/spec/wukong/widget/serializers_spec.rb +92 -0
- data/spec/wukong/widget/sink_spec.rb +15 -15
- data/spec/wukong/widget/source_spec.rb +65 -41
- data/spec/wukong/wukong_spec.rb +10 -0
- data/wukong.gemspec +17 -10
- metadata +359 -335
- data/.document +0 -5
- data/VERSION +0 -1
- data/bin/hdp-bin +0 -44
- data/bin/hdp-bzip +0 -23
- data/bin/hdp-cat +0 -3
- data/bin/hdp-catd +0 -3
- data/bin/hdp-cp +0 -3
- data/bin/hdp-du +0 -86
- data/bin/hdp-get +0 -3
- data/bin/hdp-kill +0 -3
- data/bin/hdp-kill-task +0 -3
- data/bin/hdp-ls +0 -11
- data/bin/hdp-mkdir +0 -2
- data/bin/hdp-mkdirp +0 -12
- data/bin/hdp-mv +0 -3
- data/bin/hdp-parts_to_keys.rb +0 -77
- data/bin/hdp-ps +0 -3
- data/bin/hdp-put +0 -3
- data/bin/hdp-rm +0 -32
- data/bin/hdp-sort +0 -40
- data/bin/hdp-stream +0 -40
- data/bin/hdp-stream-flat +0 -22
- data/bin/hdp-stream2 +0 -39
- data/bin/hdp-sync +0 -17
- data/bin/hdp-wc +0 -67
- data/bin/wu-flow +0 -10
- data/bin/wu-map +0 -17
- data/bin/wu-red +0 -17
- data/bin/wukong +0 -17
- data/data/CREDITS.md +0 -355
- data/data/graph/airfares.tsv +0 -2174
- data/data/text/gift_of_the_magi.txt +0 -225
- data/data/text/jabberwocky.txt +0 -36
- data/data/text/rectification_of_names.txt +0 -33
- data/data/twitter/a_atsigns_b.tsv +0 -64
- data/data/twitter/a_follows_b.tsv +0 -53
- data/data/twitter/tweet.tsv +0 -167
- data/data/twitter/twitter_user.tsv +0 -55
- data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
- data/docpages/INSTALL.textile +0 -92
- data/docpages/LICENSE.textile +0 -107
- data/docpages/README-elastic_map_reduce.textile +0 -377
- data/docpages/README-performance.textile +0 -90
- data/docpages/README-wulign.textile +0 -65
- data/docpages/UsingWukong-part1-get_ready.textile +0 -17
- data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
- data/docpages/UsingWukong-part3-parsing.textile +0 -138
- data/docpages/_config.yml +0 -39
- data/docpages/avro/avro_notes.textile +0 -56
- data/docpages/avro/performance.textile +0 -36
- data/docpages/avro/tethering.textile +0 -19
- data/docpages/bigdata-tips.textile +0 -143
- data/docpages/code/api_response_example.txt +0 -20
- data/docpages/code/parser_skeleton.rb +0 -38
- data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +0 -16
- data/docpages/hadoop-tips.textile +0 -83
- data/docpages/index.textile +0 -92
- data/docpages/intro.textile +0 -8
- data/docpages/moreinfo.textile +0 -174
- data/docpages/news.html +0 -24
- data/docpages/pig/PigLatinExpressionsList.txt +0 -122
- data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
- data/docpages/pig/commandline_params.txt +0 -26
- data/docpages/pig/cookbook.html +0 -481
- data/docpages/pig/images/hadoop-logo.jpg +0 -0
- data/docpages/pig/images/instruction_arrow.png +0 -0
- data/docpages/pig/images/pig-logo.gif +0 -0
- data/docpages/pig/piglatin_ref1.html +0 -1103
- data/docpages/pig/piglatin_ref2.html +0 -14340
- data/docpages/pig/setup.html +0 -505
- data/docpages/pig/skin/basic.css +0 -166
- data/docpages/pig/skin/breadcrumbs.js +0 -237
- data/docpages/pig/skin/fontsize.js +0 -166
- data/docpages/pig/skin/getBlank.js +0 -40
- data/docpages/pig/skin/getMenu.js +0 -45
- data/docpages/pig/skin/images/chapter.gif +0 -0
- data/docpages/pig/skin/images/chapter_open.gif +0 -0
- data/docpages/pig/skin/images/current.gif +0 -0
- data/docpages/pig/skin/images/external-link.gif +0 -0
- data/docpages/pig/skin/images/header_white_line.gif +0 -0
- data/docpages/pig/skin/images/page.gif +0 -0
- data/docpages/pig/skin/images/pdfdoc.gif +0 -0
- data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/print.css +0 -54
- data/docpages/pig/skin/profile.css +0 -181
- data/docpages/pig/skin/screen.css +0 -587
- data/docpages/pig/tutorial.html +0 -1059
- data/docpages/pig/udf.html +0 -1509
- data/docpages/tutorial.textile +0 -283
- data/docpages/usage.textile +0 -195
- data/docpages/wutils.textile +0 -263
- data/examples/dataflow/complex.rb +0 -11
- data/examples/dataflow/donuts.rb +0 -13
- data/examples/tiny_count/jabberwocky_output.tsv +0 -92
- data/examples/word_count.rb +0 -48
- data/examples/workflow/fiddle.rb +0 -24
- data/lib/away/escapement.rb +0 -129
- data/lib/away/exe.rb +0 -11
- data/lib/away/experimental.rb +0 -5
- data/lib/away/from_file.rb +0 -52
- data/lib/away/job.rb +0 -56
- data/lib/away/job/rake_compat.rb +0 -17
- data/lib/away/registry.rb +0 -79
- data/lib/away/runner.rb +0 -276
- data/lib/away/runner/execute.rb +0 -121
- data/lib/away/script.rb +0 -161
- data/lib/away/script/hadoop_command.rb +0 -240
- data/lib/away/source/file_list_source.rb +0 -15
- data/lib/away/source/looper.rb +0 -18
- data/lib/away/task.rb +0 -219
- data/lib/hanuman/action.rb +0 -21
- data/lib/hanuman/chain.rb +0 -4
- data/lib/hanuman/graphviz.rb +0 -74
- data/lib/hanuman/resource.rb +0 -6
- data/lib/hanuman/slot.rb +0 -87
- data/lib/hanuman/slottable.rb +0 -220
- data/lib/wukong/bad_record.rb +0 -15
- data/lib/wukong/event.rb +0 -44
- data/lib/wukong/local_runner.rb +0 -55
- data/lib/wukong/mapred.rb +0 -3
- data/lib/wukong/universe.rb +0 -48
- data/lib/wukong/widget/filter.rb +0 -81
- data/lib/wukong/widget/gibberish.rb +0 -123
- data/lib/wukong/widget/monitor.rb +0 -26
- data/lib/wukong/widget/reducer.rb +0 -66
- data/lib/wukong/widget/stringifier.rb +0 -50
- data/lib/wukong/workflow.rb +0 -22
- data/lib/wukong/workflow/command.rb +0 -42
- data/old/config/emr-example.yaml +0 -48
- data/old/examples/README.txt +0 -17
- data/old/examples/contrib/jeans/README.markdown +0 -165
- data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
- data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
- data/old/examples/contrib/jeans/data/sizes +0 -3
- data/old/examples/contrib/jeans/normalize.rb +0 -20
- data/old/examples/contrib/jeans/sizes.rb +0 -55
- data/old/examples/corpus/bnc_word_freq.rb +0 -44
- data/old/examples/corpus/bucket_counter.rb +0 -47
- data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
- data/old/examples/corpus/sentence_bigrams.rb +0 -53
- data/old/examples/corpus/sentence_coocurrence.rb +0 -66
- data/old/examples/corpus/stopwords.rb +0 -138
- data/old/examples/corpus/words_to_bigrams.rb +0 -53
- data/old/examples/emr/README.textile +0 -110
- data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
- data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
- data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
- data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
- data/old/examples/network_graph/adjacency_list.rb +0 -74
- data/old/examples/network_graph/breadth_first_search.rb +0 -72
- data/old/examples/network_graph/gen_2paths.rb +0 -68
- data/old/examples/network_graph/gen_multi_edge.rb +0 -112
- data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
- data/old/examples/pagerank/README.textile +0 -6
- data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
- data/old/examples/pagerank/pagerank.rb +0 -72
- data/old/examples/pagerank/pagerank_initialize.rb +0 -42
- data/old/examples/pagerank/run_pagerank.sh +0 -21
- data/old/examples/sample_records.rb +0 -33
- data/old/examples/server_logs/apache_log_parser.rb +0 -15
- data/old/examples/server_logs/nook.rb +0 -48
- data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
- data/old/examples/server_logs/user_agent.rb +0 -40
- data/old/examples/simple_word_count.rb +0 -82
- data/old/examples/size.rb +0 -61
- data/old/examples/stats/avg_value_frequency.rb +0 -86
- data/old/examples/stats/binning_percentile_estimator.rb +0 -140
- data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
- data/old/examples/stats/rank_and_bin.rb +0 -173
- data/old/examples/stupidly_simple_filter.rb +0 -40
- data/old/examples/word_count.rb +0 -75
- data/old/graph/graphviz_builder.rb +0 -580
- data/old/graph_easy/Attributes.pm +0 -4181
- data/old/graph_easy/Graphviz.pm +0 -2232
- data/old/wukong.rb +0 -18
- data/old/wukong/and_pig.rb +0 -38
- data/old/wukong/bad_record.rb +0 -18
- data/old/wukong/datatypes.rb +0 -24
- data/old/wukong/datatypes/enum.rb +0 -127
- data/old/wukong/datatypes/fake_types.rb +0 -17
- data/old/wukong/decorator.rb +0 -28
- data/old/wukong/encoding/asciize.rb +0 -108
- data/old/wukong/extensions.rb +0 -16
- data/old/wukong/extensions/array.rb +0 -18
- data/old/wukong/extensions/blank.rb +0 -93
- data/old/wukong/extensions/class.rb +0 -189
- data/old/wukong/extensions/date_time.rb +0 -53
- data/old/wukong/extensions/emittable.rb +0 -69
- data/old/wukong/extensions/enumerable.rb +0 -79
- data/old/wukong/extensions/hash.rb +0 -167
- data/old/wukong/extensions/hash_keys.rb +0 -16
- data/old/wukong/extensions/hash_like.rb +0 -150
- data/old/wukong/extensions/hashlike_class.rb +0 -47
- data/old/wukong/extensions/module.rb +0 -2
- data/old/wukong/extensions/pathname.rb +0 -27
- data/old/wukong/extensions/string.rb +0 -65
- data/old/wukong/extensions/struct.rb +0 -17
- data/old/wukong/extensions/symbol.rb +0 -11
- data/old/wukong/filename_pattern.rb +0 -74
- data/old/wukong/helper.rb +0 -7
- data/old/wukong/helper/stopwords.rb +0 -195
- data/old/wukong/helper/tokenize.rb +0 -35
- data/old/wukong/logger.rb +0 -38
- data/old/wukong/periodic_monitor.rb +0 -72
- data/old/wukong/schema.rb +0 -269
- data/old/wukong/script.rb +0 -286
- data/old/wukong/script/avro_command.rb +0 -5
- data/old/wukong/script/cassandra_loader_script.rb +0 -40
- data/old/wukong/script/emr_command.rb +0 -168
- data/old/wukong/script/hadoop_command.rb +0 -237
- data/old/wukong/script/local_command.rb +0 -41
- data/old/wukong/store.rb +0 -10
- data/old/wukong/store/base.rb +0 -27
- data/old/wukong/store/cassandra.rb +0 -10
- data/old/wukong/store/cassandra/streaming.rb +0 -75
- data/old/wukong/store/cassandra/struct_loader.rb +0 -21
- data/old/wukong/store/cassandra_model.rb +0 -91
- data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
- data/old/wukong/store/chunked_flat_file_store.rb +0 -48
- data/old/wukong/store/conditional_store.rb +0 -57
- data/old/wukong/store/factory.rb +0 -8
- data/old/wukong/store/flat_file_store.rb +0 -89
- data/old/wukong/store/key_store.rb +0 -51
- data/old/wukong/store/null_store.rb +0 -15
- data/old/wukong/store/read_thru_store.rb +0 -22
- data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
- data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
- data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
- data/old/wukong/streamer.rb +0 -30
- data/old/wukong/streamer/accumulating_reducer.rb +0 -83
- data/old/wukong/streamer/base.rb +0 -126
- data/old/wukong/streamer/counting_reducer.rb +0 -25
- data/old/wukong/streamer/filter.rb +0 -20
- data/old/wukong/streamer/instance_streamer.rb +0 -15
- data/old/wukong/streamer/json_streamer.rb +0 -21
- data/old/wukong/streamer/line_streamer.rb +0 -12
- data/old/wukong/streamer/list_reducer.rb +0 -31
- data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
- data/old/wukong/streamer/record_streamer.rb +0 -14
- data/old/wukong/streamer/reducer.rb +0 -11
- data/old/wukong/streamer/set_reducer.rb +0 -14
- data/old/wukong/streamer/struct_streamer.rb +0 -48
- data/old/wukong/streamer/summing_reducer.rb +0 -29
- data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
- data/old/wukong/typed_struct.rb +0 -12
- data/spec/away/encoding_spec.rb +0 -32
- data/spec/away/exe_spec.rb +0 -20
- data/spec/away/flow_spec.rb +0 -82
- data/spec/away/graph_spec.rb +0 -6
- data/spec/away/job_spec.rb +0 -15
- data/spec/away/rake_compat_spec.rb +0 -9
- data/spec/away/script_spec.rb +0 -81
- data/spec/hanuman/graphviz_spec.rb +0 -29
- data/spec/hanuman/slot_spec.rb +0 -2
- data/spec/support/examples_helper.rb +0 -10
- data/spec/support/streamer_test_helpers.rb +0 -6
- data/spec/support/wukong_widget_helpers.rb +0 -66
- data/spec/wukong/processor_spec.rb +0 -109
- data/spec/wukong/widget/filter_spec.rb +0 -99
- data/spec/wukong/widget/stringifier_spec.rb +0 -51
- data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,16 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", File.realdirpath(File.dirname(__FILE__))))
|
3
|
+
require 'wukong'
|
4
|
+
|
5
|
+
Settings.use(:commandline)
|
6
|
+
Settings.define :profiler, :default => nil
|
7
|
+
Settings.resolve!
|
8
|
+
|
1
9
|
require File.expand_path('../examples_helper', File.dirname(__FILE__))
|
2
10
|
require Pathname.path_to(:examples, 'dataflow/apache_log_line')
|
3
11
|
|
4
|
-
|
12
|
+
Wukong.dataflow(:parse_apache_logs) do
|
5
13
|
|
6
14
|
doc 'Parses an apache log line into a structured model, emits it as JSON'
|
7
15
|
|
8
|
-
|
9
|
-
|
16
|
+
source = ($0 == __FILE__) ? stdin : file_source(Pathname.path_to(:data, 'log/sample_apache_log.log'))
|
17
|
+
set_input :default, source
|
18
|
+
set_output :dump, stdout
|
10
19
|
|
11
|
-
input
|
20
|
+
input >
|
12
21
|
map{|line| ApacheLogLine.make(line) or bad_record(line) } >
|
13
|
-
|
14
|
-
output
|
15
|
-
|
22
|
+
to_tsv >
|
23
|
+
output
|
16
24
|
end
|
25
|
+
|
26
|
+
# if ($0 == __FILE__)
|
27
|
+
# flow_name = :parse_apache_logs
|
28
|
+
# if Settings.profiler
|
29
|
+
# require 'perftools'
|
30
|
+
# Pathname(Settings.profiler).dirname.mkpath
|
31
|
+
# PerfTools::CpuProfiler.start(Settings.profiler) do
|
32
|
+
# Wukong::LocalRunner.run(Wukong.dataflow(flow_name), :default)
|
33
|
+
# end
|
34
|
+
# else
|
35
|
+
# Wukong::LocalRunner.run(Wukong.dataflow(flow_name), :default)
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# # require 'jruby/profiler'
|
39
|
+
# # profile_data = JRuby::Profiler.profile do
|
40
|
+
# # Wukong::LocalRunner.run(Wukong.dataflow(flow_name), :default)
|
41
|
+
# # end
|
42
|
+
# # profile_printer = JRuby::Profiler::GraphProfilePrinter.new(profile_data)
|
43
|
+
# # profile_printer.printProfile($stderr)
|
44
|
+
#
|
45
|
+
# # Wukong::LocalRunner.run(Wukong.dataflow(flow_name), :default)
|
46
|
+
# end
|
File without changes
|
data/examples/dataflow/simple.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
require File.expand_path('../examples_helper', File.dirname(__FILE__))
|
2
2
|
|
3
|
-
|
3
|
+
Wukong.dataflow(:simple) do
|
4
4
|
doc <<-DOC
|
5
5
|
A stupidly simple dataflow: reverses each input string
|
6
6
|
DOC
|
7
7
|
|
8
|
-
|
9
|
-
|
8
|
+
file_source(Pathname.path_to(:data, 'text/jabberwocky.txt')) >
|
9
|
+
map{|str| str.reverse } >
|
10
|
+
file_sink(Pathname.path_to(:tmp, 'dataflow/simple_output.rb'))
|
10
11
|
|
11
|
-
input(:default) > map{|str| str.reverse } > output(:dump)
|
12
12
|
end
|
data/examples/geo.rb
ADDED
Binary file
|
@@ -0,0 +1,331 @@
|
|
1
|
+
require 'gorillib/numeric/clamp'
|
2
|
+
|
3
|
+
# Degree/radian conversion helpers added to every Numeric.
Numeric.class_eval do
  # @return [Float] the receiver (taken as degrees) converted to radians
  def to_radians
    to_f * Math::PI / 180.0
  end

  # @return [Float] the receiver (taken as radians) converted to degrees
  def to_degrees
    to_f * 180.0 / Math::PI
  end
end
|
7
|
+
|
8
|
+
module Wukong
|
9
|
+
#
|
10
|
+
# reference: [Bing Maps Tile System](http://msdn.microsoft.com/en-us/library/bb259689.aspx)
|
11
|
+
#
|
12
|
+
module Geolocated
|
13
|
+
module_function # call methods as eg Wukong::Geolocated.tile_xy_to_quadkey or, if included in class, on self as private methods
|
14
|
+
|
15
|
+
# field :longitude, type: Float, description: "Longitude (X) of a point, in decimal degrees"
|
16
|
+
# field :latitude, type: Float, description: "Latitude (Y) of a point, in decimal degrees"
|
17
|
+
# field :zoom_level, type: Integer, description: "Zoom level of tile to fetch. An integer between 0 (world) and 16 or so"
|
18
|
+
# field :quadkey, type: String, description: "Quadkey of tile, eg 002313012"
|
19
|
+
# field :tile_x, type: Integer, description: "Tile X index, an integer between 0 and 2^zoom_level - 1"
|
20
|
+
# field :tile_y, type: Integer, description: "Tile Y index, an integer between 0 and 2^zoom_level - 1"
|
21
|
+
|
22
|
+
module ByCoordinates
|
23
|
+
extend Gorillib::Concern
|
24
|
+
|
25
|
+
# The quadkey is a string of 2-bit tile selectors for a quadtile
|
26
|
+
#
|
27
|
+
# @example
|
28
|
+
# infochimps_hq = Geo::Place.receive("Infochimps HQ", -97.759003, 30.273884)
|
29
|
+
# infochimps_hq.quadkey(8) # => "02313012"
|
30
|
+
#
|
31
|
+
# Interesting quadkey properties:
|
32
|
+
#
|
33
|
+
# * The quadkey length is its zoom level
|
34
|
+
#
|
35
|
+
# * To zoom out (lower zoom level, larger quadtile), just truncate the
|
36
|
+
# quadkey: austin at ZL=8 has quadkey "02313012"; at ZL=3, "023"
|
37
|
+
#
|
38
|
+
# * Nearby points typically have "nearby" quadkeys: up to the smallest
|
39
|
+
# tile that contains both, their quadkeys will have a common prefix.
|
40
|
+
# If you sort your records by quadkey,
|
41
|
+
# - Nearby points are nearby-ish on disk. (hello, HBase/Cassandra
|
42
|
+
# database owners!) This allows efficient lookup and caching of
|
43
|
+
# "popular" regions or repeated queries in an area.
|
44
|
+
# - the tiles covering a region can be covered by a limited, enumerable
|
45
|
+
# set of range scans. For map-reduce programmers, this leads to very
|
46
|
+
# efficient reducers
|
47
|
+
#
|
48
|
+
# * The quadkey is the bit-interleaved combination of its tile ids:
|
49
|
+
#
|
50
|
+
# tile_x 58 binary 0 0 1 1 1 0 1 0
|
51
|
+
# tile_y 105 binary 0 1 1 0 1 0 0 1
|
52
|
+
# interleaved binary 00 10 11 01 11 00 01 10
|
53
|
+
# quadkey 0 2 3 1 3 0 1 2 # "02313012"
|
54
|
+
#
|
55
|
+
# @param zl [Integer] zoom level
# @return the quadkey produced by Wukong::Geolocated.tile_xy_zl_to_quadkey for this object's tile
def quadkey(zl)
  Wukong::Geolocated.tile_xy_zl_to_quadkey(tile_x(zl), tile_y(zl), zl)
end
|
56
|
+
|
57
|
+
# the packed quadkey is the integer formed by interleaving the bits of tile_x with tile_y:
|
58
|
+
#
|
59
|
+
# tile_x 58 binary 0 0 1 1 1 0 1 0
|
60
|
+
# tile_y 105 binary 0 1 1 0 1 0 0 1
|
61
|
+
# interleaved binary 00 10 11 01 11 00 01 10
|
62
|
+
# quadkey 0 2 3 1 3 0 1 2 # "02313012"
|
63
|
+
#
|
64
|
+
# (see `quadkey` for more.)
|
65
|
+
#
|
66
|
+
# At zoom level 15, the packed quadkey is a 30-bit unsigned integer --
|
67
|
+
# meaning you can store it in a pig `int`; for languages with an `unsigned
|
68
|
+
# int` type, you can go to zoom level 16 before you have to use a
|
69
|
+
# less-efficient type. Zoom level 15 has a resolution of about one tile
|
70
|
+
# per kilometer (about 1.25 km/tile near the equator; 0.75 km/tile at
|
71
|
+
# London's latitude). It takes 1 billion tiles to tile the world at that
|
72
|
+
# scale. Ruby's integer type goes up to 60 bits, enough for any practical
|
73
|
+
# zoom level.
|
74
|
+
#
|
75
|
+
# @param zl [Integer] zoom level; when omitted, the receiver's own `zl` method is
#   consulted (this keeps no-argument calls working for hosts that define `zl`)
# @return the packed quadkey produced by Wukong::Geolocated.tile_xy_zl_to_packed_qk
# @raise [NameError] if no zoom level is given and the receiver has no `zl` method
def packed_qk(zl = send(:zl))
  Wukong::Geolocated.tile_xy_zl_to_packed_qk(tile_x(zl), tile_y(zl), zl)
end
|
76
|
+
|
77
|
+
# @return [Float] x index of the tile this object lies on at given zoom level
|
78
|
+
# @param zl [Integer] zoom level
def tile_xf(zl)
  Wukong::Geolocated.lng_zl_to_tile_xf(longitude, zl)
end
|
79
|
+
# @return [Float] y index of the tile this object lies on at given zoom level
|
80
|
+
# @param zl [Integer] zoom level
def tile_yf(zl)
  Wukong::Geolocated.lat_zl_to_tile_yf(latitude, zl)
end
|
81
|
+
# @return [Integer] x index of the tile this object lies on at given zoom level
|
82
|
+
# @param zl [Integer] zoom level
def tile_x(zl)
  fractional_x = tile_xf(zl)
  fractional_x.floor
end
|
83
|
+
# @return [Integer] y index of the tile this object lies on at given zoom level
|
84
|
+
# @param zl [Integer] zoom level
def tile_y(zl)
  fractional_y = tile_yf(zl)
  fractional_y.floor
end
|
85
|
+
|
86
|
+
# @return [Array<Integer>] tile coordinates `[x, y]` for this object at given zoom level
|
87
|
+
# @param zl [Integer] zoom level
# @return [Array] `[tile_x(zl), tile_y(zl)]`
def tile_xy(zl)
  # Both halves use the same zoom level argument.
  [tile_x(zl), tile_y(zl)]
end
|
88
|
+
|
89
|
+
# @return [Array<Numeric, Numeric>] a `[longitude, latitude]` pair representing object as a point.
|
90
|
+
# Reads `longitude` first, then `latitude`, from the receiver.
def lng_lat
  lng = longitude
  lat = latitude
  [lng, lat]
end
|
91
|
+
|
92
|
+
# @return [Array] bounding box as `[left, btm, right, top]`
|
93
|
+
# @param radius passed through unchanged to Wukong::Geolocated.lng_lat_rad_to_bbox
#   (NOTE(review): presumably meters, matching EARTH_RADIUS — confirm)
def bbox_for_radius(radius)
  Wukong::Geolocated.lng_lat_rad_to_bbox(longitude, latitude, radius)
end
|
94
|
+
end
|
95
|
+
|
96
|
+
EARTH_RADIUS = 6371000 # meters
|
97
|
+
MIN_LONGITUDE = -180
|
98
|
+
MAX_LONGITUDE = 180
|
99
|
+
MIN_LATITUDE = -85.05112878 # NOTE(review): presumably the southern cutoff of the square Mercator map from the Bing Maps reference — confirm
|
100
|
+
MAX_LATITUDE = 85.05112878 # NOTE(review): presumably the northern cutoff of the square Mercator map from the Bing Maps reference — confirm
|
101
|
+
ALLOWED_LONGITUDE = (MIN_LONGITUDE..MAX_LONGITUDE)
|
102
|
+
ALLOWED_LATITUDE = (MIN_LATITUDE..MAX_LATITUDE)
|
103
|
+
TILE_PIXEL_SIZE = 256
|
104
|
+
|
105
|
+
# Width or height in number of tiles
|
106
|
+
# @param zl [Integer] zoom level
# @return [Integer] 1 shifted left by `zl` bits
def map_tile_size(zl) ; 1 << zl ; end
|
109
|
+
|
110
|
+
#
|
111
|
+
# Tile coordinates
|
112
|
+
#
|
113
|
+
|
114
|
+
# Convert longitude in degrees to a _floating-point_ tile x coordinate at given zoom level
|
115
|
+
# @param [Numeric] longitude longitude in degrees, within ALLOWED_LONGITUDE
# @param [Integer] zl zoom level
# @return [Float] fractional tile x coordinate
# @raise [ArgumentError] if longitude is outside ALLOWED_LONGITUDE
def lng_zl_to_tile_xf(longitude, zl)
  unless ALLOWED_LONGITUDE.include?(longitude)
    raise ArgumentError, "longitude must be within bounds ((#{longitude}) vs #{ALLOWED_LONGITUDE})"
  end
  # position across the map as a fraction in 0.0..1.0
  fraction = (longitude.to_f + 180.0) / 360.0
  map_tile_size(zl) * fraction
end
|
120
|
+
|
121
|
+
# Convert latitude in degrees to a _floating-point_ tile y coordinate at given zoom level
|
122
|
+
# @param [Numeric] latitude latitude in degrees, within ALLOWED_LATITUDE
# @param [Integer] zl zoom level
# @return [Float] fractional tile y coordinate
# @raise [ArgumentError] if latitude is outside ALLOWED_LATITUDE
def lat_zl_to_tile_yf(latitude, zl)
  unless ALLOWED_LATITUDE.include?(latitude)
    raise ArgumentError, "latitude must be within bounds ((#{latitude}) vs #{ALLOWED_LATITUDE})"
  end
  sine   = Math.sin(latitude.to_radians)
  # signed offset from the map's vertical center, as a fraction of map height
  offset = Math.log((1 + sine) / (1 - sine)) / (4 * Math::PI)
  map_tile_size(zl) * (0.5 - offset)
end
|
128
|
+
|
129
|
+
# Convert longitude and latitude in degrees to integer tile x,y coordinates at given zoom level
|
130
|
+
# @param [Numeric] longitude longitude in degrees
# @param [Numeric] latitude latitude in degrees
# @param [Integer] zl zoom level
# @return [Array<Integer, Integer>] `[tile_x, tile_y]`, each within 0..(2**zl - 1)
def lng_lat_zl_to_tile_xy(longitude, latitude, zl)
  max_index = map_tile_size(zl) - 1
  fractional = [lng_zl_to_tile_xf(longitude, zl), lat_zl_to_tile_yf(latitude, zl)]
  # The allowed extremes land on (or a hair beyond) the map edge: longitude 180
  # floors to 2**zl, and the latitude limits floor to -1 or 2**zl. Clamp so
  # the result is always a real tile index.
  fractional.map { |coord| [[coord.floor, 0].max, max_index].min }
end
|
133
|
+
|
134
|
+
# Convert from tile_x, tile_y, zoom level to longitude and latitude in
|
135
|
+
# degrees (slight loss of precision).
|
136
|
+
#
|
137
|
+
# Tile coordinates may be floats or integer; they must lie within map range.
|
138
|
+
# @param [Numeric] tile_x tile x coordinate (integer or float), within 0..2**zl
# @param [Numeric] tile_y tile y coordinate (integer or float), within 0..2**zl
# @param [Integer] zl zoom level
# @return [Array<Float, Float>] `[longitude, latitude]` in degrees of that tile coordinate
# @raise [ArgumentError] if either coordinate lies outside the map
def tile_xy_zl_to_lng_lat(tile_x, tile_y, zl)
  tile_size = map_tile_size(zl)
  # The original check tested tile_x twice and never looked at tile_y. The
  # upper bound includes tile_size itself: that is the far edge of the last
  # tile, which quadkey_to_bbox asks for via (tile_x + 1, tile_y + 1).
  map_range = (0..tile_size)
  unless map_range.include?(tile_x) && map_range.include?(tile_y)
    raise ArgumentError, "tile index must be within bounds ((#{tile_x},#{tile_y}) vs #{tile_size})"
  end
  xx  = (tile_x.to_f / tile_size)
  yy  = 0.5 - (tile_y.to_f / tile_size)
  lng = 360.0 * xx - 180.0
  lat = 90 - 360 * Math.atan(Math.exp(-yy * 2 * Math::PI)) / Math::PI
  [lng, lat]
end
|
147
|
+
|
148
|
+
#
|
149
|
+
# Quadkey coordinates
|
150
|
+
#
|
151
|
+
|
152
|
+
# converts from even/odd state of tile x and tile y to quadkey. NOTE: bit order means y, x
|
153
|
+
BIT_TO_QUADKEY = {
  [false, false] => "0",
  [false, true]  => "1",
  [true,  false] => "2",
  [true,  true]  => "3",
}
# maps a quadkey character back to its bit pair. NOTE: bit order means y, x
QUADKEY_TO_BIT = {
  "0" => [0, 0],
  "1" => [0, 1],
  "2" => [1, 0],
  "3" => [1, 1],
}
|
156
|
+
|
157
|
+
# Convert from tile x,y into a quadkey at a specified zoom level
|
158
|
+
# @param [Numeric] tile_x tile x index (converted with to_i)
# @param [Numeric] tile_y tile y index (converted with to_i)
# @param [Integer] zl zoom level; also the length of the returned quadkey
# @return [String] quadkey made of the characters 0-3
def tile_xy_zl_to_quadkey(tile_x, tile_y, zl)
  col = tile_x.to_i
  row = tile_y.to_i
  # emit one character per bit position, most significant first
  digits = (zl - 1).downto(0).map do |bit|
    BIT_TO_QUADKEY[[row[bit] == 1, col[bit] == 1]] # bit order y,x
  end
  digits.join
end
|
168
|
+
|
169
|
+
# Convert a quadkey into tile x,y coordinates and level
|
170
|
+
# @param [String] quadkey string made only of the characters 0-3
# @return [Array<Integer, Integer, Integer>] `[tile_x, tile_y, zl]`, where zl is the quadkey length
# @raise [ArgumentError] if the quadkey contains any other character
def quadkey_to_tile_xy_zl(quadkey)
  unless quadkey =~ /\A[0-3]*\z/
    raise ArgumentError, "Quadkey must contain only the characters 0, 1, 2 or 3: #{quadkey}!"
  end
  # shift one bit into each coordinate per quadkey character
  tile_x, tile_y = quadkey.chars.inject([0, 0]) do |(col, row), digit|
    row_bit, col_bit = QUADKEY_TO_BIT[digit] # bit order y, x
    [(col << 1) + col_bit, (row << 1) + row_bit]
  end
  [tile_x, tile_y, quadkey.to_s.length]
end
|
181
|
+
|
182
|
+
# Convert from tile x,y into a packed quadkey at a specified zoom level
|
183
|
+
# @param [Numeric] tile_x tile x index
# @param [Numeric] tile_y tile y index
# @param [Integer] zl zoom level
# @return [Integer] the quadkey string read as a base-4 number
def tile_xy_zl_to_packed_qk(tile_x, tile_y, zl)
  # Going through the string form is deliberate: measure before replacing it
  # with a hand-rolled bit loop, since string ops are faster than you think.
  tile_xy_zl_to_quadkey(tile_x, tile_y, zl).to_i(4)
end
|
189
|
+
|
190
|
+
# Convert a packed quadkey (integer) into tile x,y coordinates and level
|
191
|
+
# @param [Integer] packed_qk packed quadkey, within 0...4**zl
# @param [Integer] zl zoom level the quadkey was packed at
# @return [Array<Integer, Integer, Integer>] `[tile_x, tile_y, zl]`, as returned by quadkey_to_tile_xy_zl
# @raise [ArgumentError] if packed_qk is not a non-negative integer in range for zl
def packed_qk_zl_to_tile_xy(packed_qk, zl=16)
  # don't "optimize" this without testing... string operations are faster than you think in ruby
  #
  # Integer replaces Fixnum, which modern Ruby no longer defines. Negative
  # values are rejected here so they fail with this message rather than later.
  in_range = packed_qk.is_a?(Integer) && packed_qk >= 0 && (packed_qk < 2 ** (zl*2))
  raise ArgumentError, "Quadkey must be an integer in range of the zoom level: #{packed_qk}, #{zl}" unless in_range
  # left-pad to zl characters; at zoom 0 the only quadkey is the empty string
  quadkey = zl.zero? ? "" : packed_qk.to_s(4).rjust(zl, "0")
  quadkey_to_tile_xy_zl(quadkey)
end
|
198
|
+
|
199
|
+
# Convert a lat/lng and zoom level into a quadkey
|
200
|
+
# @param [Numeric] longitude longitude in degrees
# @param [Numeric] latitude latitude in degrees
# @param [Integer] zl zoom level
# @return [String] quadkey of the tile containing the point
def lng_lat_zl_to_quadkey(longitude, latitude, zl)
  # first find the integer tile, then spell it out as a quadkey
  col, row = lng_lat_zl_to_tile_xy(longitude, latitude, zl)
  tile_xy_zl_to_quadkey(col, row, zl)
end
|
204
|
+
|
205
|
+
#
|
206
|
+
# Bounding box coordinates
|
207
|
+
#
|
208
|
+
|
209
|
+
# Convert a quadkey into a bounding box using adjacent tile
|
210
|
+
# @param [String] quadkey quadkey of the tile
# @return [Array] `[left, btm, right, top]` of the tile
def quadkey_to_bbox(quadkey)
  col, row, zoom = quadkey_to_tile_xy_zl(quadkey)
  west, north = tile_xy_zl_to_lng_lat(col, row, zoom)
  # this tile's bottom-right corner is the top-left corner of its southeast neighbor
  east, south = tile_xy_zl_to_lng_lat(col + 1, row + 1, zoom)
  [west, south, east, north]
end
|
217
|
+
|
218
|
+
# Returns the smallest quadkey containing both corners of the given bounding box
|
219
|
+
# @param left longitude of the left edge
# @param btm latitude of the bottom edge
# @param right longitude of the right edge
# @param top latitude of the top edge
# @return [String] the quadkey prefix shared by the top-left and bottom-right corners
def quadkey_containing_bbox(left, btm, right, top)
  top_left_key  = lng_lat_zl_to_quadkey(left,  top, 23)
  btm_right_key = lng_lat_zl_to_quadkey(right, btm, 23)
  # the containing quadkey is the longest prefix both corners agree on
  shared = top_left_key.chars.zip(btm_right_key.chars).take_while { |tl_char, br_char| tl_char == br_char }
  shared.map(&:first).join
end
|
230
|
+
|
231
|
+
# Returns a bounding box containing the circle created by the lat/lng and radius
|
232
|
+
# @param longitude longitude of the circle's center
# @param latitude latitude of the circle's center
# @param radius distance handed to point_east / point_north
# @return [Array] `[left, btm, right, top]`
def lng_lat_rad_to_bbox(longitude, latitude, radius)
  # step a negative distance to reach the west and south edges
  west  = point_east( longitude, latitude, -radius).first
  south = point_north(longitude, latitude, -radius).last
  east  = point_east( longitude, latitude,  radius).first
  north = point_north(longitude, latitude,  radius).last
  [west, south, east, north]
end
|
239
|
+
|
240
|
+
# Returns the centroid of a bounding box
|
241
|
+
#
|
242
|
+
# @param [Array<Float, Float>] left_btm Longitude, Latitude of SW point
|
243
|
+
# @param [Array<Float, Float>] right_top Longitude, Latitude of NE point
|
244
|
+
#
|
245
|
+
# @return [Array<Float, Float>] Longitude, Latitude of centroid
|
246
|
+
def bbox_centroid(left_btm, right_top)
  # flatten the two corner pairs into (left, btm, right, top)
  corners = [*left_btm, *right_top]
  haversine_midpoint(*corners)
end
|
249
|
+
|
250
|
+
# Return the haversine distance in meters between two points
|
251
|
+
# @param left longitude of the first point, in degrees
# @param btm latitude of the first point, in degrees
# @param right longitude of the second point, in degrees
# @param top latitude of the second point, in degrees
# @return [Float] distance in meters
def haversine_distance(left, btm, right, top)
  lng_span = (right - left).abs.to_radians
  lat_span = (top   - btm ).abs.to_radians
  lat_a    = btm.to_radians
  lat_b    = top.to_radians

  chord = (Math.sin(lat_span / 2.0))**2 + Math.cos(lat_b) * Math.cos(lat_a) * (Math.sin(lng_span / 2.0))**2
  angle = 2.0 * Math.atan2(Math.sqrt(chord), Math.sqrt(1.0 - chord))
  # scale the central angle (radians) up to meters
  angle * EARTH_RADIUS
end
|
261
|
+
|
262
|
+
# Return the haversine midpoint between two points, as a `[longitude, latitude]` pair in degrees
|
263
|
+
# @param left longitude of the first point, in degrees
# @param btm latitude of the first point, in degrees
# @param right longitude of the second point, in degrees
# @param top latitude of the second point, in degrees
# @return [Array<Float, Float>] `[longitude, latitude]` of the midpoint, in degrees
def haversine_midpoint(left, btm, right, top)
  btm_rad   = btm.to_radians
  top_rad   = top.to_radians
  delta_lng = (right - left).to_radians
  # Offset of the second point (right, top) as seen from the first. The
  # original scaled these by cos(btm) and added cos(top) below, which is only
  # correct when both latitudes are equal; otherwise the mid-longitude is
  # measured from the wrong end.
  bearing_x = Math.cos(top_rad) * Math.cos(delta_lng)
  bearing_y = Math.cos(top_rad) * Math.sin(delta_lng)
  sum_x     = Math.cos(btm_rad) + bearing_x
  mid_lat = Math.atan2(
    (Math.sin(top_rad) + Math.sin(btm_rad)),
    (Math.sqrt(sum_x**2 + bearing_y**2)))
  mid_lng = left.to_radians + Math.atan2(bearing_y, sum_x)
  [mid_lng.to_degrees, mid_lat.to_degrees]
end
|
274
|
+
|
275
|
+
# From a given point, calculate the point directly north a specified distance
|
276
|
+
# @param longitude longitude of the starting point, in degrees
# @param latitude latitude of the starting point, in degrees
# @param distance distance to travel north, in meters (negative moves south)
# @return [Array] `[longitude, latitude]` of the destination
def point_north(longitude, latitude, distance)
  # arc length over radius gives the change in latitude, in radians
  angular_shift = distance.to_f / EARTH_RADIUS
  [longitude, (latitude.to_radians + angular_shift).to_degrees]
end
|
280
|
+
|
281
|
+
# From a given point, calculate the point directly east a specified distance
|
282
|
+
# @param longitude longitude of the starting point, in degrees
# @param latitude latitude of the starting point, in degrees
# @param distance distance to travel east, in meters (negative moves west)
# @return [Array] `[longitude, latitude]` of the destination
def point_east(longitude, latitude, distance)
  colatitude      = (Math::PI / 2.0) - latitude.to_radians.abs
  # radius of the circle of constant latitude through the starting point
  parallel_radius = EARTH_RADIUS * Math.sin(colatitude)
  shifted_lng     = longitude.to_radians + (distance.to_f / parallel_radius)
  [shifted_lng.to_degrees, latitude]
end
|
287
|
+
|
288
|
+
#
|
289
|
+
# Pixel coordinates
|
290
|
+
#
|
291
|
+
# Use with a standard (256x256 pixel) grid-based tileserver
|
292
|
+
#
|
293
|
+
|
294
|
+
# Width or height of grid bitmap in pixels at given zoom level
|
295
|
+
# @param [Integer] zl zoom level
# @return [Integer] pixels along one edge of the whole map at that zoom level
def map_pixel_size(zl) ; TILE_PIXEL_SIZE * map_tile_size(zl) ; end
|
298
|
+
|
299
|
+
# Return pixel resolution in meters per pixel at a specified latitude and zoom level
|
300
|
+
# @param [Numeric] latitude latitude in degrees; clamped to MIN_LATITUDE..MAX_LATITUDE
# @param [Integer] zl zoom level
# @return [Float] meters per pixel
def pixel_resolution(latitude, zl)
  bounded = latitude.clamp(MIN_LATITUDE, MAX_LATITUDE)
  # length of the circle of latitude, in meters
  parallel_length = Math.cos(bounded.to_radians) * 2 * Math::PI * EARTH_RADIUS
  parallel_length / map_pixel_size(zl).to_f
end
|
304
|
+
|
305
|
+
# Map scale at a specified latitude, zoom level, & screen resolution in dpi
|
306
|
+
# @param [Numeric] latitude latitude in degrees
# @param [Integer] zl zoom level
# @param [Numeric] screen_dpi screen resolution in dots per inch
# @return [Float] map scale
def map_scale_for_dpi(latitude, zl, screen_dpi)
  meters_per_pixel = pixel_resolution(latitude, zl)
  # 0.0254 is meters per inch
  meters_per_pixel * screen_dpi / 0.0254
end
|
309
|
+
|
310
|
+
# Convert from x,y pixel pair into tile x,y coordinates
|
311
|
+
# @param [Integer] pixel_x pixel x coordinate
# @param [Integer] pixel_y pixel y coordinate
# @return [Array] `[tile_x, tile_y]` of the tile containing that pixel
def pixel_xy_to_tile_xy(pixel_x, pixel_y)
  [pixel_x, pixel_y].map { |pixel| pixel / TILE_PIXEL_SIZE }
end
|
314
|
+
|
315
|
+
# Convert from x,y tile pair into pixel x,y coordinates (top left corner)
|
316
|
+
# @param [Numeric] tile_x tile x index
# @param [Numeric] tile_y tile y index
# @return [Array] `[pixel_x, pixel_y]` of the tile's top left corner
def tile_xy_to_pixel_xy(tile_x, tile_y)
  [tile_x, tile_y].map { |tile| tile * TILE_PIXEL_SIZE }
end
|
319
|
+
|
320
|
+
# Convert from pixel x,y and zoom level to longitude and latitude
#
# @param [Numeric] pixel_x pixel x coordinate
# @param [Numeric] pixel_y pixel y coordinate
# @param [Integer] zl zoom level
# @return [Array] whatever tile_xy_zl_to_lng_lat returns for the fractional tile coordinates
def pixel_xy_zl_to_lng_lat(pixel_x, pixel_y, zl)
  frac_x = pixel_x.to_f / TILE_PIXEL_SIZE
  frac_y = pixel_y.to_f / TILE_PIXEL_SIZE
  tile_xy_zl_to_lng_lat(frac_x, frac_y, zl)
end
|
323
|
+
|
324
|
+
# Convert longitude and latitude to pixel x,y coordinates at given zoom level
#
# @param [Numeric] lng longitude in degrees
# @param [Numeric] lat latitude in degrees
# @param [Integer] zl zoom level
# @return [Array<Integer, Integer>] `[pixel_x, pixel_y]`
def lng_lat_zl_to_pixel_xy(lng, lat, zl)
  fractional_tiles = [lng_zl_to_tile_xf(lng, zl), lat_zl_to_tile_yf(lat, zl)]
  # scale tiles to pixels; adding 0.5 before flooring rounds to the nearest pixel
  fractional_tiles.map { |tile_coord| (tile_coord * TILE_PIXEL_SIZE + 0.5).floor }
end
|
329
|
+
|
330
|
+
end
|
331
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Wukong
  module Geo
    # A single map tile, addressed by tile x/y index and zoom level, that can
    # name itself and fetch its image from a tileserver.
    class Quadtile
      include Gorillib::Model
      #
      field :tile_x, Integer, position: 0, doc: "Tile X index, an integer between 0 and 2^zoom_level - 1"
      field :tile_y, Integer, position: 1, doc: "Tile Y index, an integer between 0 and 2^zoom_level - 1"
      field :zl,     Integer, position: 2, doc: "Zoom level of tile to fetch. 0 is the world; 16 is about a kilometer."
      field :slug,   String,  default: 'tile', doc: "Name, prefixed on saved tiles"

      # @return [String] quadkey of this tile
      def quadkey   ; Wukong::Geolocated.tile_xy_zl_to_quadkey(  tile_x, tile_y, zl) ; end
      # @return [Integer] packed quadkey of this tile
      def packed_qk ; Wukong::Geolocated.tile_xy_zl_to_packed_qk(tile_x, tile_y, zl) ; end

      # Base of URL for map tile server; anything X/Y/Z.png-addressable works,
      # eg `http://b.tile.openstreetmap.org`. Defaults to `http://a.tile.stamen.com/toner-lite`.
      class_attribute :tileserver_url_base
      self.tileserver_url_base = 'http://a.tile.stamen.com/toner-lite'

      # Build a Quadtile from whichever addressing scheme the hash supplies.
      #
      # @param [Hash] hsh must carry either `:tile_x`, `:tile_y` and `:zl`;
      #   `:longitude`, `:latitude` and `:zl`; or `:quadkey` (optionally with a
      #   `:zl` to shorten it to)
      # @return [Quadtile]
      # @raise [ArgumentError] if none of those key sets is present
      def self.from_whatever(hsh)
        zl = hsh[:zl] ? hsh[:zl].to_i : nil
        case
        when hsh[:tile_x].present? && hsh[:tile_y].present? && zl.present?
          tile_x, tile_y = [hsh[:tile_x], hsh[:tile_y]]
        when hsh[:longitude].present? && hsh[:latitude].present? && zl.present?
          tile_x, tile_y = Wukong::Geolocated.lng_lat_zl_to_tile_xy(hsh[:longitude], hsh[:latitude], zl)
        when hsh[:quadkey].present?
          quadkey = hsh[:quadkey]
          # A quadkey's length is its zoom level, so keep exactly `zl`
          # characters; the inclusive range `0..zl` kept one too many.
          quadkey = quadkey[0...zl] if zl.to_i > 0
          tile_x, tile_y, zl = Wukong::Geolocated.quadkey_to_tile_xy_zl(quadkey)
        else
          raise ArgumentError, "You must supply keys for either `:longitude`, `:latitude` and `:zl`; `:tile_x`, `:tile_y` and `:zl`; or `:quadkey`: #{hsh.inspect}"
        end
        new(tile_x, tile_y, zl, hsh.to_hash)
      end

      # @return a Faraday connection rooted at `tileserver_url_base`; a new one
      #   is built on every call
      def self.tileserver_conn
        @tileserver_conn = Faraday.new(:url => tileserver_url_base)
      end

      # @return [String] URL of this tile's image: `<base>/<zl>/<tile_x>/<tile_y>.png`
      def tile_url
        [tileserver_url_base, zl, tile_x, tile_y].join('/') << ".png"
      end

      # Relative file name for this tile: `<slug>/<zl>/<slug><sep><quadkey>.<ext>`,
      # with the zoom level zero-padded to two digits.
      #
      # @example
      #   qt = Quadtile.from_whatever(longitude: -97.759003, latitude: 30.273884, zl: 15)
      #   qt.basename             # => "tile/15/tile-<quadkey>.png"
      #   qt.basename(ext: 'jpg') # => "tile/15/tile-<quadkey>.jpg"
      #
      # @param [Hash] options
      # @option options [String] :sep ('-') separator between slug and quadkey
      # @option options [String] :ext ('png') file extension
      # @return [String]
      def basename(options={})
        # merge over the defaults; the original reassigned `options` and so
        # silently discarded whatever the caller passed
        options = { sep: '-', ext: 'png' }.merge(options)
        sep = options[:sep]
        # "%s%s%02d%s%04d%s%04d.%s" % [slug, sep, zl, sep, tile_x, sep, tile_y, options[:ext]]
        "%s/%02d/%s%s%s.%s" % [slug, zl, slug, sep, quadkey, options[:ext]]
      end

      # Fetch the contents of a map tile from a tileserver
      #
      # You are responsible for requiring the faraday library and its adapter
      #
      def fetch
        self.class.tileserver_conn.get(tile_url)
      end

    end
  end
end
|