wukong 3.0.0.pre → 3.0.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +46 -33
- data/.gitmodules +3 -0
- data/.rspec +1 -1
- data/.travis.yml +8 -1
- data/.yardopts +0 -13
- data/Guardfile +4 -6
- data/{LICENSE.textile → LICENSE.md} +43 -55
- data/README-old.md +422 -0
- data/README.md +279 -418
- data/Rakefile +21 -5
- data/TODO.md +6 -6
- data/bin/wu-clean-encoding +31 -0
- data/bin/wu-lign +2 -2
- data/bin/wu-local +69 -0
- data/bin/wu-server +70 -0
- data/examples/Gemfile +38 -0
- data/examples/README.md +9 -0
- data/examples/dataflow/apache_log_line.rb +64 -25
- data/examples/dataflow/fibonacci_series.rb +101 -0
- data/examples/dataflow/parse_apache_logs.rb +37 -7
- data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
- data/examples/dataflow/simple.rb +4 -4
- data/examples/geo.rb +4 -0
- data/examples/geo/geo_grids.numbers +0 -0
- data/examples/geo/geolocated.rb +331 -0
- data/examples/geo/quadtile.rb +69 -0
- data/examples/geo/spec/geolocated_spec.rb +247 -0
- data/examples/geo/tile_fetcher.rb +77 -0
- data/examples/graph/minimum_spanning_tree.rb +61 -61
- data/examples/jabberwocky.txt +36 -0
- data/examples/models/wikipedia.rb +20 -0
- data/examples/munging/Gemfile +8 -0
- data/examples/munging/airline_flights/airline.rb +57 -0
- data/examples/munging/airline_flights/airline_flights.rake +83 -0
- data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
- data/examples/munging/airline_flights/airport.rb +211 -0
- data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
- data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
- data/examples/munging/airline_flights/flight.rb +156 -0
- data/examples/munging/airline_flights/models.rb +4 -0
- data/examples/munging/airline_flights/parse.rb +26 -0
- data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
- data/examples/munging/airline_flights/route.rb +35 -0
- data/examples/munging/airline_flights/tasks.rake +83 -0
- data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
- data/examples/munging/airline_flights/topcities.rb +167 -0
- data/examples/munging/airports/40_wbans.txt +40 -0
- data/examples/munging/airports/filter_weather_reports.rb +37 -0
- data/examples/munging/airports/join.pig +31 -0
- data/examples/munging/airports/to_tsv.rb +33 -0
- data/examples/munging/airports/usa_wbans.pig +19 -0
- data/examples/munging/airports/usa_wbans.txt +2157 -0
- data/examples/munging/airports/wbans.pig +19 -0
- data/examples/munging/airports/wbans.txt +2310 -0
- data/examples/munging/geo/geo_json.rb +54 -0
- data/examples/munging/geo/geo_models.rb +69 -0
- data/examples/munging/geo/geonames_models.rb +78 -0
- data/examples/munging/geo/iso_codes.rb +172 -0
- data/examples/munging/geo/reconcile_countries.rb +124 -0
- data/examples/munging/geo/tasks.rake +71 -0
- data/examples/munging/rake_helper.rb +62 -0
- data/examples/munging/weather/.gitignore +1 -0
- data/examples/munging/weather/Gemfile +4 -0
- data/examples/munging/weather/Rakefile +28 -0
- data/examples/munging/weather/extract_ish.rb +13 -0
- data/examples/munging/weather/models/weather.rb +119 -0
- data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
- data/examples/munging/wikipedia/README.md +34 -0
- data/examples/munging/wikipedia/Rakefile +193 -0
- data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
- data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
- data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
- data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
- data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
- data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
- data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
- data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
- data/examples/munging/wikipedia/pig_style_guide.md +25 -0
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
- data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
- data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
- data/examples/munging/wikipedia/utils/namespaces.json +1 -0
- data/examples/rake_helper.rb +85 -0
- data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/server_logs/logline.rb +95 -0
- data/examples/server_logs/models.rb +66 -0
- data/examples/server_logs/page_counts.pig +48 -0
- data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
- data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
- data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
- data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
- data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
- data/examples/string_reverser.rb +26 -0
- data/examples/text/pig_latin.rb +2 -2
- data/examples/text/regional_flavor/README.md +14 -0
- data/examples/text/regional_flavor/article_wordbags.pig +39 -0
- data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
- data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
- data/examples/word_count/accumulator.rb +26 -0
- data/examples/word_count/tokenizer.rb +13 -0
- data/examples/word_count/word_count.rb +6 -0
- data/examples/workflow/cherry_pie.dot +97 -0
- data/examples/workflow/cherry_pie.png +0 -0
- data/examples/workflow/cherry_pie.rb +61 -26
- data/lib/hanuman.rb +34 -7
- data/lib/hanuman/graph.rb +55 -31
- data/lib/hanuman/graphvizzer.rb +199 -178
- data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
- data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
- data/lib/hanuman/link.rb +35 -0
- data/lib/hanuman/registry.rb +46 -0
- data/lib/hanuman/stage.rb +76 -32
- data/lib/wukong.rb +23 -24
- data/lib/wukong/boot.rb +87 -0
- data/lib/wukong/configuration.rb +8 -0
- data/lib/wukong/dataflow.rb +45 -78
- data/lib/wukong/driver.rb +99 -0
- data/lib/wukong/emitter.rb +22 -0
- data/lib/wukong/model/faker.rb +24 -24
- data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
- data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
- data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
- data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
- data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
- data/lib/wukong/processor.rb +60 -114
- data/lib/wukong/spec_helpers.rb +81 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
- data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
- data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
- data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
- data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
- data/lib/wukong/version.rb +2 -1
- data/lib/wukong/widget/filters.rb +311 -0
- data/lib/wukong/widget/processors.rb +156 -0
- data/lib/wukong/widget/reducers.rb +7 -0
- data/lib/wukong/widget/reducers/accumulator.rb +73 -0
- data/lib/wukong/widget/reducers/bin.rb +318 -0
- data/lib/wukong/widget/reducers/count.rb +61 -0
- data/lib/wukong/widget/reducers/group.rb +85 -0
- data/lib/wukong/widget/reducers/group_concat.rb +70 -0
- data/lib/wukong/widget/reducers/moments.rb +72 -0
- data/lib/wukong/widget/reducers/sort.rb +130 -0
- data/lib/wukong/widget/serializers.rb +287 -0
- data/lib/wukong/widget/sink.rb +10 -52
- data/lib/wukong/widget/source.rb +7 -113
- data/lib/wukong/widget/utils.rb +46 -0
- data/lib/wukong/widgets.rb +6 -0
- data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
- data/spec/examples/dataflow/parsing_spec.rb +12 -11
- data/spec/examples/dataflow/simple_spec.rb +32 -6
- data/spec/examples/dataflow/telegram_spec.rb +36 -36
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
- data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
- data/spec/examples/munging/airline_flights_spec.rb +202 -0
- data/spec/examples/text/pig_latin_spec.rb +13 -16
- data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
- data/spec/hanuman/graph_spec.rb +27 -2
- data/spec/hanuman/hanuman_spec.rb +10 -0
- data/spec/hanuman/registry_spec.rb +123 -0
- data/spec/hanuman/stage_spec.rb +61 -7
- data/spec/spec_helper.rb +29 -19
- data/spec/support/hanuman_test_helpers.rb +14 -12
- data/spec/support/shared_context_for_reducers.rb +37 -0
- data/spec/support/shared_examples_for_builders.rb +101 -0
- data/spec/support/shared_examples_for_shortcuts.rb +57 -0
- data/spec/support/wukong_test_helpers.rb +37 -11
- data/spec/wukong/dataflow_spec.rb +77 -55
- data/spec/wukong/local_runner_spec.rb +24 -24
- data/spec/wukong/model/faker_spec.rb +132 -131
- data/spec/wukong/runner_spec.rb +8 -8
- data/spec/wukong/widget/filters_spec.rb +61 -0
- data/spec/wukong/widget/processors_spec.rb +126 -0
- data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
- data/spec/wukong/widget/reducers/count_spec.rb +11 -0
- data/spec/wukong/widget/reducers/group_spec.rb +20 -0
- data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
- data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
- data/spec/wukong/widget/serializers_spec.rb +92 -0
- data/spec/wukong/widget/sink_spec.rb +15 -15
- data/spec/wukong/widget/source_spec.rb +65 -41
- data/spec/wukong/wukong_spec.rb +10 -0
- data/wukong.gemspec +17 -10
- metadata +359 -335
- data/.document +0 -5
- data/VERSION +0 -1
- data/bin/hdp-bin +0 -44
- data/bin/hdp-bzip +0 -23
- data/bin/hdp-cat +0 -3
- data/bin/hdp-catd +0 -3
- data/bin/hdp-cp +0 -3
- data/bin/hdp-du +0 -86
- data/bin/hdp-get +0 -3
- data/bin/hdp-kill +0 -3
- data/bin/hdp-kill-task +0 -3
- data/bin/hdp-ls +0 -11
- data/bin/hdp-mkdir +0 -2
- data/bin/hdp-mkdirp +0 -12
- data/bin/hdp-mv +0 -3
- data/bin/hdp-parts_to_keys.rb +0 -77
- data/bin/hdp-ps +0 -3
- data/bin/hdp-put +0 -3
- data/bin/hdp-rm +0 -32
- data/bin/hdp-sort +0 -40
- data/bin/hdp-stream +0 -40
- data/bin/hdp-stream-flat +0 -22
- data/bin/hdp-stream2 +0 -39
- data/bin/hdp-sync +0 -17
- data/bin/hdp-wc +0 -67
- data/bin/wu-flow +0 -10
- data/bin/wu-map +0 -17
- data/bin/wu-red +0 -17
- data/bin/wukong +0 -17
- data/data/CREDITS.md +0 -355
- data/data/graph/airfares.tsv +0 -2174
- data/data/text/gift_of_the_magi.txt +0 -225
- data/data/text/jabberwocky.txt +0 -36
- data/data/text/rectification_of_names.txt +0 -33
- data/data/twitter/a_atsigns_b.tsv +0 -64
- data/data/twitter/a_follows_b.tsv +0 -53
- data/data/twitter/tweet.tsv +0 -167
- data/data/twitter/twitter_user.tsv +0 -55
- data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
- data/docpages/INSTALL.textile +0 -92
- data/docpages/LICENSE.textile +0 -107
- data/docpages/README-elastic_map_reduce.textile +0 -377
- data/docpages/README-performance.textile +0 -90
- data/docpages/README-wulign.textile +0 -65
- data/docpages/UsingWukong-part1-get_ready.textile +0 -17
- data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
- data/docpages/UsingWukong-part3-parsing.textile +0 -138
- data/docpages/_config.yml +0 -39
- data/docpages/avro/avro_notes.textile +0 -56
- data/docpages/avro/performance.textile +0 -36
- data/docpages/avro/tethering.textile +0 -19
- data/docpages/bigdata-tips.textile +0 -143
- data/docpages/code/api_response_example.txt +0 -20
- data/docpages/code/parser_skeleton.rb +0 -38
- data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +0 -16
- data/docpages/hadoop-tips.textile +0 -83
- data/docpages/index.textile +0 -92
- data/docpages/intro.textile +0 -8
- data/docpages/moreinfo.textile +0 -174
- data/docpages/news.html +0 -24
- data/docpages/pig/PigLatinExpressionsList.txt +0 -122
- data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
- data/docpages/pig/commandline_params.txt +0 -26
- data/docpages/pig/cookbook.html +0 -481
- data/docpages/pig/images/hadoop-logo.jpg +0 -0
- data/docpages/pig/images/instruction_arrow.png +0 -0
- data/docpages/pig/images/pig-logo.gif +0 -0
- data/docpages/pig/piglatin_ref1.html +0 -1103
- data/docpages/pig/piglatin_ref2.html +0 -14340
- data/docpages/pig/setup.html +0 -505
- data/docpages/pig/skin/basic.css +0 -166
- data/docpages/pig/skin/breadcrumbs.js +0 -237
- data/docpages/pig/skin/fontsize.js +0 -166
- data/docpages/pig/skin/getBlank.js +0 -40
- data/docpages/pig/skin/getMenu.js +0 -45
- data/docpages/pig/skin/images/chapter.gif +0 -0
- data/docpages/pig/skin/images/chapter_open.gif +0 -0
- data/docpages/pig/skin/images/current.gif +0 -0
- data/docpages/pig/skin/images/external-link.gif +0 -0
- data/docpages/pig/skin/images/header_white_line.gif +0 -0
- data/docpages/pig/skin/images/page.gif +0 -0
- data/docpages/pig/skin/images/pdfdoc.gif +0 -0
- data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/print.css +0 -54
- data/docpages/pig/skin/profile.css +0 -181
- data/docpages/pig/skin/screen.css +0 -587
- data/docpages/pig/tutorial.html +0 -1059
- data/docpages/pig/udf.html +0 -1509
- data/docpages/tutorial.textile +0 -283
- data/docpages/usage.textile +0 -195
- data/docpages/wutils.textile +0 -263
- data/examples/dataflow/complex.rb +0 -11
- data/examples/dataflow/donuts.rb +0 -13
- data/examples/tiny_count/jabberwocky_output.tsv +0 -92
- data/examples/word_count.rb +0 -48
- data/examples/workflow/fiddle.rb +0 -24
- data/lib/away/escapement.rb +0 -129
- data/lib/away/exe.rb +0 -11
- data/lib/away/experimental.rb +0 -5
- data/lib/away/from_file.rb +0 -52
- data/lib/away/job.rb +0 -56
- data/lib/away/job/rake_compat.rb +0 -17
- data/lib/away/registry.rb +0 -79
- data/lib/away/runner.rb +0 -276
- data/lib/away/runner/execute.rb +0 -121
- data/lib/away/script.rb +0 -161
- data/lib/away/script/hadoop_command.rb +0 -240
- data/lib/away/source/file_list_source.rb +0 -15
- data/lib/away/source/looper.rb +0 -18
- data/lib/away/task.rb +0 -219
- data/lib/hanuman/action.rb +0 -21
- data/lib/hanuman/chain.rb +0 -4
- data/lib/hanuman/graphviz.rb +0 -74
- data/lib/hanuman/resource.rb +0 -6
- data/lib/hanuman/slot.rb +0 -87
- data/lib/hanuman/slottable.rb +0 -220
- data/lib/wukong/bad_record.rb +0 -15
- data/lib/wukong/event.rb +0 -44
- data/lib/wukong/local_runner.rb +0 -55
- data/lib/wukong/mapred.rb +0 -3
- data/lib/wukong/universe.rb +0 -48
- data/lib/wukong/widget/filter.rb +0 -81
- data/lib/wukong/widget/gibberish.rb +0 -123
- data/lib/wukong/widget/monitor.rb +0 -26
- data/lib/wukong/widget/reducer.rb +0 -66
- data/lib/wukong/widget/stringifier.rb +0 -50
- data/lib/wukong/workflow.rb +0 -22
- data/lib/wukong/workflow/command.rb +0 -42
- data/old/config/emr-example.yaml +0 -48
- data/old/examples/README.txt +0 -17
- data/old/examples/contrib/jeans/README.markdown +0 -165
- data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
- data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
- data/old/examples/contrib/jeans/data/sizes +0 -3
- data/old/examples/contrib/jeans/normalize.rb +0 -20
- data/old/examples/contrib/jeans/sizes.rb +0 -55
- data/old/examples/corpus/bnc_word_freq.rb +0 -44
- data/old/examples/corpus/bucket_counter.rb +0 -47
- data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
- data/old/examples/corpus/sentence_bigrams.rb +0 -53
- data/old/examples/corpus/sentence_coocurrence.rb +0 -66
- data/old/examples/corpus/stopwords.rb +0 -138
- data/old/examples/corpus/words_to_bigrams.rb +0 -53
- data/old/examples/emr/README.textile +0 -110
- data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
- data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
- data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
- data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
- data/old/examples/network_graph/adjacency_list.rb +0 -74
- data/old/examples/network_graph/breadth_first_search.rb +0 -72
- data/old/examples/network_graph/gen_2paths.rb +0 -68
- data/old/examples/network_graph/gen_multi_edge.rb +0 -112
- data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
- data/old/examples/pagerank/README.textile +0 -6
- data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
- data/old/examples/pagerank/pagerank.rb +0 -72
- data/old/examples/pagerank/pagerank_initialize.rb +0 -42
- data/old/examples/pagerank/run_pagerank.sh +0 -21
- data/old/examples/sample_records.rb +0 -33
- data/old/examples/server_logs/apache_log_parser.rb +0 -15
- data/old/examples/server_logs/nook.rb +0 -48
- data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
- data/old/examples/server_logs/user_agent.rb +0 -40
- data/old/examples/simple_word_count.rb +0 -82
- data/old/examples/size.rb +0 -61
- data/old/examples/stats/avg_value_frequency.rb +0 -86
- data/old/examples/stats/binning_percentile_estimator.rb +0 -140
- data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
- data/old/examples/stats/rank_and_bin.rb +0 -173
- data/old/examples/stupidly_simple_filter.rb +0 -40
- data/old/examples/word_count.rb +0 -75
- data/old/graph/graphviz_builder.rb +0 -580
- data/old/graph_easy/Attributes.pm +0 -4181
- data/old/graph_easy/Graphviz.pm +0 -2232
- data/old/wukong.rb +0 -18
- data/old/wukong/and_pig.rb +0 -38
- data/old/wukong/bad_record.rb +0 -18
- data/old/wukong/datatypes.rb +0 -24
- data/old/wukong/datatypes/enum.rb +0 -127
- data/old/wukong/datatypes/fake_types.rb +0 -17
- data/old/wukong/decorator.rb +0 -28
- data/old/wukong/encoding/asciize.rb +0 -108
- data/old/wukong/extensions.rb +0 -16
- data/old/wukong/extensions/array.rb +0 -18
- data/old/wukong/extensions/blank.rb +0 -93
- data/old/wukong/extensions/class.rb +0 -189
- data/old/wukong/extensions/date_time.rb +0 -53
- data/old/wukong/extensions/emittable.rb +0 -69
- data/old/wukong/extensions/enumerable.rb +0 -79
- data/old/wukong/extensions/hash.rb +0 -167
- data/old/wukong/extensions/hash_keys.rb +0 -16
- data/old/wukong/extensions/hash_like.rb +0 -150
- data/old/wukong/extensions/hashlike_class.rb +0 -47
- data/old/wukong/extensions/module.rb +0 -2
- data/old/wukong/extensions/pathname.rb +0 -27
- data/old/wukong/extensions/string.rb +0 -65
- data/old/wukong/extensions/struct.rb +0 -17
- data/old/wukong/extensions/symbol.rb +0 -11
- data/old/wukong/filename_pattern.rb +0 -74
- data/old/wukong/helper.rb +0 -7
- data/old/wukong/helper/stopwords.rb +0 -195
- data/old/wukong/helper/tokenize.rb +0 -35
- data/old/wukong/logger.rb +0 -38
- data/old/wukong/periodic_monitor.rb +0 -72
- data/old/wukong/schema.rb +0 -269
- data/old/wukong/script.rb +0 -286
- data/old/wukong/script/avro_command.rb +0 -5
- data/old/wukong/script/cassandra_loader_script.rb +0 -40
- data/old/wukong/script/emr_command.rb +0 -168
- data/old/wukong/script/hadoop_command.rb +0 -237
- data/old/wukong/script/local_command.rb +0 -41
- data/old/wukong/store.rb +0 -10
- data/old/wukong/store/base.rb +0 -27
- data/old/wukong/store/cassandra.rb +0 -10
- data/old/wukong/store/cassandra/streaming.rb +0 -75
- data/old/wukong/store/cassandra/struct_loader.rb +0 -21
- data/old/wukong/store/cassandra_model.rb +0 -91
- data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
- data/old/wukong/store/chunked_flat_file_store.rb +0 -48
- data/old/wukong/store/conditional_store.rb +0 -57
- data/old/wukong/store/factory.rb +0 -8
- data/old/wukong/store/flat_file_store.rb +0 -89
- data/old/wukong/store/key_store.rb +0 -51
- data/old/wukong/store/null_store.rb +0 -15
- data/old/wukong/store/read_thru_store.rb +0 -22
- data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
- data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
- data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
- data/old/wukong/streamer.rb +0 -30
- data/old/wukong/streamer/accumulating_reducer.rb +0 -83
- data/old/wukong/streamer/base.rb +0 -126
- data/old/wukong/streamer/counting_reducer.rb +0 -25
- data/old/wukong/streamer/filter.rb +0 -20
- data/old/wukong/streamer/instance_streamer.rb +0 -15
- data/old/wukong/streamer/json_streamer.rb +0 -21
- data/old/wukong/streamer/line_streamer.rb +0 -12
- data/old/wukong/streamer/list_reducer.rb +0 -31
- data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
- data/old/wukong/streamer/record_streamer.rb +0 -14
- data/old/wukong/streamer/reducer.rb +0 -11
- data/old/wukong/streamer/set_reducer.rb +0 -14
- data/old/wukong/streamer/struct_streamer.rb +0 -48
- data/old/wukong/streamer/summing_reducer.rb +0 -29
- data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
- data/old/wukong/typed_struct.rb +0 -12
- data/spec/away/encoding_spec.rb +0 -32
- data/spec/away/exe_spec.rb +0 -20
- data/spec/away/flow_spec.rb +0 -82
- data/spec/away/graph_spec.rb +0 -6
- data/spec/away/job_spec.rb +0 -15
- data/spec/away/rake_compat_spec.rb +0 -9
- data/spec/away/script_spec.rb +0 -81
- data/spec/hanuman/graphviz_spec.rb +0 -29
- data/spec/hanuman/slot_spec.rb +0 -2
- data/spec/support/examples_helper.rb +0 -10
- data/spec/support/streamer_test_helpers.rb +0 -6
- data/spec/support/wukong_widget_helpers.rb +0 -66
- data/spec/wukong/processor_spec.rb +0 -109
- data/spec/wukong/widget/filter_spec.rb +0 -99
- data/spec/wukong/widget/stringifier_spec.rb +0 -51
- data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -0,0 +1,36 @@
|
|
1
|
+
"Jabberwocky"
|
2
|
+
|
3
|
+
'Twas brillig, and the slithy toves
|
4
|
+
Did gyre and gimble in the wabe;
|
5
|
+
All mimsy were the borogoves,
|
6
|
+
And the mome raths outgrabe.
|
7
|
+
|
8
|
+
"Beware the Jabberwock, my son!
|
9
|
+
The jaws that bite, the claws that catch!
|
10
|
+
Beware the Jubjub bird, and shun
|
11
|
+
The frumious Bandersnatch!"
|
12
|
+
|
13
|
+
He took his vorpal sword in hand:
|
14
|
+
Long time the manxome foe he sought—
|
15
|
+
So rested he by the Tumtum tree,
|
16
|
+
And stood awhile in thought.
|
17
|
+
|
18
|
+
And as in uffish thought he stood,
|
19
|
+
The Jabberwock, with eyes of flame,
|
20
|
+
Came whiffling through the tulgey wood,
|
21
|
+
And burbled as it came!
|
22
|
+
|
23
|
+
One, two! One, two! and through and through
|
24
|
+
The vorpal blade went snicker-snack!
|
25
|
+
He left it dead, and with its head
|
26
|
+
He went galumphing back.
|
27
|
+
|
28
|
+
"And hast thou slain the Jabberwock?
|
29
|
+
Come to my arms, my beamish boy!
|
30
|
+
O frabjous day! Callooh! Callay!"
|
31
|
+
He chortled in his joy.
|
32
|
+
|
33
|
+
'Twas brillig, and the slithy toves
|
34
|
+
Did gyre and gimble in the wabe;
|
35
|
+
All mimsy were the borogoves,
|
36
|
+
And the mome raths outgrabe.
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
module Wu
  module Data
    module Wikipedia

      # Gorillib model with one typed field per attribute of a Wikipedia
      # article record.
      # NOTE(review): the field names look like the attributes of a <page>
      # element in a Wikipedia XML dump -- confirm against the extractor
      # that populates this model.
      class Article
        include Gorillib::Model
        field :title,        String
        field :namespace,    Integer
        field :id,           Integer
        field :restrictions, String
        field :revision_id,  String
        field :timestamp,    String
        field :sha1,         String
        field :redirect,     String
        field :xml_text,     String
      end

    end
  end
end # closes `module Wu`; without this fourth `end` the nesting is unbalanced
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# An airline record. Field descriptions live in each field's `doc:` option.
# The fields flagged `identifier: true` are the ICAO code, IATA code,
# OpenFlights id and callsign.
class Airline
  include Gorillib::Model

  # ICAO airline designators are three letters (e.g. "ANA"), IATA ones two
  # (e.g. "NH"), so the declared lengths must differ.
  field :icao_id,      String,   doc: "3-letter ICAO code, if available", identifier: true, length: 3
  field :iata_id,      String,   doc: "2-letter IATA code, if available", identifier: true, length: 2
  field :airline_ofid, Integer,  doc: "Unique OpenFlights identifier for this airline.", identifier: true
  field :active,       :boolean, doc: 'true if the airline is or has until recently been operational, false if it is defunct. (This is only a rough indication and should not be taken as 100% accurate)'
  field :country,      String,   doc: "Country or territory where airline is incorporated"
  field :name,         String,   doc: "Airline name."
  field :callsign,     String,   doc: "Airline callsign", identifier: true
  field :alias,        String,   doc: "Alias of the airline. For example, 'All Nippon Airways' is commonly known as 'ANA'"
end
|
12
|
+
|
13
|
+
#
|
14
|
+
# As of January 2012, the OpenFlights Airlines Database contains 5888
|
15
|
+
# airlines. If you enjoy this data, please consider [visiting their page and
|
16
|
+
# donating](http://openflights.org/data.html)
|
17
|
+
#
|
18
|
+
# > Notes: Airlines with null codes/callsigns/countries generally represent
|
19
|
+
# > user-added airlines. Since the data is intended primarily for current
|
20
|
+
# > flights, defunct IATA codes are generally not included. For example,
|
21
|
+
# > "Sabena" is not listed with a SN IATA code, since "SN" is presently used by
|
22
|
+
# > its successor Brussels Airlines.
|
23
|
+
#
|
24
|
+
# Sample entries
|
25
|
+
#
|
26
|
+
# 324,"All Nippon Airways","ANA All Nippon Airways","NH","ANA","ALL NIPPON","Japan","Y"
|
27
|
+
# 412,"Aerolineas Argentinas",\N,"AR","ARG","ARGENTINA","Argentina","Y"
|
28
|
+
# 413,"Arrowhead Airways",\N,"","ARH","ARROWHEAD","United States","N"
|
29
|
+
#
|
30
|
+
# One raw row of the OpenFlights airlines CSV, convertible to an Airline.
#
# NOTE(review): the field order below matches the column order of the sample
# CSV rows; LoadFromCsv presumably maps columns to fields positionally, so do
# not reorder the fields without confirming.
class RawOpenflightAirline
  include Gorillib::Model
  include Gorillib::Model::LoadFromCsv

  # Values passed as the `blankish:` option of every field. Frozen because
  # the one array is shared by all field declarations.
  BLANKISH_STRINGS = ["", nil, "NULL", '\\N', "NONE", "NA", "Null", "..."].freeze

  field :airline_ofid, Integer,  blankish: BLANKISH_STRINGS, doc: "Unique OpenFlights identifier for this airline.", identifier: true
  field :name,         String,   blankish: BLANKISH_STRINGS, doc: "Airline name."
  field :alias,        String,   blankish: BLANKISH_STRINGS, doc: "Alias of the airline. For example, 'All Nippon Airways' is commonly known as 'ANA'"
  field :iata_id,      String,   blankish: BLANKISH_STRINGS, doc: "2-letter IATA code, if available", identifier: true, length: 2
  # ICAO airline designators are three letters; the length must match the doc.
  field :icao_id,      String,   blankish: BLANKISH_STRINGS, doc: "3-letter ICAO code, if available", identifier: true, length: 3
  field :callsign,     String,   blankish: BLANKISH_STRINGS, doc: "Airline callsign"
  field :country,      String,   blankish: BLANKISH_STRINGS, doc: "Country or territory where airline is incorporated"
  field :active,       :boolean, blankish: BLANKISH_STRINGS, doc: 'true if the airline is or has until recently been operational, false if it is defunct. (This is only a rough indication and should not be taken as 100% accurate)'

  # Accept the IATA code only when it consists solely of word characters;
  # any other value is not forwarded to the default receiver.
  def receive_iata_id(val)
    super if val =~ /\A\w+\z/
  end

  # Accept the ICAO code only when it consists solely of word characters;
  # any other value is not forwarded to the default receiver.
  def receive_icao_id(val)
    super if val =~ /\A\w+\z/
  end

  # Translate the CSV's "Y"/"N" flag to true/false before the default
  # receiver sees it; every other value is passed through unchanged.
  def receive_active(val)
    flag =
      case val.to_s
      when "Y" then true
      when "N" then false
      else val
      end
    super(flag)
  end

  # Build an Airline from this record's compact attributes.
  def to_airline
    Airline.receive(compact_attributes)
  end

  # Load `filename` with load_csv and yield each row converted to an Airline.
  # A block is required.
  def self.load_airlines(filename)
    load_csv(filename) { |raw_airline| yield(raw_airline.to_airline) }
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require_relative('../../rake_helper')
|
2
|
+
require_relative('./models')
|
3
|
+
|
4
|
+
# Symbolic path registrations for the airline_flights example: raw inputs
# under :af_data, parsed outputs under :af_work. File names carrying the
# mini slug interpolate the value of Settings[:mini_slug].
mini_slug = Settings[:mini_slug]

Pathname.register_paths(
  af_data: [:data, 'airline_flights'],
  af_work: [:work, 'airline_flights'],
  af_code: File.dirname(__FILE__),
  # raw inputs
  openflights_raw_airports: [:af_data, "openflights_airports-raw#{mini_slug}.csv"],
  openflights_raw_airlines: [:af_data, "openflights_airlines-raw.csv"],
  dataexpo_raw_airports:    [:af_data, "dataexpo_airports-raw#{mini_slug}.csv"],
  wikipedia_icao:           [:af_data, "wikipedia_icao.tsv"],
  wikipedia_iata:           [:af_data, "wikipedia_iata.tsv"],
  wikipedia_us_abroad:      [:af_data, "wikipedia_us_abroad.tsv"],
  # parsed outputs
  openflights_airports:     [:af_work, "openflights_airports-parsed#{mini_slug}.tsv"],
  openflights_airlines:     [:af_work, "openflights_airlines-parsed#{mini_slug}.tsv"],
  dataexpo_airports:        [:af_work, "dataexpo_airports-parsed#{mini_slug}.tsv"],
  airport_identifiers:      [:af_work, "airport_identifiers.tsv"],
  airport_identifiers_mini: [:af_work, "airport_identifiers-sample.tsv"],
  # helpers
  country_name_lookup:      [:work, 'geo', "country_name_lookup.tsv"]
)
|
24
|
+
|
25
|
+
# Task chain for the airline_flights example. Only the OpenFlights airport
# parse step is live; the other steps are kept below, commented out.
chain :airline_flights do
  # Every .rb file under :af_code; handed to create_file via `after:`.
  # NOTE(review): `after:` presumably lists prerequisites -- see rake_helper.
  code_files = FileList[Pathname.of(:af_code, '*.rb').to_s]

  chain :parse do

    # desc 'parse the dataexpo airports'
    # create_file(:dataexpo_airports, after: code_files) do |dest|
    #   RawDataexpoAirport.load_airports(:dataexpo_raw_airports) do |airport|
    #     dest << airport.to_tsv << "\n"
    #   end
    # end

    desc 'parse the openflights airports'
    create_file(:openflights_airports, after: [code_files, :force]) do |out|
      # The geo models are required here, inside the task body, not at load time.
      require_relative('../geo/geo_models')
      Geo::CountryNameLookup.load
      RawOpenflightAirport.load_airports(:openflights_raw_airports) do |parsed_airport|
        out << parsed_airport.to_tsv << "\n"
      end
    end

    # task :reconcile_airports => [:dataexpo_airports, :openflights_airports] do
    #   require_relative 'reconcile_airports'
    #   Airport::IdReconciler.load_all
    # end
    #
    # desc 'run the identifier reconciler'
    # create_file(:airport_identifiers, after: code_files, invoke: 'airline_flights:parse:reconcile_airports') do |dest|
    #   Airport::IdReconciler.airports.each do |airport|
    #     dest << airport.to_tsv << "\n"
    #   end
    # end
    #
    # desc 'run the identifier reconciler'
    # create_file(:airport_identifiers_mini, after: code_files, invoke: 'airline_flights:parse:reconcile_airports') do |dest|
    #   Airport::IdReconciler.exemplars.each do |airport|
    #     dest << airport.to_tsv << "\n"
    #   end
    # end
    #
    # desc 'parse the openflights airlines'
    # create_file(:openflights_airlines, after: code_files) do |dest|
    #   RawOpenflightAirline.load_airlines(:openflights_raw_airlines) do |airline|
    #     dest << airline.to_tsv << "\n"
    #     puts airline.to_tsv
    #   end
    # end

  end
end
|
75
|
+
|
76
|
+
# The default task depends on the whole airline_flights chain. The individual
# steps it used to list are kept here for reference:
#   'airline_flights:parse:dataexpo_airports'
#   'airline_flights:parse:openflights_airports'
#   'airline_flights:parse:airport_identifiers'
#   'airline_flights:parse:airport_identifiers_mini'
#   'airline_flights:parse:openflights_airlines'
task default: %w[airline_flights]
|
File without changes
|
@@ -0,0 +1,211 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
### @export "airport_model"
|
4
|
+
# Airport record: identifiers (ICAO / IATA / FAA / OpenFlights id), location,
# time-zone information and descriptive names, declared as Gorillib::Model
# fields. Field order is significant wherever records are read or written
# positionally, so do not reorder the declarations.
class Airport
  include Gorillib::Model

  field :icao, String, doc: "4-letter ICAO code, or blank if not assigned.", length: 4, identifier: true, blankish: ["", nil]
  field :iata, String, doc: "3-letter IATA code, or blank if not assigned.", length: 3, identifier: true, blankish: ["", nil]
  field :faa, String, doc: "3-letter FAA code, or blank if not assigned.", length: 3, identifier: true, blankish: ["", nil]
  # Inclusive range: real-world offsets run from UTC-12 through UTC+14, so the
  # former end-exclusive (-12...12) rejected valid values such as +12.
  field :utc_offset, Float, doc: "Hours offset from UTC. Fractional hours are expressed as decimals, eg. India is 5.5.", validates: { inclusion: (-12..14) }
  field :dst_rule, String, doc: "Daylight savings time rule. One of E (Europe), A (US/Canada), S (South America), O (Australia), Z (New Zealand), N (None) or U (Unknown). See the readme for more.", validates: { inclusion: %w[E A S O Z N U] }
  # Inclusive ranges: 180 degrees of longitude and 90 degrees of latitude are
  # valid coordinates; the end-exclusive ranges rejected them.
  field :longitude, Float, doc: "Decimal degrees, usually to six significant digits. Negative is West, positive is East.", validates: { inclusion: (-180..180) }
  field :latitude, Float, doc: "Decimal degrees, usually to six significant digits. Negative is South, positive is North.", validates: { inclusion: (-90.0..90.0) }
  field :altitude, Float, doc: "Elevation in meters."
  field :name, String, doc: "Name of airport."
  field :country, String, doc: "Country or territory where airport is located.", length: 2
  field :state, String, doc: "State in which the airport is located", length: 2
  field :city, String, doc: "Main city served by airport. This is the logical city it serves; so, for example SFO gets 'San Francisco', not 'San Bruno'"
  field :airport_ofid, String, doc: "OpenFlights identifier for this airport.", identifier: true
end
|
21
|
+
### @export "nil"
|
22
|
+
# Reference constants, data-quality checks and display helpers for Airport.
class Airport
  # Fifty three-letter airport codes.
  # NOTE(review): presumably a small, well-known sample set -- confirm usage.
  EXEMPLARS = %w[
    ANC ATL AUS BDL BNA BOI BOS BWI CLE CLT
    CMH DCA DEN DFW DTW EWR FLL HNL IAD IAH
    IND JAX JFK LAS LAX LGA MCI MCO MDW MIA
    MSP MSY OAK ORD PDX PHL PHX PIT PVD RDU
    SAN SEA SFO SJC SJU SLC SMF STL TPA YYZ
  ].freeze

  # Shifts `tm.get_utc` by this airport's UTC offset, plus one further hour
  # when TimezoneFixup.dst?(tm) is true.
  #
  # `utc_offset` is expressed in hours, whereas the DST adjustment is 60*60
  # (i.e. seconds); the offset is therefore converted to seconds before it is
  # added so that both adjustments use the same unit.
  #
  # NOTE(review): `get_utc` is not a core Time method (core has `getutc`);
  # presumably `tm` is a project type or extended elsewhere -- confirm.
  #
  # @param tm [#get_utc] the time to adjust
  # @return the adjusted time (whatever `tm.get_utc + seconds` yields)
  def utc_time_for(tm)
    utc_time = tm.get_utc + (utc_offset * 60 * 60)
    utc_time += (60 * 60) if TimezoneFixup.dst?(tm)
    utc_time
  end

  # Values that stand in for "no value" in the raw data.
  BLANKISH_STRINGS = ["", nil, "NULL", '\\N', "NONE", "NA", "Null", "..."].freeze
  # Matches any single character that is NOT in the accepted set (note the
  # leading ^ in the character class), despite the constant's name.
  OK_CHARS_RE = /[^a-zA-Z0-9\:\ \/\.\,\-\(\)\'ÁÂÄÅÇÉÍÎÑÓÖØÚÜÞàáâãäåæçèéêëìíîïðñóôõöøúüýĀāăĆćČčēėęěğīİıŁłńņňŌōőřŞşŠšţťūůųźŽžơț]/

  # Collects data-quality complaints about this record.
  #
  # Checks performed: ICAO length, agreement of a K-prefixed ICAO with
  # "K" + FAA, and, for every non-blank attribute, placeholder values,
  # leading/trailing whitespace and characters outside the accepted set.
  #
  # @return [Hash] complaint => offending value(s), with blank entries removed
  def lint
    errors = {}
    errors["ICAO is wrong length"] = icao if icao.present? && icao.length != 4
    if icao && faa && (icao =~ /^K.../)
      errors["ICAO != K+FAA yet ICAO is a K..."] = [icao, faa] if icao != "K#{faa}"
    end
    # errors["ICAO present for piddlyshit airport"] = icao if icao.present? && ((faa.to_s.length == 4) || (faa.to_s =~ /\d/))
    errors[:spaces] ||= []
    errors[:funny] ||= []
    attributes.each do |attr, val|
      next if val.blank?
      errors["#{attr} looks blankish"] = val if BLANKISH_STRINGS.include?(val)
      next unless val.is_a?(String)
      errors[:spaces] << [attr, val] if val.strip != val
      errors[:funny] << [attr, val] if val =~ OK_CHARS_RE
    end
    # Drops the :spaces / :funny buckets when they stayed empty.
    errors.compact_blank
  end

  # Tab-separated, human-readable summary wrapped in "#<Airport ...>".
  # NOTE(review): `country` appears twice in the column list; kept as-is so
  # the output is unchanged.
  #
  # @return [String]
  def to_s
    columns = [
      icao, iata, faa,
      (latitude && "%4.1f" % latitude), (longitude && "%5.1f" % longitude), state, country,
      "%-30s" % name, country, city
    ]
    "#<Airport #{columns.join("\t")}>"
  end

  # Whether the ICAO code begins with one of the prefixes K, P[ABFGHJKMOPW],
  # T[IJ], NSAS, NSFQ or NSTU.
  #
  # @return [Integer, nil] 0 (truthy) when the prefix matches, nil otherwise
  def faa_controlled?
    icao =~ /^(?:K|P[ABFGHJKMOPW]|T[IJ]|NS(AS|FQ|TU))/
  end
end
|
72
|
+
### @export "airport_load"
|
73
|
+
# TSV loading support for Airport.
class Airport
  include Gorillib::Model::LoadFromTsv

  # NOTE(review): presumably the accepted number of columns per row -- confirm
  # against Gorillib::Model::LoadFromTsv.
  tsv_options.merge!(num_fields: 10..20)

  # Reads `filename` through load_tsv and yields each loaded record to the
  # caller's block.
  #
  # @param filename the TSV source handed to load_tsv
  # @yield [airport] each record produced by load_tsv
  def self.load_airports(filename)
    load_tsv(filename) do |record|
      yield record
    end
  end
end
|
81
|
+
### @export "nil"
|
82
|
+
|
83
|
+
#
|
84
|
+
# As of January 2012, the OpenFlights Airports Database contains 6977 airports
|
85
|
+
# [spanning the globe](http://openflights.org/demo/openflights-apdb-2048.png).
|
86
|
+
# If you enjoy this data, please consider [visiting their page and
|
87
|
+
# donating](http://openflights.org/data.html)
|
88
|
+
#
|
89
|
+
# > Note: Rules for daylight savings time change from year to year and from
|
90
|
+
# > country to country. The current data is an approximation for 2009, built on
|
91
|
+
# > a country level. Most airports in DST-less regions in countries that
|
92
|
+
# > generally observe DST (eg. AL, HI in the USA, NT, QL in Australia, parts of
|
93
|
+
# > Canada) are marked incorrectly.
|
94
|
+
#
|
95
|
+
# Sample entries
|
96
|
+
#
|
97
|
+
# 507,"Heathrow","London","United Kingdom","LHR","EGLL",51.4775,-0.461389,83,0,"E"
|
98
|
+
# 26,"Kugaaruk","Pelly Bay","Canada","YBB","CYBB",68.534444,-89.808056,56,-6,"A"
|
99
|
+
# 3127,"Pokhara","Pokhara","Nepal","PKR","VNPK",28.200881,83.982056,2712,5.75,"N"
|
100
|
+
#
|
101
|
+
|
102
|
+
### @export "raw_openflight_airport"
|
103
|
+
|
104
|
+
# Clean-up hooks shared by the raw airport importers. Each receive_* method
# delegates to `super` and then tidies the value that `super` returned.
module RawAirport
  # Country / territory names as they appear in the raw data, mapped to
  # two-letter lowercase codes.
  COUNTRIES = {
    'Puerto Rico' => 'us', 'Canada' => 'ca', 'USA' => 'us', 'United States' => 'us',
    'Northern Mariana Islands' => 'us', 'N Mariana Islands' => 'us',
    'Federated States of Micronesia' => 'fm',
    'Thailand' => 'th', 'Palau' => 'pw',
    'American Samoa' => 'as', 'Wake Island' => 'us', 'Virgin Islands' => 'vi', 'Guam' => 'gu'
  }
  # Values that stand in for "no value" in the raw data.
  BLANKISH_STRINGS = ["", nil, "NULL", '\\N', "NONE", "NA", "Null", "..."]
  # Matches any single character that is NOT in the accepted set (note the
  # leading ^ in the character class).
  OK_CHARS_RE = /[^a-zA-Z0-9\:\ \/\.\,\-\(\)\'ÁÂÄÅÇÉÍÎÑÓÖØÚÜÞàáâãäåæçèéêëìíîïðñóôõöøúüýĀāăĆćČčēėęěğīİıŁłńņňŌōőřŞşŠšţťūůųźŽžơț]/

  # Receives the city, then strips surrounding whitespace and removes
  # backslashes. The returned string is modified in place.
  #
  # @return whatever `super` returned (cleaned when truthy)
  def receive_city(val)
    received = super
    if received
      received.strip!
      received.gsub!(/\\+/, '')
    end
    received
  end

  # Receives the country, substituting the code from COUNTRIES when the raw
  # name is listed there and passing the value through unchanged otherwise.
  def receive_country(val)
    code = COUNTRIES[val]
    super(code || val)
  end

  # Receives the name and normalizes it in place: trims whitespace, removes
  # backslashes and "[military]" / "[private]" tags, abbreviates
  # International / Int'l and Intercontinental, expands "Airpt", and finally
  # drops a trailing " Airport".
  #
  # @return whatever `super` returned (cleaned when truthy)
  def receive_name(val)
    received = super
    return received unless received

    received.strip!
    # Applied in this order; the last rule relies on "Airpt" having been
    # expanded to "Airport" by the rule before it.
    [
      [/\\+/, ''],
      [/\s*\[(military|private)\]/, ''],
      [/\b(Int\'l|International)\b/, 'Intl'],
      [/\b(Intercontinental)\b/, 'Intcntl'],
      [/\b(Airpt)\b/, 'Airport'],
      [/ Airport$/, '']
    ].each { |pattern, replacement| received.gsub!(pattern, replacement) }
    received
  end
end
|
136
|
+
|
137
|
+
#
|
138
|
+
# One row of the OpenFlights airports CSV, convertible to an Airport.
#
# The raw file has a single combined IATA/FAA column; #iata and #faa split
# it back apart based on #id_is_faa?.
class RawOpenflightAirport
  include Gorillib::Model
  include Gorillib::Model::LoadFromCsv
  include RawAirport

  field :airport_ofid, String, doc: "Unique OpenFlights identifier for this airport."
  field :name, String, doc: "Name of airport. May or may not contain the City name."
  field :city, String, blankish: BLANKISH_STRINGS, doc: "Main city served by airport. May be spelled differently from Name."
  field :country, String, doc: "Country or territory where airport is located."
  field :iata_faa, String, blankish: BLANKISH_STRINGS, doc: "3-letter FAA code, for airports located in the USA. For all other airports, 3-letter IATA code, or blank if not assigned."
  field :icao, String, blankish: BLANKISH_STRINGS, doc: "4-letter ICAO code; Blank if not assigned."
  field :latitude, Float, doc: "Decimal degrees, usually to six significant digits. Negative is South, positive is North."
  field :longitude, Float, doc: "Decimal degrees, usually to six significant digits. Negative is West, positive is East."
  field :altitude_ft, Float, blankish: ['', nil, 0, '0'], doc: "In feet."
  field :utc_offset, Float, doc: "Hours offset from UTC. Fractional hours are expressed as decimals, eg. India is 5.5."
  field :dst_rule, String, doc: "Daylight savings time rule. One of E (Europe), A (US/Canada), S (South America), O (Australia), Z (New Zealand), N (None) or U (Unknown). See the readme for more."

  # Codes in the combined IATA/FAA column that #iata refuses to report.
  UNRELIABLE_OPENFLIGHTS_IATA_VALUES = /^(7AK|AGA|AUQ|BDJ|BGW|BME|BPM|BXH|BZY|CAT|CEE|CEJ|CFS|CGU|CIO|CLV|CNN|DEE|DIB|DNM|DUH|DUR|FKI|GES|GSM|HKV|HOJ|HYD|IEO|IFN|IKA|IZA|JCU|JGS|KMW|KNC|LGQ|LUM|MCU|MCY|MDO|MOH|MON|MPH|MVF|NAY|NMA|NOE|NQY|OTU|OUI|PBV|PCA|PCB|PGK|PHO|PIF|PKN|PKY|PMK|PTG|PZO|QAS|QKT|QVY|RCM|RJL|RTG|SBG|SDZ|SFG|SIC|SIQ|SJI|SRI|STP|STU|SWQ|TJQ|TJS|TMC|TYA|UKC|VIY|VQS|VTS|WDH|WKM|WPR|WPU|ZQF)$/

  # Whether the combined column should be read as an FAA code: true-ish when
  # the ICAO code starts with "K", or when there is no ICAO code and the
  # country is 'us'.
  def id_is_faa?
    (icao =~ /^(?:K)/) || (icao.blank? && country == 'us')
  end

  # IATA code, or nil when the combined column holds an FAA code or one of
  # the UNRELIABLE_OPENFLIGHTS_IATA_VALUES.
  def iata
    return if iata_faa =~ UNRELIABLE_OPENFLIGHTS_IATA_VALUES

    id_is_faa? ? nil : iata_faa
  end

  # FAA code, or nil when the combined column holds an IATA code.
  def faa
    id_is_faa? ? iata_faa : nil
  end

  # Altitude in meters rounded to one decimal (0.3048 m per foot), or nil
  # when no altitude was given.
  def altitude
    altitude_ft && (0.3048 * altitude_ft).round(1)
  end

  # Resolves the raw country name through Geo::CountryNameLookup and passes
  # the resulting country_id on; falls back to the raw value when the lookup
  # finds nothing. Unmatched names are reported on stderr via `warn` (the
  # previous bare `p val` was a debugging leftover that wrote to stdout).
  def receive_country(val)
    country = Geo::CountryNameLookup.for_alt_name(val, nil)
    warn "RawOpenflightAirport: no country match for #{val.inspect}" unless country
    super(country ? country.country_id : val)
  end

  # Builds an Airport from this row: altitude converted to meters and the
  # combined code column split into :iata and :faa.
  #
  # @return [Airport]
  def to_airport
    attrs = compact_attributes.except(:altitude_ft)
    attrs[:altitude] = altitude
    # #iata already returns nil for unreliable codes, so no second check is
    # needed here.
    attrs[:iata] = iata
    attrs[:faa] = faa
    Airport.receive(attrs)
  end

  # Reads `filename` through load_csv and yields each row converted to an
  # Airport.
  #
  # @yield [airport] the Airport built from each raw row
  def self.load_airports(filename)
    load_csv(filename) { |raw_airport| yield(raw_airport.to_airport) }
  end
end
|
185
|
+
|
186
|
+
### @export "raw_dataexpo_airport"
|
187
|
+
# One row of the Data Expo airports CSV, convertible to an Airport.
class RawDataexpoAirport
  include Gorillib::Model
  include Gorillib::Model::LoadFromCsv
  include RawAirport
  # NOTE(review): presumably skips the header row of the CSV -- confirm
  # against Gorillib::Model::LoadFromCsv.
  self.csv_options = self.csv_options.merge(pop_headers: true)

  field :faa, String, doc: "the international airport abbreviation code"
  field :name, String, doc: "Airport name"
  field :city, String, blankish: ["NA"], doc: "city in which the airport is located"
  field :state, String, blankish: ["NA"], doc: "state in which the airport is located"
  field :country, String, doc: "country in which airport is located"
  field :latitude, Float, doc: "latitude of the airport"
  field :longitude, Float, doc: "longitude of the airport"

  # State / territory codes for which no "K" + FAA ICAO code is synthesized.
  K_PREFIX_EXEMPT_STATES = %w[PR AK CQ HI AS GU VI].freeze

  # Builds an Airport from this row. For 'us' airports outside
  # K_PREFIX_EXEMPT_STATES whose FAA code is exactly three capital letters,
  # the ICAO code is synthesized as "K" + FAA.
  #
  # The pattern is anchored to the whole string: the previous unanchored
  # /[A-Z]{3}/ also accepted longer codes and so produced ICAO values that
  # are not four characters long.
  #
  # @return [Airport]
  def to_airport
    attrs = compact_attributes
    if faa =~ /\A[A-Z]{3}\z/ && !K_PREFIX_EXEMPT_STATES.include?(state) && (country == 'us')
      attrs[:icao] = "K#{faa}"
    end
    Airport.receive(attrs)
  end

  # Reads `filename` through load_csv and yields each row converted to an
  # Airport.
  #
  # @yield [airport] the Airport built from each raw row
  def self.load_airports(filename)
    load_csv(filename) { |raw_airport| yield(raw_airport.to_airport) }
  end
end
|
211
|
+
### @export "nil"
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# Identifier unification helpers for Airport: loads identifier mappings from
# two files and reports records whose ICAO / IATA / FAA codes disagree.
#
# NOTE(review): `load_csv` and the `ID_MAPPINGS` constant are not defined in
# this file; they are assumed to be provided elsewhere.
class Airport

  # [Hash] all options passed to the field not recognized by one of its own current fields
  attr_reader :_extra_attributes

  # # Airports whose IATA and FAA codes differ; all are in the US, so their ICAO is "K"+the FAA id
  # FAA_ICAO_FIXUP = {
  #   "GRM" => "CKC", "CLD" => "CRQ", "SDX" => "SEZ", "AZA" => "IWA", "SCE" => "UNV", "BLD" => "BVU",
  #   "LKE" => "W55", "HSH" => "HND", "BKG" => "BBG", "UST" => "SGJ", "LYU" => "ELO", "WFK" => "FVE",
  #   "FRD" => "FHR", "ESD" => "ORS", "RKH" => "UZA", "NZC" => "VQQ", "SCF" => "SDL", "JCI" => "IXD",
  #   "AVW" => "AVQ", "UTM" => "UTA", "ONP" => "NOP", }
  #
  # [:iata, :icao, :latitude, :longitude, :country, :city, :name].each do |attr|
  #   define_method("of_#{attr}"){ @_extra_attributes[:"of_#{attr}"] }
  #   define_method("de_#{attr}"){ @_extra_attributes[:"de_#{attr}"] }
  # end
  #
  # def lint_differences
  #   errors = {}
  #   return errors unless de_name.present? && of_name.present?
  #   [
  #     [:iata, of_iata, de_iata], [:icao, of_icao, de_icao], [:country, of_country, de_country],
  #     [:city, of_city, de_city],
  #     [:name, of_name, de_name],
  #   ].each{|attr, of, de| next unless of && de ; errors[attr] = [of, de] if of != de }
  #
  #   if (of_latitude && of_longitude && de_latitude && de_longitude)
  #     lat_diff = (of_latitude - de_latitude ).abs
  #     lng_diff = (of_longitude - de_longitude).abs
  #     unless (lat_diff < 0.015) && (lng_diff < 0.015)
  #       msg = [of_latitude, de_latitude, of_longitude, de_longitude, lat_diff, lng_diff].map{|val| "%9.4f" % val }.join(" ")
  #       errors["distance"] = ([msg, of_city, de_city, of_name, de_name])
  #     end
  #   end
  #
  #   errors
  # end
  #
  # AIRPORTS = Hash.new # unless defined?(AIRPORTS)
  # def self.load(of_filename, de_filename)
  #   RawOpenflightAirport.load_csv(of_filename) do |raw_airport|
  #     airport = raw_airport.to_airport
  #     AIRPORTS[airport.iata_icao] = airport
  #   end
  #   RawDataexpoAirport.load_csv(de_filename) do |raw_airport|
  #     airport = (AIRPORTS[raw_airport.iata_icao] ||= self.new)
  #     if airport.de_name
  #       warn "duplicate data for #{[iata, de_iata, icao, de_icao]}: #{raw_airport.to_tsv} #{airport.to_tsv}"
  #     end
  #     airport.receive!(raw_airport.airport_attrs)
  #   end
  #   AIRPORTS
  # end

  # Loads 'wikipedia_icao.tsv' and then 'wikipedia_iata.tsv' from `dirname`
  # via load_csv, registering every record under each of its identifiers.
  #
  # The method previously lacked its closing `end`, which left the class
  # body unterminated; the duplicated per-record loop now lives in
  # register_id_mapping.
  #
  # @param dirname [String] directory containing the two mapping files
  # @return the result of the second load_csv call
  def self.load(dirname)
    load_csv(File.join(dirname, 'wikipedia_icao.tsv')) do |id_mapping|
      register_id_mapping(id_mapping)
    end
    load_csv(File.join(dirname, 'wikipedia_iata.tsv')) do |id_mapping|
      # if not ID_MAPPINGS[:icao].has_key?(id_mapping.icao)
      #   puts [:badicao, "%-25s" % "", id_mapping, " "*24, "%-60s" % id_mapping.name].join("\t")
      # end
      register_id_mapping(id_mapping)
    end
  end

  # Files `id_mapping` under each of its :icao, :iata and :faa values in
  # ID_MAPPINGS. Missing values and the placeholders '.' and '_' are
  # skipped. When a slot is already taken, the existing record wins and a
  # tab-separated report is printed if the two records disagree.
  def self.register_id_mapping(id_mapping)
    [:icao, :iata, :faa].each do |attr|
      val = id_mapping.read_attribute(attr) or next
      next if (val == '.') || (val == '_')
      if (that = ID_MAPPINGS[attr][val])
        lint = that.disagreements(id_mapping)
        puts [attr, val, "%-25s" % lint.inspect, id_mapping, that, "%-60s" % id_mapping.name, "%-25s" % that.name].join("\t") if lint.present?
      else
        ID_MAPPINGS[attr][val] = id_mapping
      end
    end
    # [:icao, :iata, :faa ].each do |attr|
    #   val = id_mapping.read_attribute(attr)
    #   ID_MAPPINGS[attr][val] = id_mapping
    # end
  end
  private_class_method :register_id_mapping

  # def adopt_field(that, attr)
  #   this_val = self.read_attribute(attr)
  #   that_val = that.read_attribute(attr)
  #   if name =~ /Bogus|Austin/i
  #     puts [attr, this_val, that_val, attribute_set?(attr), that.attribute_set?(attr), to_tsv, that.to_tsv].join("\t")
  #   end
  #   if this_val && that_val
  #     if (this_val != that_val) then warn [attr, this_val, that_val, name].join("\t") ; end
  #   elsif that_val
  #     write_attribute(that_val)
  #   end
  # end

  # The first three attribute values joined by tabs.
  #
  # @return [String]
  def to_s
    attributes.values[0..2].join("\t")
  end

  # Compares this record's identifiers with another's.
  #
  # An identifier is only compared when both sides have a value and the
  # other side's value is not one of the placeholders '.' or '_'.
  #
  # @param that [#read_attribute] the record to compare against
  # @return [Hash] attr => [this_value, that_value] for each mismatch
  def disagreements(that)
    errors = {}
    [:icao, :iata, :faa].each do |attr|
      this_val = self.read_attribute(attr) or next
      that_val = that.read_attribute(attr) or next
      next if that_val == '.' || that_val == '_'
      errors[attr] = [this_val, that_val] if this_val != that_val
    end
    errors
  end

  # NOTE(review): `ids` is never used, and `icao` / `iata` / `faa` are
  # resolved on the class itself here -- this looks unfinished; confirm the
  # intended shape of `ids` before relying on this method.
  def self.dump_ids(ids)
    "%s\t%s\t%s" % [icao, iata, faa]
  end

  # One padded string per identifier kind, listing every mapping as
  # "id:icao|iata|faa" joined by ";".
  # NOTE(review): reads ID_MAP, whereas .load fills ID_MAPPINGS -- confirm
  # which constant is intended.
  def self.dump_mapping
    [:icao, :iata, :faa].map do |attr|
      "%-50s" % ID_MAP[attr].to_a.sort.map{|id, val| "#{id}:#{val.icao||'    '}|#{val.iata||'   '}|#{val.faa||'   '}"}.join(";")
    end
  end

  # Prints one "\t| "-separated diagnostic line combining the given ids, the
  # reconciler's ids, up to three existing entries, any extra args and the
  # full mapping dump.
  def self.dump_info(kind, ids, reconciler, existing, *args)
    ex_str = [existing.map{|el| dump_ids(el.ids) }, "\t\t","\t\t","\t\t"].flatten[0..2]
    puts [kind, dump_ids(ids), dump_ids(reconciler.ids), ex_str, *args, dump_mapping.join("//") ].flatten.join("\t| ")
  end
end
|