wukong 3.0.0.pre → 3.0.0.pre2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +46 -33
- data/.gitmodules +3 -0
- data/.rspec +1 -1
- data/.travis.yml +8 -1
- data/.yardopts +0 -13
- data/Guardfile +4 -6
- data/{LICENSE.textile → LICENSE.md} +43 -55
- data/README-old.md +422 -0
- data/README.md +279 -418
- data/Rakefile +21 -5
- data/TODO.md +6 -6
- data/bin/wu-clean-encoding +31 -0
- data/bin/wu-lign +2 -2
- data/bin/wu-local +69 -0
- data/bin/wu-server +70 -0
- data/examples/Gemfile +38 -0
- data/examples/README.md +9 -0
- data/examples/dataflow/apache_log_line.rb +64 -25
- data/examples/dataflow/fibonacci_series.rb +101 -0
- data/examples/dataflow/parse_apache_logs.rb +37 -7
- data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
- data/examples/dataflow/simple.rb +4 -4
- data/examples/geo.rb +4 -0
- data/examples/geo/geo_grids.numbers +0 -0
- data/examples/geo/geolocated.rb +331 -0
- data/examples/geo/quadtile.rb +69 -0
- data/examples/geo/spec/geolocated_spec.rb +247 -0
- data/examples/geo/tile_fetcher.rb +77 -0
- data/examples/graph/minimum_spanning_tree.rb +61 -61
- data/examples/jabberwocky.txt +36 -0
- data/examples/models/wikipedia.rb +20 -0
- data/examples/munging/Gemfile +8 -0
- data/examples/munging/airline_flights/airline.rb +57 -0
- data/examples/munging/airline_flights/airline_flights.rake +83 -0
- data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
- data/examples/munging/airline_flights/airport.rb +211 -0
- data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
- data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
- data/examples/munging/airline_flights/flight.rb +156 -0
- data/examples/munging/airline_flights/models.rb +4 -0
- data/examples/munging/airline_flights/parse.rb +26 -0
- data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
- data/examples/munging/airline_flights/route.rb +35 -0
- data/examples/munging/airline_flights/tasks.rake +83 -0
- data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
- data/examples/munging/airline_flights/topcities.rb +167 -0
- data/examples/munging/airports/40_wbans.txt +40 -0
- data/examples/munging/airports/filter_weather_reports.rb +37 -0
- data/examples/munging/airports/join.pig +31 -0
- data/examples/munging/airports/to_tsv.rb +33 -0
- data/examples/munging/airports/usa_wbans.pig +19 -0
- data/examples/munging/airports/usa_wbans.txt +2157 -0
- data/examples/munging/airports/wbans.pig +19 -0
- data/examples/munging/airports/wbans.txt +2310 -0
- data/examples/munging/geo/geo_json.rb +54 -0
- data/examples/munging/geo/geo_models.rb +69 -0
- data/examples/munging/geo/geonames_models.rb +78 -0
- data/examples/munging/geo/iso_codes.rb +172 -0
- data/examples/munging/geo/reconcile_countries.rb +124 -0
- data/examples/munging/geo/tasks.rake +71 -0
- data/examples/munging/rake_helper.rb +62 -0
- data/examples/munging/weather/.gitignore +1 -0
- data/examples/munging/weather/Gemfile +4 -0
- data/examples/munging/weather/Rakefile +28 -0
- data/examples/munging/weather/extract_ish.rb +13 -0
- data/examples/munging/weather/models/weather.rb +119 -0
- data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
- data/examples/munging/wikipedia/README.md +34 -0
- data/examples/munging/wikipedia/Rakefile +193 -0
- data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
- data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
- data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
- data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
- data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
- data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
- data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
- data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
- data/examples/munging/wikipedia/pig_style_guide.md +25 -0
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
- data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
- data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
- data/examples/munging/wikipedia/utils/namespaces.json +1 -0
- data/examples/rake_helper.rb +85 -0
- data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/server_logs/logline.rb +95 -0
- data/examples/server_logs/models.rb +66 -0
- data/examples/server_logs/page_counts.pig +48 -0
- data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
- data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
- data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
- data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
- data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
- data/examples/string_reverser.rb +26 -0
- data/examples/text/pig_latin.rb +2 -2
- data/examples/text/regional_flavor/README.md +14 -0
- data/examples/text/regional_flavor/article_wordbags.pig +39 -0
- data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
- data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
- data/examples/word_count/accumulator.rb +26 -0
- data/examples/word_count/tokenizer.rb +13 -0
- data/examples/word_count/word_count.rb +6 -0
- data/examples/workflow/cherry_pie.dot +97 -0
- data/examples/workflow/cherry_pie.png +0 -0
- data/examples/workflow/cherry_pie.rb +61 -26
- data/lib/hanuman.rb +34 -7
- data/lib/hanuman/graph.rb +55 -31
- data/lib/hanuman/graphvizzer.rb +199 -178
- data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
- data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
- data/lib/hanuman/link.rb +35 -0
- data/lib/hanuman/registry.rb +46 -0
- data/lib/hanuman/stage.rb +76 -32
- data/lib/wukong.rb +23 -24
- data/lib/wukong/boot.rb +87 -0
- data/lib/wukong/configuration.rb +8 -0
- data/lib/wukong/dataflow.rb +45 -78
- data/lib/wukong/driver.rb +99 -0
- data/lib/wukong/emitter.rb +22 -0
- data/lib/wukong/model/faker.rb +24 -24
- data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
- data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
- data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
- data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
- data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
- data/lib/wukong/processor.rb +60 -114
- data/lib/wukong/spec_helpers.rb +81 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
- data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
- data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
- data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
- data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
- data/lib/wukong/version.rb +2 -1
- data/lib/wukong/widget/filters.rb +311 -0
- data/lib/wukong/widget/processors.rb +156 -0
- data/lib/wukong/widget/reducers.rb +7 -0
- data/lib/wukong/widget/reducers/accumulator.rb +73 -0
- data/lib/wukong/widget/reducers/bin.rb +318 -0
- data/lib/wukong/widget/reducers/count.rb +61 -0
- data/lib/wukong/widget/reducers/group.rb +85 -0
- data/lib/wukong/widget/reducers/group_concat.rb +70 -0
- data/lib/wukong/widget/reducers/moments.rb +72 -0
- data/lib/wukong/widget/reducers/sort.rb +130 -0
- data/lib/wukong/widget/serializers.rb +287 -0
- data/lib/wukong/widget/sink.rb +10 -52
- data/lib/wukong/widget/source.rb +7 -113
- data/lib/wukong/widget/utils.rb +46 -0
- data/lib/wukong/widgets.rb +6 -0
- data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
- data/spec/examples/dataflow/parsing_spec.rb +12 -11
- data/spec/examples/dataflow/simple_spec.rb +32 -6
- data/spec/examples/dataflow/telegram_spec.rb +36 -36
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
- data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
- data/spec/examples/munging/airline_flights_spec.rb +202 -0
- data/spec/examples/text/pig_latin_spec.rb +13 -16
- data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
- data/spec/hanuman/graph_spec.rb +27 -2
- data/spec/hanuman/hanuman_spec.rb +10 -0
- data/spec/hanuman/registry_spec.rb +123 -0
- data/spec/hanuman/stage_spec.rb +61 -7
- data/spec/spec_helper.rb +29 -19
- data/spec/support/hanuman_test_helpers.rb +14 -12
- data/spec/support/shared_context_for_reducers.rb +37 -0
- data/spec/support/shared_examples_for_builders.rb +101 -0
- data/spec/support/shared_examples_for_shortcuts.rb +57 -0
- data/spec/support/wukong_test_helpers.rb +37 -11
- data/spec/wukong/dataflow_spec.rb +77 -55
- data/spec/wukong/local_runner_spec.rb +24 -24
- data/spec/wukong/model/faker_spec.rb +132 -131
- data/spec/wukong/runner_spec.rb +8 -8
- data/spec/wukong/widget/filters_spec.rb +61 -0
- data/spec/wukong/widget/processors_spec.rb +126 -0
- data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
- data/spec/wukong/widget/reducers/count_spec.rb +11 -0
- data/spec/wukong/widget/reducers/group_spec.rb +20 -0
- data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
- data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
- data/spec/wukong/widget/serializers_spec.rb +92 -0
- data/spec/wukong/widget/sink_spec.rb +15 -15
- data/spec/wukong/widget/source_spec.rb +65 -41
- data/spec/wukong/wukong_spec.rb +10 -0
- data/wukong.gemspec +17 -10
- metadata +359 -335
- data/.document +0 -5
- data/VERSION +0 -1
- data/bin/hdp-bin +0 -44
- data/bin/hdp-bzip +0 -23
- data/bin/hdp-cat +0 -3
- data/bin/hdp-catd +0 -3
- data/bin/hdp-cp +0 -3
- data/bin/hdp-du +0 -86
- data/bin/hdp-get +0 -3
- data/bin/hdp-kill +0 -3
- data/bin/hdp-kill-task +0 -3
- data/bin/hdp-ls +0 -11
- data/bin/hdp-mkdir +0 -2
- data/bin/hdp-mkdirp +0 -12
- data/bin/hdp-mv +0 -3
- data/bin/hdp-parts_to_keys.rb +0 -77
- data/bin/hdp-ps +0 -3
- data/bin/hdp-put +0 -3
- data/bin/hdp-rm +0 -32
- data/bin/hdp-sort +0 -40
- data/bin/hdp-stream +0 -40
- data/bin/hdp-stream-flat +0 -22
- data/bin/hdp-stream2 +0 -39
- data/bin/hdp-sync +0 -17
- data/bin/hdp-wc +0 -67
- data/bin/wu-flow +0 -10
- data/bin/wu-map +0 -17
- data/bin/wu-red +0 -17
- data/bin/wukong +0 -17
- data/data/CREDITS.md +0 -355
- data/data/graph/airfares.tsv +0 -2174
- data/data/text/gift_of_the_magi.txt +0 -225
- data/data/text/jabberwocky.txt +0 -36
- data/data/text/rectification_of_names.txt +0 -33
- data/data/twitter/a_atsigns_b.tsv +0 -64
- data/data/twitter/a_follows_b.tsv +0 -53
- data/data/twitter/tweet.tsv +0 -167
- data/data/twitter/twitter_user.tsv +0 -55
- data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
- data/docpages/INSTALL.textile +0 -92
- data/docpages/LICENSE.textile +0 -107
- data/docpages/README-elastic_map_reduce.textile +0 -377
- data/docpages/README-performance.textile +0 -90
- data/docpages/README-wulign.textile +0 -65
- data/docpages/UsingWukong-part1-get_ready.textile +0 -17
- data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
- data/docpages/UsingWukong-part3-parsing.textile +0 -138
- data/docpages/_config.yml +0 -39
- data/docpages/avro/avro_notes.textile +0 -56
- data/docpages/avro/performance.textile +0 -36
- data/docpages/avro/tethering.textile +0 -19
- data/docpages/bigdata-tips.textile +0 -143
- data/docpages/code/api_response_example.txt +0 -20
- data/docpages/code/parser_skeleton.rb +0 -38
- data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +0 -16
- data/docpages/hadoop-tips.textile +0 -83
- data/docpages/index.textile +0 -92
- data/docpages/intro.textile +0 -8
- data/docpages/moreinfo.textile +0 -174
- data/docpages/news.html +0 -24
- data/docpages/pig/PigLatinExpressionsList.txt +0 -122
- data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
- data/docpages/pig/commandline_params.txt +0 -26
- data/docpages/pig/cookbook.html +0 -481
- data/docpages/pig/images/hadoop-logo.jpg +0 -0
- data/docpages/pig/images/instruction_arrow.png +0 -0
- data/docpages/pig/images/pig-logo.gif +0 -0
- data/docpages/pig/piglatin_ref1.html +0 -1103
- data/docpages/pig/piglatin_ref2.html +0 -14340
- data/docpages/pig/setup.html +0 -505
- data/docpages/pig/skin/basic.css +0 -166
- data/docpages/pig/skin/breadcrumbs.js +0 -237
- data/docpages/pig/skin/fontsize.js +0 -166
- data/docpages/pig/skin/getBlank.js +0 -40
- data/docpages/pig/skin/getMenu.js +0 -45
- data/docpages/pig/skin/images/chapter.gif +0 -0
- data/docpages/pig/skin/images/chapter_open.gif +0 -0
- data/docpages/pig/skin/images/current.gif +0 -0
- data/docpages/pig/skin/images/external-link.gif +0 -0
- data/docpages/pig/skin/images/header_white_line.gif +0 -0
- data/docpages/pig/skin/images/page.gif +0 -0
- data/docpages/pig/skin/images/pdfdoc.gif +0 -0
- data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/print.css +0 -54
- data/docpages/pig/skin/profile.css +0 -181
- data/docpages/pig/skin/screen.css +0 -587
- data/docpages/pig/tutorial.html +0 -1059
- data/docpages/pig/udf.html +0 -1509
- data/docpages/tutorial.textile +0 -283
- data/docpages/usage.textile +0 -195
- data/docpages/wutils.textile +0 -263
- data/examples/dataflow/complex.rb +0 -11
- data/examples/dataflow/donuts.rb +0 -13
- data/examples/tiny_count/jabberwocky_output.tsv +0 -92
- data/examples/word_count.rb +0 -48
- data/examples/workflow/fiddle.rb +0 -24
- data/lib/away/escapement.rb +0 -129
- data/lib/away/exe.rb +0 -11
- data/lib/away/experimental.rb +0 -5
- data/lib/away/from_file.rb +0 -52
- data/lib/away/job.rb +0 -56
- data/lib/away/job/rake_compat.rb +0 -17
- data/lib/away/registry.rb +0 -79
- data/lib/away/runner.rb +0 -276
- data/lib/away/runner/execute.rb +0 -121
- data/lib/away/script.rb +0 -161
- data/lib/away/script/hadoop_command.rb +0 -240
- data/lib/away/source/file_list_source.rb +0 -15
- data/lib/away/source/looper.rb +0 -18
- data/lib/away/task.rb +0 -219
- data/lib/hanuman/action.rb +0 -21
- data/lib/hanuman/chain.rb +0 -4
- data/lib/hanuman/graphviz.rb +0 -74
- data/lib/hanuman/resource.rb +0 -6
- data/lib/hanuman/slot.rb +0 -87
- data/lib/hanuman/slottable.rb +0 -220
- data/lib/wukong/bad_record.rb +0 -15
- data/lib/wukong/event.rb +0 -44
- data/lib/wukong/local_runner.rb +0 -55
- data/lib/wukong/mapred.rb +0 -3
- data/lib/wukong/universe.rb +0 -48
- data/lib/wukong/widget/filter.rb +0 -81
- data/lib/wukong/widget/gibberish.rb +0 -123
- data/lib/wukong/widget/monitor.rb +0 -26
- data/lib/wukong/widget/reducer.rb +0 -66
- data/lib/wukong/widget/stringifier.rb +0 -50
- data/lib/wukong/workflow.rb +0 -22
- data/lib/wukong/workflow/command.rb +0 -42
- data/old/config/emr-example.yaml +0 -48
- data/old/examples/README.txt +0 -17
- data/old/examples/contrib/jeans/README.markdown +0 -165
- data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
- data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
- data/old/examples/contrib/jeans/data/sizes +0 -3
- data/old/examples/contrib/jeans/normalize.rb +0 -20
- data/old/examples/contrib/jeans/sizes.rb +0 -55
- data/old/examples/corpus/bnc_word_freq.rb +0 -44
- data/old/examples/corpus/bucket_counter.rb +0 -47
- data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
- data/old/examples/corpus/sentence_bigrams.rb +0 -53
- data/old/examples/corpus/sentence_coocurrence.rb +0 -66
- data/old/examples/corpus/stopwords.rb +0 -138
- data/old/examples/corpus/words_to_bigrams.rb +0 -53
- data/old/examples/emr/README.textile +0 -110
- data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
- data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
- data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
- data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
- data/old/examples/network_graph/adjacency_list.rb +0 -74
- data/old/examples/network_graph/breadth_first_search.rb +0 -72
- data/old/examples/network_graph/gen_2paths.rb +0 -68
- data/old/examples/network_graph/gen_multi_edge.rb +0 -112
- data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
- data/old/examples/pagerank/README.textile +0 -6
- data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
- data/old/examples/pagerank/pagerank.rb +0 -72
- data/old/examples/pagerank/pagerank_initialize.rb +0 -42
- data/old/examples/pagerank/run_pagerank.sh +0 -21
- data/old/examples/sample_records.rb +0 -33
- data/old/examples/server_logs/apache_log_parser.rb +0 -15
- data/old/examples/server_logs/nook.rb +0 -48
- data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
- data/old/examples/server_logs/user_agent.rb +0 -40
- data/old/examples/simple_word_count.rb +0 -82
- data/old/examples/size.rb +0 -61
- data/old/examples/stats/avg_value_frequency.rb +0 -86
- data/old/examples/stats/binning_percentile_estimator.rb +0 -140
- data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
- data/old/examples/stats/rank_and_bin.rb +0 -173
- data/old/examples/stupidly_simple_filter.rb +0 -40
- data/old/examples/word_count.rb +0 -75
- data/old/graph/graphviz_builder.rb +0 -580
- data/old/graph_easy/Attributes.pm +0 -4181
- data/old/graph_easy/Graphviz.pm +0 -2232
- data/old/wukong.rb +0 -18
- data/old/wukong/and_pig.rb +0 -38
- data/old/wukong/bad_record.rb +0 -18
- data/old/wukong/datatypes.rb +0 -24
- data/old/wukong/datatypes/enum.rb +0 -127
- data/old/wukong/datatypes/fake_types.rb +0 -17
- data/old/wukong/decorator.rb +0 -28
- data/old/wukong/encoding/asciize.rb +0 -108
- data/old/wukong/extensions.rb +0 -16
- data/old/wukong/extensions/array.rb +0 -18
- data/old/wukong/extensions/blank.rb +0 -93
- data/old/wukong/extensions/class.rb +0 -189
- data/old/wukong/extensions/date_time.rb +0 -53
- data/old/wukong/extensions/emittable.rb +0 -69
- data/old/wukong/extensions/enumerable.rb +0 -79
- data/old/wukong/extensions/hash.rb +0 -167
- data/old/wukong/extensions/hash_keys.rb +0 -16
- data/old/wukong/extensions/hash_like.rb +0 -150
- data/old/wukong/extensions/hashlike_class.rb +0 -47
- data/old/wukong/extensions/module.rb +0 -2
- data/old/wukong/extensions/pathname.rb +0 -27
- data/old/wukong/extensions/string.rb +0 -65
- data/old/wukong/extensions/struct.rb +0 -17
- data/old/wukong/extensions/symbol.rb +0 -11
- data/old/wukong/filename_pattern.rb +0 -74
- data/old/wukong/helper.rb +0 -7
- data/old/wukong/helper/stopwords.rb +0 -195
- data/old/wukong/helper/tokenize.rb +0 -35
- data/old/wukong/logger.rb +0 -38
- data/old/wukong/periodic_monitor.rb +0 -72
- data/old/wukong/schema.rb +0 -269
- data/old/wukong/script.rb +0 -286
- data/old/wukong/script/avro_command.rb +0 -5
- data/old/wukong/script/cassandra_loader_script.rb +0 -40
- data/old/wukong/script/emr_command.rb +0 -168
- data/old/wukong/script/hadoop_command.rb +0 -237
- data/old/wukong/script/local_command.rb +0 -41
- data/old/wukong/store.rb +0 -10
- data/old/wukong/store/base.rb +0 -27
- data/old/wukong/store/cassandra.rb +0 -10
- data/old/wukong/store/cassandra/streaming.rb +0 -75
- data/old/wukong/store/cassandra/struct_loader.rb +0 -21
- data/old/wukong/store/cassandra_model.rb +0 -91
- data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
- data/old/wukong/store/chunked_flat_file_store.rb +0 -48
- data/old/wukong/store/conditional_store.rb +0 -57
- data/old/wukong/store/factory.rb +0 -8
- data/old/wukong/store/flat_file_store.rb +0 -89
- data/old/wukong/store/key_store.rb +0 -51
- data/old/wukong/store/null_store.rb +0 -15
- data/old/wukong/store/read_thru_store.rb +0 -22
- data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
- data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
- data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
- data/old/wukong/streamer.rb +0 -30
- data/old/wukong/streamer/accumulating_reducer.rb +0 -83
- data/old/wukong/streamer/base.rb +0 -126
- data/old/wukong/streamer/counting_reducer.rb +0 -25
- data/old/wukong/streamer/filter.rb +0 -20
- data/old/wukong/streamer/instance_streamer.rb +0 -15
- data/old/wukong/streamer/json_streamer.rb +0 -21
- data/old/wukong/streamer/line_streamer.rb +0 -12
- data/old/wukong/streamer/list_reducer.rb +0 -31
- data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
- data/old/wukong/streamer/record_streamer.rb +0 -14
- data/old/wukong/streamer/reducer.rb +0 -11
- data/old/wukong/streamer/set_reducer.rb +0 -14
- data/old/wukong/streamer/struct_streamer.rb +0 -48
- data/old/wukong/streamer/summing_reducer.rb +0 -29
- data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
- data/old/wukong/typed_struct.rb +0 -12
- data/spec/away/encoding_spec.rb +0 -32
- data/spec/away/exe_spec.rb +0 -20
- data/spec/away/flow_spec.rb +0 -82
- data/spec/away/graph_spec.rb +0 -6
- data/spec/away/job_spec.rb +0 -15
- data/spec/away/rake_compat_spec.rb +0 -9
- data/spec/away/script_spec.rb +0 -81
- data/spec/hanuman/graphviz_spec.rb +0 -29
- data/spec/hanuman/slot_spec.rb +0 -2
- data/spec/support/examples_helper.rb +0 -10
- data/spec/support/streamer_test_helpers.rb +0 -6
- data/spec/support/wukong_widget_helpers.rb +0 -66
- data/spec/wukong/processor_spec.rb +0 -109
- data/spec/wukong/widget/filter_spec.rb +0 -99
- data/spec/wukong/widget/stringifier_spec.rb +0 -51
- data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,19 +1,19 @@
|
|
1
|
-
require 'spec_helper'
|
1
|
+
# require 'spec_helper'
|
2
2
|
|
3
|
-
describe :sinks, :helpers => true do
|
3
|
+
# describe :sinks, :helpers => true do
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
# describe Wukong::Sink::Stdout do
|
6
|
+
# it 'dumps records to $stdout' do
|
7
|
+
# $stdout.should_receive(:puts).with(mock_record)
|
8
|
+
# subject.process(mock_record)
|
9
|
+
# end
|
10
|
+
# end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
# describe Wukong::Sink::Stderr do
|
13
|
+
# it 'dumps records to $stderr' do
|
14
|
+
# $stderr.should_receive(:puts).with(mock_record)
|
15
|
+
# subject.process(mock_record)
|
16
|
+
# end
|
17
17
|
|
18
|
-
|
19
|
-
end
|
18
|
+
# end
|
19
|
+
# end
|
@@ -1,41 +1,65 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'wukong/widget/gibberish'
|
3
|
-
|
4
|
-
describe :sources, :helpers => true do
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
1
|
+
# require 'spec_helper'
|
2
|
+
# require 'wukong/widget/gibberish'
|
3
|
+
|
4
|
+
# describe :sources, :helpers => true do
|
5
|
+
|
6
|
+
# describe Wukong::Source::Iter do
|
7
|
+
# subject{ described_class.new(obj: (9 .. 14), owner: test_dataflow) }
|
8
|
+
# it 'iterates over a given collection' do
|
9
|
+
# subject.to_enum.to_a.should == [9, 10, 11, 12, 13, 14]
|
10
|
+
# end
|
11
|
+
# context 'dataflow method' do
|
12
|
+
# it 'simplified args' do
|
13
|
+
# test_dataflow.iter(9 .. 14).should == subject
|
14
|
+
# end
|
15
|
+
# end
|
16
|
+
# end
|
17
|
+
|
18
|
+
# # describe Wukong::Source::FileSource do
|
19
|
+
# # let(:example_filename){ Pathname.path_to(:data, 'text/jabberwocky.txt') }
|
20
|
+
# # subject{ described_class.receive(filename: example_filename, owner: test_dataflow) }
|
21
|
+
# # before{ subject.setup }
|
22
|
+
# # it 'iterates over a given collection' do
|
23
|
+
# # subject.to_enum.to_a[6, 4].should == ["`Twas brillig, and the slithy toves", " Did gyre and gimble in the wabe:", "All mimsy were the borogoves,", " And the mome raths outgrabe.",]
|
24
|
+
# # end
|
25
|
+
# # context 'dataflow method' do
|
26
|
+
# # it 'simplified args' do
|
27
|
+
# # test_dataflow.file_source(example_filename).should == subject
|
28
|
+
# # end
|
29
|
+
# # end
|
30
|
+
# # end
|
31
|
+
|
32
|
+
# describe Wukong::Source::Integers do
|
33
|
+
# subject{ described_class.receive(qty: 10, owner: test_dataflow) }
|
34
|
+
# before{ subject.setup }
|
35
|
+
|
36
|
+
# it 'generates integers up to the given limit' do
|
37
|
+
# subject.to_enum.to_a.should == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
38
|
+
# end
|
39
|
+
# it 'generates nothing if the initial range is void' do
|
40
|
+
# subject.qty = 0
|
41
|
+
# subject.to_enum.to_a.should == []
|
42
|
+
# end
|
43
|
+
# it 'generates one thing if the min and max are equal' do
|
44
|
+
# subject.qty = 1
|
45
|
+
# subject.to_enum.to_a.should == [0]
|
46
|
+
# end
|
47
|
+
|
48
|
+
# context 'dataflow method' do
|
49
|
+
# it 'takes simplified args' do
|
50
|
+
# test_dataflow.integers(10).should == subject
|
51
|
+
# end
|
52
|
+
# end
|
53
|
+
# end
|
54
|
+
|
55
|
+
# describe Wukong::Widget::Gibberish do
|
56
|
+
# subject{ described_class.receive(:qty => 4) }
|
57
|
+
# before{ subject.setup }
|
58
|
+
|
59
|
+
# it 'generates integers up to the given limit' do
|
60
|
+
# subject.rng = Random.new(8675309)
|
61
|
+
# subject.to_enum.to_a.should == ["loaiaeiaeo neidgfo heeume sptfmeec naet sttptlm waaaioh detov elrrltv nii ulcsnn", "set ensr poeleaa seqi tmnreoee boooral oczncgp deaia rykcoao leo rim mmibpbfii", "artrru sto quuu doo peoehrile nto esl tia gaili tuiooey lkedotp sts kaiy weeeia", "crhi tyiiae mieubmbooa teeae roi ednz taieh zaloy syhe ret kuoa deeeo xittipl mo"]
|
62
|
+
# end
|
63
|
+
|
64
|
+
# end
|
65
|
+
# end
|
data/wukong.gemspec
CHANGED
@@ -2,12 +2,15 @@
|
|
2
2
|
require File.expand_path('../lib/wukong/version', __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
|
-
gem.name
|
6
|
-
gem.
|
7
|
-
gem.
|
8
|
-
gem.
|
9
|
-
gem.
|
10
|
-
gem.
|
5
|
+
gem.name = 'wukong'
|
6
|
+
gem.homepage = 'https://github.com/infochimps-labs/wukong'
|
7
|
+
gem.licenses = ["Apache 2.0"]
|
8
|
+
gem.email = 'coders@infochimps.org'
|
9
|
+
gem.authors = ['Infochimps', 'Philip (flip) Kromer', 'Travis Dempsey']
|
10
|
+
gem.version = Wukong::VERSION
|
11
|
+
|
12
|
+
gem.summary = 'Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use it, yet handles terabyte-scale computation with ease.'
|
13
|
+
gem.description = <<-EOF
|
11
14
|
Treat your dataset like a:
|
12
15
|
|
13
16
|
* stream of lines when it's efficient to process by lines
|
@@ -15,21 +18,23 @@ Gem::Specification.new do |gem|
|
|
15
18
|
* stream of lightweight objects when it's efficient to deal with objects
|
16
19
|
|
17
20
|
Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
|
18
|
-
|
21
|
+
EOF
|
19
22
|
|
20
|
-
gem.files = `git ls-files`.split("\n")
|
21
|
-
gem.executables = []
|
23
|
+
gem.files = `git ls-files`.split("\n").reject { |path| path =~ /^(data|docpages|notes|old)/ }
|
24
|
+
gem.executables = ['wu-local']
|
22
25
|
gem.test_files = gem.files.grep(/^spec/)
|
23
26
|
gem.require_paths = ['lib']
|
24
27
|
|
25
|
-
gem.add_dependency('bundler', '~> 1.1')
|
26
28
|
gem.add_dependency('configliere', '~> 0.4')
|
27
29
|
gem.add_dependency('multi_json', '>= 1.3.6')
|
28
30
|
gem.add_dependency('vayacondios-client', '>= 0.0.3')
|
29
31
|
gem.add_dependency('gorillib', '>= 0.4.2')
|
30
32
|
gem.add_dependency('forgery')
|
31
33
|
gem.add_dependency('uuidtools')
|
34
|
+
gem.add_dependency('eventmachine')
|
35
|
+
gem.add_dependency('log4r')
|
32
36
|
|
37
|
+
gem.add_development_dependency('bundler', '~> 1.1')
|
33
38
|
gem.add_development_dependency('rake', '>= 0.9')
|
34
39
|
gem.add_development_dependency('rspec', '>= 2.8')
|
35
40
|
gem.add_development_dependency('guard', '>= 1.0')
|
@@ -38,5 +43,7 @@ DESC
|
|
38
43
|
gem.add_development_dependency('pry')
|
39
44
|
gem.add_development_dependency('yard')
|
40
45
|
gem.add_development_dependency('redcarpet')
|
46
|
+
gem.add_development_dependency('addressable')
|
47
|
+
gem.add_development_dependency('htmlentities')
|
41
48
|
|
42
49
|
end
|
metadata
CHANGED
@@ -1,42 +1,37 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0.
|
4
|
+
version: 3.0.0.pre2
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
|
+
- Infochimps
|
8
9
|
- Philip (flip) Kromer
|
9
10
|
- Travis Dempsey
|
10
11
|
autorequire:
|
11
12
|
bindir: bin
|
12
13
|
cert_chain: []
|
13
|
-
date: 2012-
|
14
|
+
date: 2012-12-01 00:00:00.000000000 Z
|
14
15
|
dependencies:
|
15
16
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
17
|
-
requirement:
|
17
|
+
name: configliere
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
18
19
|
none: false
|
19
20
|
requirements:
|
20
21
|
- - ~>
|
21
22
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
23
|
+
version: '0.4'
|
23
24
|
type: :runtime
|
24
25
|
prerelease: false
|
25
|
-
version_requirements:
|
26
|
-
- !ruby/object:Gem::Dependency
|
27
|
-
name: configliere
|
28
|
-
requirement: &2162964020 !ruby/object:Gem::Requirement
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
29
27
|
none: false
|
30
28
|
requirements:
|
31
29
|
- - ~>
|
32
30
|
- !ruby/object:Gem::Version
|
33
31
|
version: '0.4'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: *2162964020
|
37
32
|
- !ruby/object:Gem::Dependency
|
38
33
|
name: multi_json
|
39
|
-
requirement:
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
40
35
|
none: false
|
41
36
|
requirements:
|
42
37
|
- - ! '>='
|
@@ -44,10 +39,15 @@ dependencies:
|
|
44
39
|
version: 1.3.6
|
45
40
|
type: :runtime
|
46
41
|
prerelease: false
|
47
|
-
version_requirements:
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.3.6
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: vayacondios-client
|
50
|
-
requirement:
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,15 @@ dependencies:
|
|
55
55
|
version: 0.0.3
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
|
-
version_requirements:
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 0.0.3
|
59
64
|
- !ruby/object:Gem::Dependency
|
60
65
|
name: gorillib
|
61
|
-
requirement:
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
62
67
|
none: false
|
63
68
|
requirements:
|
64
69
|
- - ! '>='
|
@@ -66,10 +71,15 @@ dependencies:
|
|
66
71
|
version: 0.4.2
|
67
72
|
type: :runtime
|
68
73
|
prerelease: false
|
69
|
-
version_requirements:
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 0.4.2
|
70
80
|
- !ruby/object:Gem::Dependency
|
71
81
|
name: forgery
|
72
|
-
requirement:
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
73
83
|
none: false
|
74
84
|
requirements:
|
75
85
|
- - ! '>='
|
@@ -77,10 +87,31 @@ dependencies:
|
|
77
87
|
version: '0'
|
78
88
|
type: :runtime
|
79
89
|
prerelease: false
|
80
|
-
version_requirements:
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ! '>='
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
81
96
|
- !ruby/object:Gem::Dependency
|
82
97
|
name: uuidtools
|
83
|
-
requirement:
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ! '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
- !ruby/object:Gem::Dependency
|
113
|
+
name: eventmachine
|
114
|
+
requirement: !ruby/object:Gem::Requirement
|
84
115
|
none: false
|
85
116
|
requirements:
|
86
117
|
- - ! '>='
|
@@ -88,10 +119,47 @@ dependencies:
|
|
88
119
|
version: '0'
|
89
120
|
type: :runtime
|
90
121
|
prerelease: false
|
91
|
-
version_requirements:
|
122
|
+
version_requirements: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ! '>='
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: '0'
|
128
|
+
- !ruby/object:Gem::Dependency
|
129
|
+
name: log4r
|
130
|
+
requirement: !ruby/object:Gem::Requirement
|
131
|
+
none: false
|
132
|
+
requirements:
|
133
|
+
- - ! '>='
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0'
|
136
|
+
type: :runtime
|
137
|
+
prerelease: false
|
138
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
|
+
none: false
|
140
|
+
requirements:
|
141
|
+
- - ! '>='
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
- !ruby/object:Gem::Dependency
|
145
|
+
name: bundler
|
146
|
+
requirement: !ruby/object:Gem::Requirement
|
147
|
+
none: false
|
148
|
+
requirements:
|
149
|
+
- - ~>
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '1.1'
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
none: false
|
156
|
+
requirements:
|
157
|
+
- - ~>
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '1.1'
|
92
160
|
- !ruby/object:Gem::Dependency
|
93
161
|
name: rake
|
94
|
-
requirement:
|
162
|
+
requirement: !ruby/object:Gem::Requirement
|
95
163
|
none: false
|
96
164
|
requirements:
|
97
165
|
- - ! '>='
|
@@ -99,10 +167,15 @@ dependencies:
|
|
99
167
|
version: '0.9'
|
100
168
|
type: :development
|
101
169
|
prerelease: false
|
102
|
-
version_requirements:
|
170
|
+
version_requirements: !ruby/object:Gem::Requirement
|
171
|
+
none: false
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0.9'
|
103
176
|
- !ruby/object:Gem::Dependency
|
104
177
|
name: rspec
|
105
|
-
requirement:
|
178
|
+
requirement: !ruby/object:Gem::Requirement
|
106
179
|
none: false
|
107
180
|
requirements:
|
108
181
|
- - ! '>='
|
@@ -110,10 +183,15 @@ dependencies:
|
|
110
183
|
version: '2.8'
|
111
184
|
type: :development
|
112
185
|
prerelease: false
|
113
|
-
version_requirements:
|
186
|
+
version_requirements: !ruby/object:Gem::Requirement
|
187
|
+
none: false
|
188
|
+
requirements:
|
189
|
+
- - ! '>='
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
version: '2.8'
|
114
192
|
- !ruby/object:Gem::Dependency
|
115
193
|
name: guard
|
116
|
-
requirement:
|
194
|
+
requirement: !ruby/object:Gem::Requirement
|
117
195
|
none: false
|
118
196
|
requirements:
|
119
197
|
- - ! '>='
|
@@ -121,10 +199,15 @@ dependencies:
|
|
121
199
|
version: '1.0'
|
122
200
|
type: :development
|
123
201
|
prerelease: false
|
124
|
-
version_requirements:
|
202
|
+
version_requirements: !ruby/object:Gem::Requirement
|
203
|
+
none: false
|
204
|
+
requirements:
|
205
|
+
- - ! '>='
|
206
|
+
- !ruby/object:Gem::Version
|
207
|
+
version: '1.0'
|
125
208
|
- !ruby/object:Gem::Dependency
|
126
209
|
name: guard-rspec
|
127
|
-
requirement:
|
210
|
+
requirement: !ruby/object:Gem::Requirement
|
128
211
|
none: false
|
129
212
|
requirements:
|
130
213
|
- - ! '>='
|
@@ -132,10 +215,15 @@ dependencies:
|
|
132
215
|
version: '0.6'
|
133
216
|
type: :development
|
134
217
|
prerelease: false
|
135
|
-
version_requirements:
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
none: false
|
220
|
+
requirements:
|
221
|
+
- - ! '>='
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
version: '0.6'
|
136
224
|
- !ruby/object:Gem::Dependency
|
137
225
|
name: simplecov
|
138
|
-
requirement:
|
226
|
+
requirement: !ruby/object:Gem::Requirement
|
139
227
|
none: false
|
140
228
|
requirements:
|
141
229
|
- - ! '>='
|
@@ -143,10 +231,15 @@ dependencies:
|
|
143
231
|
version: '0.5'
|
144
232
|
type: :development
|
145
233
|
prerelease: false
|
146
|
-
version_requirements:
|
234
|
+
version_requirements: !ruby/object:Gem::Requirement
|
235
|
+
none: false
|
236
|
+
requirements:
|
237
|
+
- - ! '>='
|
238
|
+
- !ruby/object:Gem::Version
|
239
|
+
version: '0.5'
|
147
240
|
- !ruby/object:Gem::Dependency
|
148
241
|
name: pry
|
149
|
-
requirement:
|
242
|
+
requirement: !ruby/object:Gem::Requirement
|
150
243
|
none: false
|
151
244
|
requirements:
|
152
245
|
- - ! '>='
|
@@ -154,10 +247,15 @@ dependencies:
|
|
154
247
|
version: '0'
|
155
248
|
type: :development
|
156
249
|
prerelease: false
|
157
|
-
version_requirements:
|
250
|
+
version_requirements: !ruby/object:Gem::Requirement
|
251
|
+
none: false
|
252
|
+
requirements:
|
253
|
+
- - ! '>='
|
254
|
+
- !ruby/object:Gem::Version
|
255
|
+
version: '0'
|
158
256
|
- !ruby/object:Gem::Dependency
|
159
257
|
name: yard
|
160
|
-
requirement:
|
258
|
+
requirement: !ruby/object:Gem::Requirement
|
161
259
|
none: false
|
162
260
|
requirements:
|
163
261
|
- - ! '>='
|
@@ -165,10 +263,47 @@ dependencies:
|
|
165
263
|
version: '0'
|
166
264
|
type: :development
|
167
265
|
prerelease: false
|
168
|
-
version_requirements:
|
266
|
+
version_requirements: !ruby/object:Gem::Requirement
|
267
|
+
none: false
|
268
|
+
requirements:
|
269
|
+
- - ! '>='
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: '0'
|
169
272
|
- !ruby/object:Gem::Dependency
|
170
273
|
name: redcarpet
|
171
|
-
requirement:
|
274
|
+
requirement: !ruby/object:Gem::Requirement
|
275
|
+
none: false
|
276
|
+
requirements:
|
277
|
+
- - ! '>='
|
278
|
+
- !ruby/object:Gem::Version
|
279
|
+
version: '0'
|
280
|
+
type: :development
|
281
|
+
prerelease: false
|
282
|
+
version_requirements: !ruby/object:Gem::Requirement
|
283
|
+
none: false
|
284
|
+
requirements:
|
285
|
+
- - ! '>='
|
286
|
+
- !ruby/object:Gem::Version
|
287
|
+
version: '0'
|
288
|
+
- !ruby/object:Gem::Dependency
|
289
|
+
name: addressable
|
290
|
+
requirement: !ruby/object:Gem::Requirement
|
291
|
+
none: false
|
292
|
+
requirements:
|
293
|
+
- - ! '>='
|
294
|
+
- !ruby/object:Gem::Version
|
295
|
+
version: '0'
|
296
|
+
type: :development
|
297
|
+
prerelease: false
|
298
|
+
version_requirements: !ruby/object:Gem::Requirement
|
299
|
+
none: false
|
300
|
+
requirements:
|
301
|
+
- - ! '>='
|
302
|
+
- !ruby/object:Gem::Version
|
303
|
+
version: '0'
|
304
|
+
- !ruby/object:Gem::Dependency
|
305
|
+
name: htmlentities
|
306
|
+
requirement: !ruby/object:Gem::Requirement
|
172
307
|
none: false
|
173
308
|
requirements:
|
174
309
|
- - ! '>='
|
@@ -176,18 +311,23 @@ dependencies:
|
|
176
311
|
version: '0'
|
177
312
|
type: :development
|
178
313
|
prerelease: false
|
179
|
-
version_requirements:
|
314
|
+
version_requirements: !ruby/object:Gem::Requirement
|
315
|
+
none: false
|
316
|
+
requirements:
|
317
|
+
- - ! '>='
|
318
|
+
- !ruby/object:Gem::Version
|
319
|
+
version: '0'
|
180
320
|
description: ! " Treat your dataset like a:\n\n * stream of lines when it's
|
181
321
|
efficient to process by lines\n * stream of field arrays when it's efficient
|
182
322
|
to deal directly with fields\n * stream of lightweight objects when it's efficient
|
183
323
|
to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query
|
184
324
|
language, and the cat on your command line.\n"
|
185
|
-
email:
|
186
|
-
executables:
|
325
|
+
email: coders@infochimps.org
|
326
|
+
executables:
|
327
|
+
- wu-local
|
187
328
|
extensions: []
|
188
329
|
extra_rdoc_files: []
|
189
330
|
files:
|
190
|
-
- .document
|
191
331
|
- .gitignore
|
192
332
|
- .gitmodules
|
193
333
|
- .rspec
|
@@ -196,142 +336,137 @@ files:
|
|
196
336
|
- CHANGELOG.md
|
197
337
|
- Gemfile
|
198
338
|
- Guardfile
|
199
|
-
- LICENSE.
|
339
|
+
- LICENSE.md
|
200
340
|
- NOTES-travis.md
|
341
|
+
- README-old.md
|
201
342
|
- README.md
|
202
343
|
- Rakefile
|
203
344
|
- TODO.md
|
204
|
-
- VERSION
|
205
345
|
- bin/cutc
|
206
346
|
- bin/cuttab
|
207
347
|
- bin/greptrue
|
208
|
-
- bin/hdp-bin
|
209
|
-
- bin/hdp-bzip
|
210
|
-
- bin/hdp-cat
|
211
|
-
- bin/hdp-catd
|
212
|
-
- bin/hdp-cp
|
213
|
-
- bin/hdp-du
|
214
|
-
- bin/hdp-get
|
215
|
-
- bin/hdp-kill
|
216
|
-
- bin/hdp-kill-task
|
217
|
-
- bin/hdp-ls
|
218
|
-
- bin/hdp-mkdir
|
219
|
-
- bin/hdp-mkdirp
|
220
|
-
- bin/hdp-mv
|
221
|
-
- bin/hdp-parts_to_keys.rb
|
222
|
-
- bin/hdp-ps
|
223
|
-
- bin/hdp-put
|
224
|
-
- bin/hdp-rm
|
225
|
-
- bin/hdp-sort
|
226
|
-
- bin/hdp-stream
|
227
|
-
- bin/hdp-stream-flat
|
228
|
-
- bin/hdp-stream2
|
229
|
-
- bin/hdp-sync
|
230
|
-
- bin/hdp-wc
|
231
348
|
- bin/md5sort
|
232
349
|
- bin/setcat
|
233
350
|
- bin/tabchar
|
234
351
|
- bin/uniq-ord
|
235
352
|
- bin/uniqc
|
353
|
+
- bin/wu-clean-encoding
|
236
354
|
- bin/wu-date
|
237
355
|
- bin/wu-datetime
|
238
|
-
- bin/wu-flow
|
239
356
|
- bin/wu-hist
|
240
357
|
- bin/wu-lign
|
241
|
-
- bin/wu-
|
358
|
+
- bin/wu-local
|
242
359
|
- bin/wu-plus
|
243
|
-
- bin/wu-
|
360
|
+
- bin/wu-server
|
244
361
|
- bin/wu-sum
|
245
|
-
-
|
246
|
-
-
|
247
|
-
- data/graph/airfares.tsv
|
248
|
-
- data/log/sample_apache_log.log
|
249
|
-
- data/text/gift_of_the_magi.txt
|
250
|
-
- data/text/jabberwocky.txt
|
251
|
-
- data/text/rectification_of_names.txt
|
252
|
-
- data/twitter/a_atsigns_b.tsv
|
253
|
-
- data/twitter/a_follows_b.tsv
|
254
|
-
- data/twitter/tweet.tsv
|
255
|
-
- data/twitter/twitter_user.tsv
|
256
|
-
- data/wikipedia/dbpedia-sentences.tsv
|
257
|
-
- docpages/INSTALL.textile
|
258
|
-
- docpages/LICENSE.textile
|
259
|
-
- docpages/README-elastic_map_reduce.textile
|
260
|
-
- docpages/README-performance.textile
|
261
|
-
- docpages/README-wulign.textile
|
262
|
-
- docpages/UsingWukong-part1-get_ready.textile
|
263
|
-
- docpages/UsingWukong-part2-ThinkingBigData.textile
|
264
|
-
- docpages/UsingWukong-part3-parsing.textile
|
265
|
-
- docpages/_config.yml
|
266
|
-
- docpages/avro/avro_notes.textile
|
267
|
-
- docpages/avro/performance.textile
|
268
|
-
- docpages/avro/tethering.textile
|
269
|
-
- docpages/bigdata-tips.textile
|
270
|
-
- docpages/code/api_response_example.txt
|
271
|
-
- docpages/code/parser_skeleton.rb
|
272
|
-
- docpages/diagrams/MapReduceDiagram.graffle
|
273
|
-
- docpages/favicon.ico
|
274
|
-
- docpages/gem.css
|
275
|
-
- docpages/hadoop-tips.textile
|
276
|
-
- docpages/index.textile
|
277
|
-
- docpages/intro.textile
|
278
|
-
- docpages/moreinfo.textile
|
279
|
-
- docpages/news.html
|
280
|
-
- docpages/pig/PigLatinExpressionsList.txt
|
281
|
-
- docpages/pig/PigLatinReferenceManual.txt
|
282
|
-
- docpages/pig/commandline_params.txt
|
283
|
-
- docpages/pig/cookbook.html
|
284
|
-
- docpages/pig/images/hadoop-logo.jpg
|
285
|
-
- docpages/pig/images/instruction_arrow.png
|
286
|
-
- docpages/pig/images/pig-logo.gif
|
287
|
-
- docpages/pig/piglatin_ref1.html
|
288
|
-
- docpages/pig/piglatin_ref2.html
|
289
|
-
- docpages/pig/setup.html
|
290
|
-
- docpages/pig/skin/basic.css
|
291
|
-
- docpages/pig/skin/breadcrumbs.js
|
292
|
-
- docpages/pig/skin/fontsize.js
|
293
|
-
- docpages/pig/skin/getBlank.js
|
294
|
-
- docpages/pig/skin/getMenu.js
|
295
|
-
- docpages/pig/skin/images/chapter.gif
|
296
|
-
- docpages/pig/skin/images/chapter_open.gif
|
297
|
-
- docpages/pig/skin/images/current.gif
|
298
|
-
- docpages/pig/skin/images/external-link.gif
|
299
|
-
- docpages/pig/skin/images/header_white_line.gif
|
300
|
-
- docpages/pig/skin/images/page.gif
|
301
|
-
- docpages/pig/skin/images/pdfdoc.gif
|
302
|
-
- docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png
|
303
|
-
- docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png
|
304
|
-
- docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png
|
305
|
-
- docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png
|
306
|
-
- docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png
|
307
|
-
- docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png
|
308
|
-
- docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png
|
309
|
-
- docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png
|
310
|
-
- docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png
|
311
|
-
- docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png
|
312
|
-
- docpages/pig/skin/print.css
|
313
|
-
- docpages/pig/skin/profile.css
|
314
|
-
- docpages/pig/skin/screen.css
|
315
|
-
- docpages/pig/tutorial.html
|
316
|
-
- docpages/pig/udf.html
|
317
|
-
- docpages/tutorial.textile
|
318
|
-
- docpages/usage.textile
|
319
|
-
- docpages/wutils.textile
|
320
|
-
- examples/dataflow.rb
|
362
|
+
- examples/Gemfile
|
363
|
+
- examples/README.md
|
321
364
|
- examples/dataflow/apache_log_line.rb
|
322
|
-
- examples/dataflow/
|
323
|
-
- examples/dataflow/donuts.rb
|
365
|
+
- examples/dataflow/fibonacci_series.rb
|
324
366
|
- examples/dataflow/parse_apache_logs.rb
|
325
367
|
- examples/dataflow/pig_latinizer.rb
|
368
|
+
- examples/dataflow/scraper_macro_flow.rb
|
326
369
|
- examples/dataflow/simple.rb
|
327
370
|
- examples/dataflow/telegram.rb
|
328
371
|
- examples/examples_helper.rb
|
372
|
+
- examples/geo.rb
|
373
|
+
- examples/geo/geo_grids.numbers
|
374
|
+
- examples/geo/geolocated.rb
|
375
|
+
- examples/geo/quadtile.rb
|
376
|
+
- examples/geo/spec/geolocated_spec.rb
|
377
|
+
- examples/geo/tile_fetcher.rb
|
329
378
|
- examples/graph/minimum_spanning_tree.rb
|
330
379
|
- examples/graph/union_find.rb
|
380
|
+
- examples/jabberwocky.txt
|
381
|
+
- examples/models/wikipedia.rb
|
382
|
+
- examples/munging/Gemfile
|
383
|
+
- examples/munging/airline_flights/airline.rb
|
384
|
+
- examples/munging/airline_flights/airline_flights.rake
|
385
|
+
- examples/munging/airline_flights/airplane.rb
|
386
|
+
- examples/munging/airline_flights/airport.rb
|
387
|
+
- examples/munging/airline_flights/airport_id_unification.rb
|
388
|
+
- examples/munging/airline_flights/airport_ok_chars.rb
|
389
|
+
- examples/munging/airline_flights/flight.rb
|
390
|
+
- examples/munging/airline_flights/models.rb
|
391
|
+
- examples/munging/airline_flights/parse.rb
|
392
|
+
- examples/munging/airline_flights/reconcile_airports.rb
|
393
|
+
- examples/munging/airline_flights/route.rb
|
394
|
+
- examples/munging/airline_flights/tasks.rake
|
395
|
+
- examples/munging/airline_flights/timezone_fixup.rb
|
396
|
+
- examples/munging/airline_flights/topcities.rb
|
397
|
+
- examples/munging/airports/40_wbans.txt
|
398
|
+
- examples/munging/airports/filter_weather_reports.rb
|
399
|
+
- examples/munging/airports/join.pig
|
400
|
+
- examples/munging/airports/to_tsv.rb
|
401
|
+
- examples/munging/airports/usa_wbans.pig
|
402
|
+
- examples/munging/airports/usa_wbans.txt
|
403
|
+
- examples/munging/airports/wbans.pig
|
404
|
+
- examples/munging/airports/wbans.txt
|
405
|
+
- examples/munging/geo/geo_json.rb
|
406
|
+
- examples/munging/geo/geo_models.rb
|
407
|
+
- examples/munging/geo/geonames_models.rb
|
408
|
+
- examples/munging/geo/iso_codes.rb
|
409
|
+
- examples/munging/geo/reconcile_countries.rb
|
410
|
+
- examples/munging/geo/tasks.rake
|
411
|
+
- examples/munging/rake_helper.rb
|
412
|
+
- examples/munging/weather/.gitignore
|
413
|
+
- examples/munging/weather/Gemfile
|
414
|
+
- examples/munging/weather/Rakefile
|
415
|
+
- examples/munging/weather/extract_ish.rb
|
416
|
+
- examples/munging/weather/models/weather.rb
|
417
|
+
- examples/munging/weather/utils/noaa_downloader.rb
|
418
|
+
- examples/munging/wikipedia/README.md
|
419
|
+
- examples/munging/wikipedia/Rakefile
|
420
|
+
- examples/munging/wikipedia/articles/extract_articles-parsed.rb
|
421
|
+
- examples/munging/wikipedia/articles/extract_articles-templated.rb
|
422
|
+
- examples/munging/wikipedia/articles/textualize_articles.rb
|
423
|
+
- examples/munging/wikipedia/articles/verify_structure.rb
|
424
|
+
- examples/munging/wikipedia/articles/wp2txt-LICENSE.txt
|
425
|
+
- examples/munging/wikipedia/articles/wp2txt_article.rb
|
426
|
+
- examples/munging/wikipedia/articles/wp2txt_utils.rb
|
427
|
+
- examples/munging/wikipedia/dbpedia/dbpedia_common.rb
|
428
|
+
- examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb
|
429
|
+
- examples/munging/wikipedia/dbpedia/extract_links.rb
|
430
|
+
- examples/munging/wikipedia/dbpedia/sameas_extractor.rb
|
431
|
+
- examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig
|
432
|
+
- examples/munging/wikipedia/page_metadata/extract_page_metadata.rb
|
433
|
+
- examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old
|
434
|
+
- examples/munging/wikipedia/pagelinks/augment_pagelinks.pig
|
435
|
+
- examples/munging/wikipedia/pagelinks/extract_pagelinks.rb
|
436
|
+
- examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old
|
437
|
+
- examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig
|
438
|
+
- examples/munging/wikipedia/pageviews/augment_pageviews.pig
|
439
|
+
- examples/munging/wikipedia/pageviews/extract_pageviews.rb
|
440
|
+
- examples/munging/wikipedia/pig_style_guide.md
|
441
|
+
- examples/munging/wikipedia/redirects/redirects_page_metadata.pig
|
442
|
+
- examples/munging/wikipedia/subuniverse/sub_articles.pig
|
443
|
+
- examples/munging/wikipedia/subuniverse/sub_page_metadata.pig
|
444
|
+
- examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig
|
445
|
+
- examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig
|
446
|
+
- examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig
|
447
|
+
- examples/munging/wikipedia/subuniverse/sub_pageviews.pig
|
448
|
+
- examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig
|
449
|
+
- examples/munging/wikipedia/utils/get_namespaces.rb
|
450
|
+
- examples/munging/wikipedia/utils/munging_utils.rb
|
451
|
+
- examples/munging/wikipedia/utils/namespaces.json
|
452
|
+
- examples/rake_helper.rb
|
453
|
+
- examples/server_logs/geo_ip_mapping/munge_geolite.rb
|
454
|
+
- examples/server_logs/logline.rb
|
455
|
+
- examples/server_logs/models.rb
|
456
|
+
- examples/server_logs/page_counts.pig
|
457
|
+
- examples/server_logs/server_logs-01-parse-script.rb
|
458
|
+
- examples/server_logs/server_logs-02-histograms-full.rb
|
459
|
+
- examples/server_logs/server_logs-02-histograms-mapper.rb
|
460
|
+
- examples/server_logs/server_logs-03-breadcrumbs-full.rb
|
461
|
+
- examples/server_logs/server_logs-04-page_page_edges-full.rb
|
462
|
+
- examples/string_reverser.rb
|
331
463
|
- examples/text/latinize_text.rb
|
332
464
|
- examples/text/pig_latin.rb
|
465
|
+
- examples/text/regional_flavor/README.md
|
466
|
+
- examples/text/regional_flavor/article_wordbags.pig
|
467
|
+
- examples/text/regional_flavor/j01-article_wordbags.rb
|
468
|
+
- examples/text/regional_flavor/simple_pig_script.pig
|
333
469
|
- examples/tiny_count.rb
|
334
|
-
- examples/tiny_count/jabberwocky_output.tsv
|
335
470
|
- examples/twitter/locations.rb
|
336
471
|
- examples/twitter/models.rb
|
337
472
|
- examples/twitter/pt1-fiddle.pig
|
@@ -341,211 +476,99 @@ files:
|
|
341
476
|
- examples/twitter/pt4-strong_links.rb
|
342
477
|
- examples/twitter/pt5-lnglat_and_strong_links.pig
|
343
478
|
- examples/twitter/states.tsv
|
344
|
-
- examples/word_count.rb
|
479
|
+
- examples/word_count/accumulator.rb
|
480
|
+
- examples/word_count/tokenizer.rb
|
481
|
+
- examples/word_count/word_count.rb
|
482
|
+
- examples/workflow/cherry_pie.dot
|
345
483
|
- examples/workflow/cherry_pie.md
|
484
|
+
- examples/workflow/cherry_pie.png
|
346
485
|
- examples/workflow/cherry_pie.rb
|
347
|
-
- examples/workflow/fiddle.rb
|
348
486
|
- examples/workflow/package_gem.rb
|
349
|
-
- lib/away/escapement.rb
|
350
|
-
- lib/away/exe.rb
|
351
|
-
- lib/away/experimental.rb
|
352
|
-
- lib/away/from_file.rb
|
353
|
-
- lib/away/job.rb
|
354
|
-
- lib/away/job/rake_compat.rb
|
355
|
-
- lib/away/registry.rb
|
356
|
-
- lib/away/runner.rb
|
357
|
-
- lib/away/runner/execute.rb
|
358
|
-
- lib/away/script.rb
|
359
|
-
- lib/away/script/hadoop_command.rb
|
360
|
-
- lib/away/source/file_list_source.rb
|
361
|
-
- lib/away/source/looper.rb
|
362
|
-
- lib/away/task.rb
|
363
487
|
- lib/hanuman.rb
|
364
|
-
- lib/hanuman/action.rb
|
365
|
-
- lib/hanuman/chain.rb
|
366
488
|
- lib/hanuman/graph.rb
|
367
|
-
- lib/hanuman/graphviz.rb
|
368
489
|
- lib/hanuman/graphvizzer.rb
|
369
|
-
- lib/hanuman/
|
370
|
-
- lib/hanuman/
|
371
|
-
- lib/hanuman/
|
490
|
+
- lib/hanuman/graphvizzer/gv_models.rb
|
491
|
+
- lib/hanuman/graphvizzer/gv_presenter.rb
|
492
|
+
- lib/hanuman/link.rb
|
493
|
+
- lib/hanuman/registry.rb
|
372
494
|
- lib/hanuman/stage.rb
|
373
495
|
- lib/wukong.rb
|
374
|
-
- lib/wukong/
|
496
|
+
- lib/wukong/boot.rb
|
497
|
+
- lib/wukong/configuration.rb
|
375
498
|
- lib/wukong/dataflow.rb
|
376
|
-
- lib/wukong/
|
377
|
-
- lib/wukong/
|
378
|
-
- lib/wukong/mapred.rb
|
499
|
+
- lib/wukong/driver.rb
|
500
|
+
- lib/wukong/emitter.rb
|
379
501
|
- lib/wukong/model/faker.rb
|
502
|
+
- lib/wukong/model/flatpack_parser/flat.rb
|
503
|
+
- lib/wukong/model/flatpack_parser/flatpack.rb
|
504
|
+
- lib/wukong/model/flatpack_parser/lang.rb
|
505
|
+
- lib/wukong/model/flatpack_parser/parser.rb
|
506
|
+
- lib/wukong/model/flatpack_parser/tokens.rb
|
380
507
|
- lib/wukong/processor.rb
|
381
|
-
- lib/wukong/
|
382
|
-
- lib/wukong/
|
508
|
+
- lib/wukong/spec_helpers.rb
|
509
|
+
- lib/wukong/spec_helpers/integration_driver.rb
|
510
|
+
- lib/wukong/spec_helpers/integration_driver_matchers.rb
|
511
|
+
- lib/wukong/spec_helpers/processor_helpers.rb
|
512
|
+
- lib/wukong/spec_helpers/processor_methods.rb
|
513
|
+
- lib/wukong/spec_helpers/shared_examples.rb
|
514
|
+
- lib/wukong/spec_helpers/spec_driver.rb
|
515
|
+
- lib/wukong/spec_helpers/spec_driver_matchers.rb
|
383
516
|
- lib/wukong/version.rb
|
384
|
-
- lib/wukong/widget/
|
385
|
-
- lib/wukong/widget/
|
386
|
-
- lib/wukong/widget/
|
387
|
-
- lib/wukong/widget/
|
517
|
+
- lib/wukong/widget/filters.rb
|
518
|
+
- lib/wukong/widget/processors.rb
|
519
|
+
- lib/wukong/widget/reducers.rb
|
520
|
+
- lib/wukong/widget/reducers/accumulator.rb
|
521
|
+
- lib/wukong/widget/reducers/bin.rb
|
522
|
+
- lib/wukong/widget/reducers/count.rb
|
523
|
+
- lib/wukong/widget/reducers/group.rb
|
524
|
+
- lib/wukong/widget/reducers/group_concat.rb
|
525
|
+
- lib/wukong/widget/reducers/moments.rb
|
526
|
+
- lib/wukong/widget/reducers/sort.rb
|
527
|
+
- lib/wukong/widget/serializers.rb
|
388
528
|
- lib/wukong/widget/sink.rb
|
389
529
|
- lib/wukong/widget/source.rb
|
390
|
-
- lib/wukong/widget/
|
391
|
-
- lib/wukong/
|
392
|
-
-
|
393
|
-
- old/config/emr-example.yaml
|
394
|
-
- old/examples/README.txt
|
395
|
-
- old/examples/contrib/jeans/README.markdown
|
396
|
-
- old/examples/contrib/jeans/data/normalized_sizes
|
397
|
-
- old/examples/contrib/jeans/data/orders.tsv
|
398
|
-
- old/examples/contrib/jeans/data/sizes
|
399
|
-
- old/examples/contrib/jeans/normalize.rb
|
400
|
-
- old/examples/contrib/jeans/sizes.rb
|
401
|
-
- old/examples/corpus/bnc_word_freq.rb
|
402
|
-
- old/examples/corpus/bucket_counter.rb
|
403
|
-
- old/examples/corpus/dbpedia_abstract_to_sentences.rb
|
404
|
-
- old/examples/corpus/sentence_bigrams.rb
|
405
|
-
- old/examples/corpus/sentence_coocurrence.rb
|
406
|
-
- old/examples/corpus/stopwords.rb
|
407
|
-
- old/examples/corpus/words_to_bigrams.rb
|
408
|
-
- old/examples/emr/README.textile
|
409
|
-
- old/examples/emr/dot_wukong_dir/credentials.json
|
410
|
-
- old/examples/emr/dot_wukong_dir/emr.yaml
|
411
|
-
- old/examples/emr/dot_wukong_dir/emr_bootstrap.sh
|
412
|
-
- old/examples/emr/elastic_mapreduce_example.rb
|
413
|
-
- old/examples/network_graph/adjacency_list.rb
|
414
|
-
- old/examples/network_graph/breadth_first_search.rb
|
415
|
-
- old/examples/network_graph/gen_2paths.rb
|
416
|
-
- old/examples/network_graph/gen_multi_edge.rb
|
417
|
-
- old/examples/network_graph/gen_symmetric_links.rb
|
418
|
-
- old/examples/pagerank/README.textile
|
419
|
-
- old/examples/pagerank/gen_initial_pagerank_graph.pig
|
420
|
-
- old/examples/pagerank/pagerank.rb
|
421
|
-
- old/examples/pagerank/pagerank_initialize.rb
|
422
|
-
- old/examples/pagerank/run_pagerank.sh
|
423
|
-
- old/examples/sample_records.rb
|
424
|
-
- old/examples/server_logs/apache_log_parser.rb
|
425
|
-
- old/examples/server_logs/breadcrumbs.rb
|
426
|
-
- old/examples/server_logs/nook.rb
|
427
|
-
- old/examples/server_logs/nook/faraday_dummy_adapter.rb
|
428
|
-
- old/examples/server_logs/user_agent.rb
|
429
|
-
- old/examples/simple_word_count.rb
|
430
|
-
- old/examples/size.rb
|
431
|
-
- old/examples/stats/avg_value_frequency.rb
|
432
|
-
- old/examples/stats/binning_percentile_estimator.rb
|
433
|
-
- old/examples/stats/data/avg_value_frequency.tsv
|
434
|
-
- old/examples/stats/rank_and_bin.rb
|
435
|
-
- old/examples/stupidly_simple_filter.rb
|
436
|
-
- old/examples/word_count.rb
|
437
|
-
- old/graph/graphviz_builder.rb
|
438
|
-
- old/graph_easy/Attributes.pm
|
439
|
-
- old/graph_easy/Graphviz.pm
|
440
|
-
- old/wukong.rb
|
441
|
-
- old/wukong/and_pig.rb
|
442
|
-
- old/wukong/bad_record.rb
|
443
|
-
- old/wukong/datatypes.rb
|
444
|
-
- old/wukong/datatypes/enum.rb
|
445
|
-
- old/wukong/datatypes/fake_types.rb
|
446
|
-
- old/wukong/decorator.rb
|
447
|
-
- old/wukong/encoding/asciize.rb
|
448
|
-
- old/wukong/extensions.rb
|
449
|
-
- old/wukong/extensions/array.rb
|
450
|
-
- old/wukong/extensions/blank.rb
|
451
|
-
- old/wukong/extensions/class.rb
|
452
|
-
- old/wukong/extensions/date_time.rb
|
453
|
-
- old/wukong/extensions/emittable.rb
|
454
|
-
- old/wukong/extensions/enumerable.rb
|
455
|
-
- old/wukong/extensions/hash.rb
|
456
|
-
- old/wukong/extensions/hash_keys.rb
|
457
|
-
- old/wukong/extensions/hash_like.rb
|
458
|
-
- old/wukong/extensions/hashlike_class.rb
|
459
|
-
- old/wukong/extensions/module.rb
|
460
|
-
- old/wukong/extensions/pathname.rb
|
461
|
-
- old/wukong/extensions/string.rb
|
462
|
-
- old/wukong/extensions/struct.rb
|
463
|
-
- old/wukong/extensions/symbol.rb
|
464
|
-
- old/wukong/filename_pattern.rb
|
465
|
-
- old/wukong/helper.rb
|
466
|
-
- old/wukong/helper/stopwords.rb
|
467
|
-
- old/wukong/helper/tokenize.rb
|
468
|
-
- old/wukong/logger.rb
|
469
|
-
- old/wukong/periodic_monitor.rb
|
470
|
-
- old/wukong/schema.rb
|
471
|
-
- old/wukong/script.rb
|
472
|
-
- old/wukong/script/avro_command.rb
|
473
|
-
- old/wukong/script/cassandra_loader_script.rb
|
474
|
-
- old/wukong/script/emr_command.rb
|
475
|
-
- old/wukong/script/hadoop_command.rb
|
476
|
-
- old/wukong/script/local_command.rb
|
477
|
-
- old/wukong/store.rb
|
478
|
-
- old/wukong/store/base.rb
|
479
|
-
- old/wukong/store/cassandra.rb
|
480
|
-
- old/wukong/store/cassandra/streaming.rb
|
481
|
-
- old/wukong/store/cassandra/struct_loader.rb
|
482
|
-
- old/wukong/store/cassandra_model.rb
|
483
|
-
- old/wukong/store/chh_chunked_flat_file_store.rb
|
484
|
-
- old/wukong/store/chunked_flat_file_store.rb
|
485
|
-
- old/wukong/store/conditional_store.rb
|
486
|
-
- old/wukong/store/factory.rb
|
487
|
-
- old/wukong/store/flat_file_store.rb
|
488
|
-
- old/wukong/store/key_store.rb
|
489
|
-
- old/wukong/store/null_store.rb
|
490
|
-
- old/wukong/store/read_thru_store.rb
|
491
|
-
- old/wukong/store/tokyo_tdb_key_store.rb
|
492
|
-
- old/wukong/store/tyrant_rdb_key_store.rb
|
493
|
-
- old/wukong/store/tyrant_tdb_key_store.rb
|
494
|
-
- old/wukong/streamer.rb
|
495
|
-
- old/wukong/streamer/accumulating_reducer.rb
|
496
|
-
- old/wukong/streamer/base.rb
|
497
|
-
- old/wukong/streamer/counting_reducer.rb
|
498
|
-
- old/wukong/streamer/filter.rb
|
499
|
-
- old/wukong/streamer/instance_streamer.rb
|
500
|
-
- old/wukong/streamer/json_streamer.rb
|
501
|
-
- old/wukong/streamer/line_streamer.rb
|
502
|
-
- old/wukong/streamer/list_reducer.rb
|
503
|
-
- old/wukong/streamer/rank_and_bin_reducer.rb
|
504
|
-
- old/wukong/streamer/record_streamer.rb
|
505
|
-
- old/wukong/streamer/reducer.rb
|
506
|
-
- old/wukong/streamer/set_reducer.rb
|
507
|
-
- old/wukong/streamer/struct_streamer.rb
|
508
|
-
- old/wukong/streamer/summing_reducer.rb
|
509
|
-
- old/wukong/streamer/uniq_by_last_reducer.rb
|
510
|
-
- old/wukong/typed_struct.rb
|
511
|
-
- spec/away/encoding_spec.rb
|
512
|
-
- spec/away/exe_spec.rb
|
513
|
-
- spec/away/flow_spec.rb
|
514
|
-
- spec/away/graph_spec.rb
|
515
|
-
- spec/away/job_spec.rb
|
516
|
-
- spec/away/rake_compat_spec.rb
|
517
|
-
- spec/away/script_spec.rb
|
530
|
+
- lib/wukong/widget/utils.rb
|
531
|
+
- lib/wukong/widgets.rb
|
532
|
+
- spec/examples/dataflow/fibonacci_series_spec.rb
|
518
533
|
- spec/examples/dataflow/parse_apache_logs_spec.rb
|
519
534
|
- spec/examples/dataflow/parsing_spec.rb
|
520
535
|
- spec/examples/dataflow/simple_spec.rb
|
521
536
|
- spec/examples/dataflow/telegram_spec.rb
|
522
537
|
- spec/examples/graph/minimum_spanning_tree_spec.rb
|
538
|
+
- spec/examples/munging/airline_flights/identifiers_spec.rb
|
539
|
+
- spec/examples/munging/airline_flights_spec.rb
|
523
540
|
- spec/examples/text/pig_latin_spec.rb
|
524
541
|
- spec/examples/workflow/cherry_pie_spec.rb
|
525
542
|
- spec/hanuman/graph_spec.rb
|
526
|
-
- spec/hanuman/
|
527
|
-
- spec/hanuman/
|
543
|
+
- spec/hanuman/hanuman_spec.rb
|
544
|
+
- spec/hanuman/registry_spec.rb
|
528
545
|
- spec/hanuman/stage_spec.rb
|
529
546
|
- spec/spec.opts
|
530
547
|
- spec/spec_helper.rb
|
531
|
-
- spec/support/examples_helper.rb
|
532
548
|
- spec/support/hanuman_test_helpers.rb
|
533
|
-
- spec/support/
|
549
|
+
- spec/support/shared_context_for_reducers.rb
|
550
|
+
- spec/support/shared_examples_for_builders.rb
|
551
|
+
- spec/support/shared_examples_for_shortcuts.rb
|
534
552
|
- spec/support/wukong_test_helpers.rb
|
535
|
-
- spec/support/wukong_widget_helpers.rb
|
536
553
|
- spec/wukong/dataflow_spec.rb
|
537
554
|
- spec/wukong/local_runner_spec.rb
|
538
555
|
- spec/wukong/model/faker_spec.rb
|
539
|
-
- spec/wukong/processor_spec.rb
|
540
556
|
- spec/wukong/runner_spec.rb
|
541
|
-
- spec/wukong/widget/
|
557
|
+
- spec/wukong/widget/filters_spec.rb
|
558
|
+
- spec/wukong/widget/processors_spec.rb
|
559
|
+
- spec/wukong/widget/reducers/bin_spec.rb
|
560
|
+
- spec/wukong/widget/reducers/count_spec.rb
|
561
|
+
- spec/wukong/widget/reducers/group_spec.rb
|
562
|
+
- spec/wukong/widget/reducers/moments_spec.rb
|
563
|
+
- spec/wukong/widget/reducers/sort_spec.rb
|
564
|
+
- spec/wukong/widget/serializers_spec.rb
|
542
565
|
- spec/wukong/widget/sink_spec.rb
|
543
566
|
- spec/wukong/widget/source_spec.rb
|
544
|
-
- spec/wukong/
|
545
|
-
- spec/wukong/workflow/command_spec.rb
|
567
|
+
- spec/wukong/wukong_spec.rb
|
546
568
|
- wukong.gemspec
|
547
569
|
homepage: https://github.com/infochimps-labs/wukong
|
548
|
-
licenses:
|
570
|
+
licenses:
|
571
|
+
- Apache 2.0
|
549
572
|
post_install_message:
|
550
573
|
rdoc_options: []
|
551
574
|
require_paths:
|
@@ -564,45 +587,46 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
564
587
|
version: 1.3.1
|
565
588
|
requirements: []
|
566
589
|
rubyforge_project:
|
567
|
-
rubygems_version: 1.8.
|
590
|
+
rubygems_version: 1.8.23
|
568
591
|
signing_key:
|
569
592
|
specification_version: 3
|
570
593
|
summary: Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use
|
571
594
|
it, yet handles terabyte-scale computation with ease.
|
572
595
|
test_files:
|
573
|
-
- spec/
|
574
|
-
- spec/away/exe_spec.rb
|
575
|
-
- spec/away/flow_spec.rb
|
576
|
-
- spec/away/graph_spec.rb
|
577
|
-
- spec/away/job_spec.rb
|
578
|
-
- spec/away/rake_compat_spec.rb
|
579
|
-
- spec/away/script_spec.rb
|
596
|
+
- spec/examples/dataflow/fibonacci_series_spec.rb
|
580
597
|
- spec/examples/dataflow/parse_apache_logs_spec.rb
|
581
598
|
- spec/examples/dataflow/parsing_spec.rb
|
582
599
|
- spec/examples/dataflow/simple_spec.rb
|
583
600
|
- spec/examples/dataflow/telegram_spec.rb
|
584
601
|
- spec/examples/graph/minimum_spanning_tree_spec.rb
|
602
|
+
- spec/examples/munging/airline_flights/identifiers_spec.rb
|
603
|
+
- spec/examples/munging/airline_flights_spec.rb
|
585
604
|
- spec/examples/text/pig_latin_spec.rb
|
586
605
|
- spec/examples/workflow/cherry_pie_spec.rb
|
587
606
|
- spec/hanuman/graph_spec.rb
|
588
|
-
- spec/hanuman/
|
589
|
-
- spec/hanuman/
|
607
|
+
- spec/hanuman/hanuman_spec.rb
|
608
|
+
- spec/hanuman/registry_spec.rb
|
590
609
|
- spec/hanuman/stage_spec.rb
|
591
610
|
- spec/spec.opts
|
592
611
|
- spec/spec_helper.rb
|
593
|
-
- spec/support/examples_helper.rb
|
594
612
|
- spec/support/hanuman_test_helpers.rb
|
595
|
-
- spec/support/
|
613
|
+
- spec/support/shared_context_for_reducers.rb
|
614
|
+
- spec/support/shared_examples_for_builders.rb
|
615
|
+
- spec/support/shared_examples_for_shortcuts.rb
|
596
616
|
- spec/support/wukong_test_helpers.rb
|
597
|
-
- spec/support/wukong_widget_helpers.rb
|
598
617
|
- spec/wukong/dataflow_spec.rb
|
599
618
|
- spec/wukong/local_runner_spec.rb
|
600
619
|
- spec/wukong/model/faker_spec.rb
|
601
|
-
- spec/wukong/processor_spec.rb
|
602
620
|
- spec/wukong/runner_spec.rb
|
603
|
-
- spec/wukong/widget/
|
621
|
+
- spec/wukong/widget/filters_spec.rb
|
622
|
+
- spec/wukong/widget/processors_spec.rb
|
623
|
+
- spec/wukong/widget/reducers/bin_spec.rb
|
624
|
+
- spec/wukong/widget/reducers/count_spec.rb
|
625
|
+
- spec/wukong/widget/reducers/group_spec.rb
|
626
|
+
- spec/wukong/widget/reducers/moments_spec.rb
|
627
|
+
- spec/wukong/widget/reducers/sort_spec.rb
|
628
|
+
- spec/wukong/widget/serializers_spec.rb
|
604
629
|
- spec/wukong/widget/sink_spec.rb
|
605
630
|
- spec/wukong/widget/source_spec.rb
|
606
|
-
- spec/wukong/
|
607
|
-
- spec/wukong/workflow/command_spec.rb
|
631
|
+
- spec/wukong/wukong_spec.rb
|
608
632
|
has_rdoc:
|