wukong 3.0.0.pre → 3.0.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +46 -33
- data/.gitmodules +3 -0
- data/.rspec +1 -1
- data/.travis.yml +8 -1
- data/.yardopts +0 -13
- data/Guardfile +4 -6
- data/{LICENSE.textile → LICENSE.md} +43 -55
- data/README-old.md +422 -0
- data/README.md +279 -418
- data/Rakefile +21 -5
- data/TODO.md +6 -6
- data/bin/wu-clean-encoding +31 -0
- data/bin/wu-lign +2 -2
- data/bin/wu-local +69 -0
- data/bin/wu-server +70 -0
- data/examples/Gemfile +38 -0
- data/examples/README.md +9 -0
- data/examples/dataflow/apache_log_line.rb +64 -25
- data/examples/dataflow/fibonacci_series.rb +101 -0
- data/examples/dataflow/parse_apache_logs.rb +37 -7
- data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
- data/examples/dataflow/simple.rb +4 -4
- data/examples/geo.rb +4 -0
- data/examples/geo/geo_grids.numbers +0 -0
- data/examples/geo/geolocated.rb +331 -0
- data/examples/geo/quadtile.rb +69 -0
- data/examples/geo/spec/geolocated_spec.rb +247 -0
- data/examples/geo/tile_fetcher.rb +77 -0
- data/examples/graph/minimum_spanning_tree.rb +61 -61
- data/examples/jabberwocky.txt +36 -0
- data/examples/models/wikipedia.rb +20 -0
- data/examples/munging/Gemfile +8 -0
- data/examples/munging/airline_flights/airline.rb +57 -0
- data/examples/munging/airline_flights/airline_flights.rake +83 -0
- data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
- data/examples/munging/airline_flights/airport.rb +211 -0
- data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
- data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
- data/examples/munging/airline_flights/flight.rb +156 -0
- data/examples/munging/airline_flights/models.rb +4 -0
- data/examples/munging/airline_flights/parse.rb +26 -0
- data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
- data/examples/munging/airline_flights/route.rb +35 -0
- data/examples/munging/airline_flights/tasks.rake +83 -0
- data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
- data/examples/munging/airline_flights/topcities.rb +167 -0
- data/examples/munging/airports/40_wbans.txt +40 -0
- data/examples/munging/airports/filter_weather_reports.rb +37 -0
- data/examples/munging/airports/join.pig +31 -0
- data/examples/munging/airports/to_tsv.rb +33 -0
- data/examples/munging/airports/usa_wbans.pig +19 -0
- data/examples/munging/airports/usa_wbans.txt +2157 -0
- data/examples/munging/airports/wbans.pig +19 -0
- data/examples/munging/airports/wbans.txt +2310 -0
- data/examples/munging/geo/geo_json.rb +54 -0
- data/examples/munging/geo/geo_models.rb +69 -0
- data/examples/munging/geo/geonames_models.rb +78 -0
- data/examples/munging/geo/iso_codes.rb +172 -0
- data/examples/munging/geo/reconcile_countries.rb +124 -0
- data/examples/munging/geo/tasks.rake +71 -0
- data/examples/munging/rake_helper.rb +62 -0
- data/examples/munging/weather/.gitignore +1 -0
- data/examples/munging/weather/Gemfile +4 -0
- data/examples/munging/weather/Rakefile +28 -0
- data/examples/munging/weather/extract_ish.rb +13 -0
- data/examples/munging/weather/models/weather.rb +119 -0
- data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
- data/examples/munging/wikipedia/README.md +34 -0
- data/examples/munging/wikipedia/Rakefile +193 -0
- data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
- data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
- data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
- data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
- data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
- data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
- data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
- data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
- data/examples/munging/wikipedia/pig_style_guide.md +25 -0
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
- data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
- data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
- data/examples/munging/wikipedia/utils/namespaces.json +1 -0
- data/examples/rake_helper.rb +85 -0
- data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/server_logs/logline.rb +95 -0
- data/examples/server_logs/models.rb +66 -0
- data/examples/server_logs/page_counts.pig +48 -0
- data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
- data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
- data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
- data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
- data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
- data/examples/string_reverser.rb +26 -0
- data/examples/text/pig_latin.rb +2 -2
- data/examples/text/regional_flavor/README.md +14 -0
- data/examples/text/regional_flavor/article_wordbags.pig +39 -0
- data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
- data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
- data/examples/word_count/accumulator.rb +26 -0
- data/examples/word_count/tokenizer.rb +13 -0
- data/examples/word_count/word_count.rb +6 -0
- data/examples/workflow/cherry_pie.dot +97 -0
- data/examples/workflow/cherry_pie.png +0 -0
- data/examples/workflow/cherry_pie.rb +61 -26
- data/lib/hanuman.rb +34 -7
- data/lib/hanuman/graph.rb +55 -31
- data/lib/hanuman/graphvizzer.rb +199 -178
- data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
- data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
- data/lib/hanuman/link.rb +35 -0
- data/lib/hanuman/registry.rb +46 -0
- data/lib/hanuman/stage.rb +76 -32
- data/lib/wukong.rb +23 -24
- data/lib/wukong/boot.rb +87 -0
- data/lib/wukong/configuration.rb +8 -0
- data/lib/wukong/dataflow.rb +45 -78
- data/lib/wukong/driver.rb +99 -0
- data/lib/wukong/emitter.rb +22 -0
- data/lib/wukong/model/faker.rb +24 -24
- data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
- data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
- data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
- data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
- data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
- data/lib/wukong/processor.rb +60 -114
- data/lib/wukong/spec_helpers.rb +81 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
- data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
- data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
- data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
- data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
- data/lib/wukong/version.rb +2 -1
- data/lib/wukong/widget/filters.rb +311 -0
- data/lib/wukong/widget/processors.rb +156 -0
- data/lib/wukong/widget/reducers.rb +7 -0
- data/lib/wukong/widget/reducers/accumulator.rb +73 -0
- data/lib/wukong/widget/reducers/bin.rb +318 -0
- data/lib/wukong/widget/reducers/count.rb +61 -0
- data/lib/wukong/widget/reducers/group.rb +85 -0
- data/lib/wukong/widget/reducers/group_concat.rb +70 -0
- data/lib/wukong/widget/reducers/moments.rb +72 -0
- data/lib/wukong/widget/reducers/sort.rb +130 -0
- data/lib/wukong/widget/serializers.rb +287 -0
- data/lib/wukong/widget/sink.rb +10 -52
- data/lib/wukong/widget/source.rb +7 -113
- data/lib/wukong/widget/utils.rb +46 -0
- data/lib/wukong/widgets.rb +6 -0
- data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
- data/spec/examples/dataflow/parsing_spec.rb +12 -11
- data/spec/examples/dataflow/simple_spec.rb +32 -6
- data/spec/examples/dataflow/telegram_spec.rb +36 -36
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
- data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
- data/spec/examples/munging/airline_flights_spec.rb +202 -0
- data/spec/examples/text/pig_latin_spec.rb +13 -16
- data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
- data/spec/hanuman/graph_spec.rb +27 -2
- data/spec/hanuman/hanuman_spec.rb +10 -0
- data/spec/hanuman/registry_spec.rb +123 -0
- data/spec/hanuman/stage_spec.rb +61 -7
- data/spec/spec_helper.rb +29 -19
- data/spec/support/hanuman_test_helpers.rb +14 -12
- data/spec/support/shared_context_for_reducers.rb +37 -0
- data/spec/support/shared_examples_for_builders.rb +101 -0
- data/spec/support/shared_examples_for_shortcuts.rb +57 -0
- data/spec/support/wukong_test_helpers.rb +37 -11
- data/spec/wukong/dataflow_spec.rb +77 -55
- data/spec/wukong/local_runner_spec.rb +24 -24
- data/spec/wukong/model/faker_spec.rb +132 -131
- data/spec/wukong/runner_spec.rb +8 -8
- data/spec/wukong/widget/filters_spec.rb +61 -0
- data/spec/wukong/widget/processors_spec.rb +126 -0
- data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
- data/spec/wukong/widget/reducers/count_spec.rb +11 -0
- data/spec/wukong/widget/reducers/group_spec.rb +20 -0
- data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
- data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
- data/spec/wukong/widget/serializers_spec.rb +92 -0
- data/spec/wukong/widget/sink_spec.rb +15 -15
- data/spec/wukong/widget/source_spec.rb +65 -41
- data/spec/wukong/wukong_spec.rb +10 -0
- data/wukong.gemspec +17 -10
- metadata +359 -335
- data/.document +0 -5
- data/VERSION +0 -1
- data/bin/hdp-bin +0 -44
- data/bin/hdp-bzip +0 -23
- data/bin/hdp-cat +0 -3
- data/bin/hdp-catd +0 -3
- data/bin/hdp-cp +0 -3
- data/bin/hdp-du +0 -86
- data/bin/hdp-get +0 -3
- data/bin/hdp-kill +0 -3
- data/bin/hdp-kill-task +0 -3
- data/bin/hdp-ls +0 -11
- data/bin/hdp-mkdir +0 -2
- data/bin/hdp-mkdirp +0 -12
- data/bin/hdp-mv +0 -3
- data/bin/hdp-parts_to_keys.rb +0 -77
- data/bin/hdp-ps +0 -3
- data/bin/hdp-put +0 -3
- data/bin/hdp-rm +0 -32
- data/bin/hdp-sort +0 -40
- data/bin/hdp-stream +0 -40
- data/bin/hdp-stream-flat +0 -22
- data/bin/hdp-stream2 +0 -39
- data/bin/hdp-sync +0 -17
- data/bin/hdp-wc +0 -67
- data/bin/wu-flow +0 -10
- data/bin/wu-map +0 -17
- data/bin/wu-red +0 -17
- data/bin/wukong +0 -17
- data/data/CREDITS.md +0 -355
- data/data/graph/airfares.tsv +0 -2174
- data/data/text/gift_of_the_magi.txt +0 -225
- data/data/text/jabberwocky.txt +0 -36
- data/data/text/rectification_of_names.txt +0 -33
- data/data/twitter/a_atsigns_b.tsv +0 -64
- data/data/twitter/a_follows_b.tsv +0 -53
- data/data/twitter/tweet.tsv +0 -167
- data/data/twitter/twitter_user.tsv +0 -55
- data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
- data/docpages/INSTALL.textile +0 -92
- data/docpages/LICENSE.textile +0 -107
- data/docpages/README-elastic_map_reduce.textile +0 -377
- data/docpages/README-performance.textile +0 -90
- data/docpages/README-wulign.textile +0 -65
- data/docpages/UsingWukong-part1-get_ready.textile +0 -17
- data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
- data/docpages/UsingWukong-part3-parsing.textile +0 -138
- data/docpages/_config.yml +0 -39
- data/docpages/avro/avro_notes.textile +0 -56
- data/docpages/avro/performance.textile +0 -36
- data/docpages/avro/tethering.textile +0 -19
- data/docpages/bigdata-tips.textile +0 -143
- data/docpages/code/api_response_example.txt +0 -20
- data/docpages/code/parser_skeleton.rb +0 -38
- data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +0 -16
- data/docpages/hadoop-tips.textile +0 -83
- data/docpages/index.textile +0 -92
- data/docpages/intro.textile +0 -8
- data/docpages/moreinfo.textile +0 -174
- data/docpages/news.html +0 -24
- data/docpages/pig/PigLatinExpressionsList.txt +0 -122
- data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
- data/docpages/pig/commandline_params.txt +0 -26
- data/docpages/pig/cookbook.html +0 -481
- data/docpages/pig/images/hadoop-logo.jpg +0 -0
- data/docpages/pig/images/instruction_arrow.png +0 -0
- data/docpages/pig/images/pig-logo.gif +0 -0
- data/docpages/pig/piglatin_ref1.html +0 -1103
- data/docpages/pig/piglatin_ref2.html +0 -14340
- data/docpages/pig/setup.html +0 -505
- data/docpages/pig/skin/basic.css +0 -166
- data/docpages/pig/skin/breadcrumbs.js +0 -237
- data/docpages/pig/skin/fontsize.js +0 -166
- data/docpages/pig/skin/getBlank.js +0 -40
- data/docpages/pig/skin/getMenu.js +0 -45
- data/docpages/pig/skin/images/chapter.gif +0 -0
- data/docpages/pig/skin/images/chapter_open.gif +0 -0
- data/docpages/pig/skin/images/current.gif +0 -0
- data/docpages/pig/skin/images/external-link.gif +0 -0
- data/docpages/pig/skin/images/header_white_line.gif +0 -0
- data/docpages/pig/skin/images/page.gif +0 -0
- data/docpages/pig/skin/images/pdfdoc.gif +0 -0
- data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/print.css +0 -54
- data/docpages/pig/skin/profile.css +0 -181
- data/docpages/pig/skin/screen.css +0 -587
- data/docpages/pig/tutorial.html +0 -1059
- data/docpages/pig/udf.html +0 -1509
- data/docpages/tutorial.textile +0 -283
- data/docpages/usage.textile +0 -195
- data/docpages/wutils.textile +0 -263
- data/examples/dataflow/complex.rb +0 -11
- data/examples/dataflow/donuts.rb +0 -13
- data/examples/tiny_count/jabberwocky_output.tsv +0 -92
- data/examples/word_count.rb +0 -48
- data/examples/workflow/fiddle.rb +0 -24
- data/lib/away/escapement.rb +0 -129
- data/lib/away/exe.rb +0 -11
- data/lib/away/experimental.rb +0 -5
- data/lib/away/from_file.rb +0 -52
- data/lib/away/job.rb +0 -56
- data/lib/away/job/rake_compat.rb +0 -17
- data/lib/away/registry.rb +0 -79
- data/lib/away/runner.rb +0 -276
- data/lib/away/runner/execute.rb +0 -121
- data/lib/away/script.rb +0 -161
- data/lib/away/script/hadoop_command.rb +0 -240
- data/lib/away/source/file_list_source.rb +0 -15
- data/lib/away/source/looper.rb +0 -18
- data/lib/away/task.rb +0 -219
- data/lib/hanuman/action.rb +0 -21
- data/lib/hanuman/chain.rb +0 -4
- data/lib/hanuman/graphviz.rb +0 -74
- data/lib/hanuman/resource.rb +0 -6
- data/lib/hanuman/slot.rb +0 -87
- data/lib/hanuman/slottable.rb +0 -220
- data/lib/wukong/bad_record.rb +0 -15
- data/lib/wukong/event.rb +0 -44
- data/lib/wukong/local_runner.rb +0 -55
- data/lib/wukong/mapred.rb +0 -3
- data/lib/wukong/universe.rb +0 -48
- data/lib/wukong/widget/filter.rb +0 -81
- data/lib/wukong/widget/gibberish.rb +0 -123
- data/lib/wukong/widget/monitor.rb +0 -26
- data/lib/wukong/widget/reducer.rb +0 -66
- data/lib/wukong/widget/stringifier.rb +0 -50
- data/lib/wukong/workflow.rb +0 -22
- data/lib/wukong/workflow/command.rb +0 -42
- data/old/config/emr-example.yaml +0 -48
- data/old/examples/README.txt +0 -17
- data/old/examples/contrib/jeans/README.markdown +0 -165
- data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
- data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
- data/old/examples/contrib/jeans/data/sizes +0 -3
- data/old/examples/contrib/jeans/normalize.rb +0 -20
- data/old/examples/contrib/jeans/sizes.rb +0 -55
- data/old/examples/corpus/bnc_word_freq.rb +0 -44
- data/old/examples/corpus/bucket_counter.rb +0 -47
- data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
- data/old/examples/corpus/sentence_bigrams.rb +0 -53
- data/old/examples/corpus/sentence_coocurrence.rb +0 -66
- data/old/examples/corpus/stopwords.rb +0 -138
- data/old/examples/corpus/words_to_bigrams.rb +0 -53
- data/old/examples/emr/README.textile +0 -110
- data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
- data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
- data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
- data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
- data/old/examples/network_graph/adjacency_list.rb +0 -74
- data/old/examples/network_graph/breadth_first_search.rb +0 -72
- data/old/examples/network_graph/gen_2paths.rb +0 -68
- data/old/examples/network_graph/gen_multi_edge.rb +0 -112
- data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
- data/old/examples/pagerank/README.textile +0 -6
- data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
- data/old/examples/pagerank/pagerank.rb +0 -72
- data/old/examples/pagerank/pagerank_initialize.rb +0 -42
- data/old/examples/pagerank/run_pagerank.sh +0 -21
- data/old/examples/sample_records.rb +0 -33
- data/old/examples/server_logs/apache_log_parser.rb +0 -15
- data/old/examples/server_logs/nook.rb +0 -48
- data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
- data/old/examples/server_logs/user_agent.rb +0 -40
- data/old/examples/simple_word_count.rb +0 -82
- data/old/examples/size.rb +0 -61
- data/old/examples/stats/avg_value_frequency.rb +0 -86
- data/old/examples/stats/binning_percentile_estimator.rb +0 -140
- data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
- data/old/examples/stats/rank_and_bin.rb +0 -173
- data/old/examples/stupidly_simple_filter.rb +0 -40
- data/old/examples/word_count.rb +0 -75
- data/old/graph/graphviz_builder.rb +0 -580
- data/old/graph_easy/Attributes.pm +0 -4181
- data/old/graph_easy/Graphviz.pm +0 -2232
- data/old/wukong.rb +0 -18
- data/old/wukong/and_pig.rb +0 -38
- data/old/wukong/bad_record.rb +0 -18
- data/old/wukong/datatypes.rb +0 -24
- data/old/wukong/datatypes/enum.rb +0 -127
- data/old/wukong/datatypes/fake_types.rb +0 -17
- data/old/wukong/decorator.rb +0 -28
- data/old/wukong/encoding/asciize.rb +0 -108
- data/old/wukong/extensions.rb +0 -16
- data/old/wukong/extensions/array.rb +0 -18
- data/old/wukong/extensions/blank.rb +0 -93
- data/old/wukong/extensions/class.rb +0 -189
- data/old/wukong/extensions/date_time.rb +0 -53
- data/old/wukong/extensions/emittable.rb +0 -69
- data/old/wukong/extensions/enumerable.rb +0 -79
- data/old/wukong/extensions/hash.rb +0 -167
- data/old/wukong/extensions/hash_keys.rb +0 -16
- data/old/wukong/extensions/hash_like.rb +0 -150
- data/old/wukong/extensions/hashlike_class.rb +0 -47
- data/old/wukong/extensions/module.rb +0 -2
- data/old/wukong/extensions/pathname.rb +0 -27
- data/old/wukong/extensions/string.rb +0 -65
- data/old/wukong/extensions/struct.rb +0 -17
- data/old/wukong/extensions/symbol.rb +0 -11
- data/old/wukong/filename_pattern.rb +0 -74
- data/old/wukong/helper.rb +0 -7
- data/old/wukong/helper/stopwords.rb +0 -195
- data/old/wukong/helper/tokenize.rb +0 -35
- data/old/wukong/logger.rb +0 -38
- data/old/wukong/periodic_monitor.rb +0 -72
- data/old/wukong/schema.rb +0 -269
- data/old/wukong/script.rb +0 -286
- data/old/wukong/script/avro_command.rb +0 -5
- data/old/wukong/script/cassandra_loader_script.rb +0 -40
- data/old/wukong/script/emr_command.rb +0 -168
- data/old/wukong/script/hadoop_command.rb +0 -237
- data/old/wukong/script/local_command.rb +0 -41
- data/old/wukong/store.rb +0 -10
- data/old/wukong/store/base.rb +0 -27
- data/old/wukong/store/cassandra.rb +0 -10
- data/old/wukong/store/cassandra/streaming.rb +0 -75
- data/old/wukong/store/cassandra/struct_loader.rb +0 -21
- data/old/wukong/store/cassandra_model.rb +0 -91
- data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
- data/old/wukong/store/chunked_flat_file_store.rb +0 -48
- data/old/wukong/store/conditional_store.rb +0 -57
- data/old/wukong/store/factory.rb +0 -8
- data/old/wukong/store/flat_file_store.rb +0 -89
- data/old/wukong/store/key_store.rb +0 -51
- data/old/wukong/store/null_store.rb +0 -15
- data/old/wukong/store/read_thru_store.rb +0 -22
- data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
- data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
- data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
- data/old/wukong/streamer.rb +0 -30
- data/old/wukong/streamer/accumulating_reducer.rb +0 -83
- data/old/wukong/streamer/base.rb +0 -126
- data/old/wukong/streamer/counting_reducer.rb +0 -25
- data/old/wukong/streamer/filter.rb +0 -20
- data/old/wukong/streamer/instance_streamer.rb +0 -15
- data/old/wukong/streamer/json_streamer.rb +0 -21
- data/old/wukong/streamer/line_streamer.rb +0 -12
- data/old/wukong/streamer/list_reducer.rb +0 -31
- data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
- data/old/wukong/streamer/record_streamer.rb +0 -14
- data/old/wukong/streamer/reducer.rb +0 -11
- data/old/wukong/streamer/set_reducer.rb +0 -14
- data/old/wukong/streamer/struct_streamer.rb +0 -48
- data/old/wukong/streamer/summing_reducer.rb +0 -29
- data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
- data/old/wukong/typed_struct.rb +0 -12
- data/spec/away/encoding_spec.rb +0 -32
- data/spec/away/exe_spec.rb +0 -20
- data/spec/away/flow_spec.rb +0 -82
- data/spec/away/graph_spec.rb +0 -6
- data/spec/away/job_spec.rb +0 -15
- data/spec/away/rake_compat_spec.rb +0 -9
- data/spec/away/script_spec.rb +0 -81
- data/spec/hanuman/graphviz_spec.rb +0 -29
- data/spec/hanuman/slot_spec.rb +0 -2
- data/spec/support/examples_helper.rb +0 -10
- data/spec/support/streamer_test_helpers.rb +0 -6
- data/spec/support/wukong_widget_helpers.rb +0 -66
- data/spec/wukong/processor_spec.rb +0 -109
- data/spec/wukong/widget/filter_spec.rb +0 -99
- data/spec/wukong/widget/stringifier_spec.rb +0 -51
- data/spec/wukong/workflow/command_spec.rb +0 -5
data/spec/wukong/runner_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require 'wukong'
|
2
|
+
# require 'wukong'
|
3
3
|
|
4
|
-
describe :runner, :helpers => true do
|
4
|
+
# describe :runner, :helpers => true do
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
# context 'tiny_count example script' do
|
7
|
+
# it 'is shorter than a tweet' do
|
8
|
+
# example_script_contents('tiny_count.rb').length.should < 140
|
9
|
+
# end
|
10
10
|
|
11
|
-
|
12
|
-
end
|
11
|
+
# end
|
12
|
+
# end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Filters" do
|
4
|
+
|
5
|
+
context :null do
|
6
|
+
it_behaves_like 'a processor', :named => :null
|
7
|
+
it "should not pass anything, ever" do
|
8
|
+
processor.given('', 3, 'hi', nil).should emit(0).records
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
context :identity do
|
13
|
+
it_behaves_like 'a processor', :named => :identity
|
14
|
+
it "should pass everything, always" do
|
15
|
+
processor.given('', 3, 'hi', nil).should emit('', 3, 'hi', nil)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
context :regexp do
|
20
|
+
it_behaves_like 'a processor', :named => :regexp
|
21
|
+
it "should pass everything given no 'match' argument" do
|
22
|
+
processor.given('snap', 'crackle', 'pop').should emit('snap', 'crackle', 'pop')
|
23
|
+
end
|
24
|
+
it "should pass everything its 'match' argument matches" do
|
25
|
+
processor(match: /a/).given('snap', 'crackle', 'pop').should emit('snap', 'crackle')
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
context :not_regexp do
|
30
|
+
it_behaves_like 'a processor', :named => :not_regexp
|
31
|
+
it "should pass everything given no 'match' argument" do
|
32
|
+
processor.given('snap', 'crackle', 'pop').should emit('snap', 'crackle', 'pop')
|
33
|
+
end
|
34
|
+
it "should pass everything its 'match' argument does not match" do
|
35
|
+
processor(match: /a/).given('snap', 'crackle', 'pop').should emit('pop')
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context :limit do
|
40
|
+
it_behaves_like 'a processor', :named => :limit
|
41
|
+
it "should pass everything given no 'max' argument" do
|
42
|
+
processor.given('snap', 'crackle', 'pop').should emit('snap', 'crackle', 'pop')
|
43
|
+
end
|
44
|
+
it "should pass only as many records as its 'max' argument" do
|
45
|
+
processor(max: 2).given('snap', 'crackle', 'pop', 'whoa').should emit('snap', 'crackle')
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
context :sample do
|
50
|
+
it_behaves_like 'a processor', :named => :sample
|
51
|
+
it "should pass everything given no 'fraction' argument" do
|
52
|
+
processor.given('snap', 'crackle', 'pop').should emit('snap', 'crackle', 'pop')
|
53
|
+
end
|
54
|
+
it "should pass only a fraction of records given a 'fraction' argument" do
|
55
|
+
processor(:fraction => 0.5).tap do |proc|
|
56
|
+
proc.should_receive(:rand).and_return(0.7, 0.1, 0.6)
|
57
|
+
end.given('snap', 'crackle', 'pop').should emit('crackle')
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wukong::Processor do
|
4
|
+
|
5
|
+
let(:hsh) { { "hi" => "there", "top" => { "lower" => { "lowest" => "value" } } } }
|
6
|
+
let(:ary) { ['1', 2, 'three'] }
|
7
|
+
|
8
|
+
context :logger do
|
9
|
+
it_behaves_like "a processor", :named => :logger
|
10
|
+
|
11
|
+
it "logs each event at the 'info' level by default" do
|
12
|
+
log = mock("logger")
|
13
|
+
log.should_receive(:info).with('hi there')
|
14
|
+
log.should_receive(:info).with('buddy')
|
15
|
+
processor(:logger) do
|
16
|
+
stub!(:log).and_return(log)
|
17
|
+
end.given('hi there', 'buddy').should emit(0).records
|
18
|
+
end
|
19
|
+
|
20
|
+
it "logs each event at a desired level set with an argument" do
|
21
|
+
log = mock("logger")
|
22
|
+
log.should_receive(:debug).with('hi there')
|
23
|
+
log.should_receive(:debug).with('buddy')
|
24
|
+
processor(:logger, level: :debug) do
|
25
|
+
stub!(:log).and_return(log)
|
26
|
+
end.given('hi there', 'buddy').should emit(0).records
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
context :extract do
|
32
|
+
subject { processor(:extract) }
|
33
|
+
|
34
|
+
it_behaves_like 'a processor', :named => :extract
|
35
|
+
|
36
|
+
context "on a string" do
|
37
|
+
it "emits the string with no arguments" do
|
38
|
+
processor(:extract).given('hi there', 'buddy').should emit('hi there', 'buddy')
|
39
|
+
end
|
40
|
+
end
|
41
|
+
context "on a Fixnum" do
|
42
|
+
it "emits the number with no arguments" do
|
43
|
+
processor(:extract).given(3, 3.0).should emit(3, 3.0)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
context "on a Hash" do
|
47
|
+
it "emits the hash with no arguments" do
|
48
|
+
processor(:extract).given(hsh).should emit(hsh)
|
49
|
+
end
|
50
|
+
it "can extract a key" do
|
51
|
+
processor(:extract, part: 'hi').given(hsh).should emit('there')
|
52
|
+
end
|
53
|
+
it "emits nil when the value of the key is nil" do
|
54
|
+
processor(:extract, part: 'bye').given(hsh).should emit(nil)
|
55
|
+
end
|
56
|
+
it "can extract a nested key" do
|
57
|
+
processor(:extract, part: 'top.lower.lowest').given(hsh).should emit('value')
|
58
|
+
end
|
59
|
+
it "emits nil when the value of this nested key is nil" do
|
60
|
+
processor(:extract, part: 'foo.bar.baz').given(hsh).should emit(nil)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
context "on an Array" do
|
64
|
+
it "emits the array with no arguments" do
|
65
|
+
processor(:extract).given(ary).should emit(ary)
|
66
|
+
end
|
67
|
+
it "can extract the nth value with an integer argument" do
|
68
|
+
processor(:extract, part: 2).given(ary).should emit(2)
|
69
|
+
end
|
70
|
+
it "can extract the nth value with a string argument" do
|
71
|
+
processor(:extract, part: '2').given(ary).should emit(2)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
context "on JSON" do
|
75
|
+
let(:garbage) { '{"239823:' }
|
76
|
+
it "emits the JSON with no arguments" do
|
77
|
+
processor(:extract).given_json(hsh).should emit_json(hsh)
|
78
|
+
end
|
79
|
+
it "will skip badly formed records" do
|
80
|
+
processor(:extract).given(garbage).should emit(garbage)
|
81
|
+
end
|
82
|
+
it "can extract a key" do
|
83
|
+
processor(:extract, part: 'hi').given_json(hsh).should emit('there')
|
84
|
+
end
|
85
|
+
it "can extract a nested key" do
|
86
|
+
processor(:extract, part: 'top.lower.lowest').given_json(hsh).should emit('value')
|
87
|
+
end
|
88
|
+
it "emits nil when the record is missing the key" do
|
89
|
+
processor(:extract, part: 'foo.bar.baz').given_json(hsh).should emit(nil)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
context "on delimited data" do
|
93
|
+
it "emits the row with no arguments" do
|
94
|
+
processor(:extract).given_delimited('|', ary).should emit(ary.map(&:to_s).join('|'))
|
95
|
+
end
|
96
|
+
it "can extract the nth value with an integer argument" do
|
97
|
+
processor(:extract, part: 2, separator: '|').given_delimited('|', ary).should emit('2')
|
98
|
+
end
|
99
|
+
it "can extract nth value with a string argument" do
|
100
|
+
processor(:extract, part: '2', separator: '|').given_delimited('|', ary).should emit('2')
|
101
|
+
end
|
102
|
+
end
|
103
|
+
context "on TSV" do
|
104
|
+
it "emits the TSV with no arguments" do
|
105
|
+
processor(:extract).given_tsv(ary).should emit(ary.map(&:to_s).join("\t"))
|
106
|
+
end
|
107
|
+
it "can extract the nth value with an integer argument" do
|
108
|
+
processor(:extract, part: 2).given_tsv(ary).should emit('2')
|
109
|
+
end
|
110
|
+
it "can extract the nth value with a string argument" do
|
111
|
+
processor(:extract, part: '2').given_tsv(ary).should emit('2')
|
112
|
+
end
|
113
|
+
end
|
114
|
+
context "on CSV" do
|
115
|
+
it "emits the CSV with no arguments" do
|
116
|
+
processor(:extract).given_csv(ary).should emit(ary.map(&:to_s).join(","))
|
117
|
+
end
|
118
|
+
it "can extract the nth value with an integer argument" do
|
119
|
+
processor(:extract, part: 2, separator: ',').given_csv(ary).should emit('2')
|
120
|
+
end
|
121
|
+
it "can extract the nth value with a string argument" do
|
122
|
+
processor(:extract, part: '2', separator: ',').given_csv(ary).should emit('2')
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wukong::Processor do
|
4
|
+
describe :bin do
|
5
|
+
include_context "reducers"
|
6
|
+
it_behaves_like 'a processor', :named => :bin
|
7
|
+
|
8
|
+
let(:bins) {
|
9
|
+
[
|
10
|
+
['0.0', '2.000', '9.000'],
|
11
|
+
['2.000', '4.000', '9.000'],
|
12
|
+
['4.000', '6.000', '8.000'],
|
13
|
+
['6.000', '8.000', '11.000'],
|
14
|
+
['8.000', '10.000', '13.000']
|
15
|
+
]
|
16
|
+
}
|
17
|
+
|
18
|
+
it "raises an error when called with a non-positive number of bins" do
|
19
|
+
lambda { processor(num_bins: -1) }.should raise_error(Wukong::Error)
|
20
|
+
end
|
21
|
+
|
22
|
+
it "raises an error when called with a maximum that's less than or equal to the minimum" do
|
23
|
+
lambda { processor(min: 10, max: 0) }.should raise_error(Wukong::Error)
|
24
|
+
end
|
25
|
+
|
26
|
+
it "will bin 50 numbers into 7 bins (uses the square root)" do
|
27
|
+
processor.given(*nums).should emit(7).records
|
28
|
+
end
|
29
|
+
|
30
|
+
it "will bin 50 numbers into 10 bins if asked" do
|
31
|
+
processor(num_bins: 10).given(*nums).should emit(10).records
|
32
|
+
end
|
33
|
+
|
34
|
+
it "counts correctly in each bin" do
|
35
|
+
processor(num_bins: 5).given(*nums).should emit_tsv(*bins)
|
36
|
+
end
|
37
|
+
|
38
|
+
it "can express counts logarithmically" do
|
39
|
+
row = processor(num_bins: 5, log_counts: true).given(*nums).tsv_output.first
|
40
|
+
row.size.should == 3
|
41
|
+
row[2].to_f.should be_within(0.1).of(2.197)
|
42
|
+
end
|
43
|
+
|
44
|
+
it "can add a normalized frequency" do
|
45
|
+
row = processor(num_bins: 5, normalize: true).given(*nums).tsv_output.first
|
46
|
+
row.size.should == 4
|
47
|
+
row[3].to_f.should be_within(0.1).of(0.18)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "can add a normalized frequency and express counts logarithmically" do
|
51
|
+
row = processor(num_bins: 5, normalize: true, log_counts: true).given(*nums).tsv_output.first
|
52
|
+
row.size.should == 4
|
53
|
+
row[2].to_f.should be_within(0.1).of(2.197)
|
54
|
+
row[3].to_f.should be_within(0.1).of(-1.715)
|
55
|
+
end
|
56
|
+
|
57
|
+
it "can bin on the fly given min, max, and num_bins options" do
|
58
|
+
output = processor(min: -30, max: 30, num_bins: 3) do
|
59
|
+
# we can bin on the fly
|
60
|
+
values.should_not_receive(:<<)
|
61
|
+
should_not_receive(:bin!)
|
62
|
+
end.given(*nums).tsv_output
|
63
|
+
|
64
|
+
output.size.should == 3
|
65
|
+
output.first[0].to_f.should be_within(0.1).of(-30)
|
66
|
+
output.last[1].to_f.should be_within(0.1).of(30)
|
67
|
+
end
|
68
|
+
|
69
|
+
it "can bin on the fly given fixed bin edges" do
|
70
|
+
output = processor(edges: [0,1,5,10]) do
|
71
|
+
# we can bin on the fly
|
72
|
+
values.should_not_receive(:<<)
|
73
|
+
should_not_receive(:bin!)
|
74
|
+
end.given(*nums).tsv_output
|
75
|
+
output.size.should == 3
|
76
|
+
output[0][0].to_f.should be_within(0.1).of(0.0)
|
77
|
+
output[0][1].to_f.should be_within(0.1).of(1.0)
|
78
|
+
output[1][0].to_f.should be_within(0.1).of(1.0)
|
79
|
+
output[1][1].to_f.should be_within(0.1).of(5.0)
|
80
|
+
output[2][0].to_f.should be_within(0.1).of(5.0)
|
81
|
+
output[2][1].to_f.should be_within(0.1).of(10.0)
|
82
|
+
end
|
83
|
+
|
84
|
+
it "can extract the value to bin by from an object" do
|
85
|
+
output = processor(by: 'data.n', min: 0).given(*json).tsv_output
|
86
|
+
output.size.should == 2
|
87
|
+
output.first[0].to_f.should be_within(0.1).of(0.0)
|
88
|
+
output.last[1].to_f.should be_within(0.1).of(100.0)
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wukong::Processor do
|
4
|
+
describe :count do
|
5
|
+
include_context "reducers"
|
6
|
+
it_behaves_like 'a processor', :named => :count
|
7
|
+
it "should emit the total count of records" do
|
8
|
+
processor.given(*strings).should emit(4)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wukong::Processor do
|
4
|
+
describe :group do
|
5
|
+
include_context "reducers"
|
6
|
+
it_behaves_like 'a processor', :named => :group
|
7
|
+
|
8
|
+
let(:grouped_strings) { [['apple', '2'], ['banana', '1'], ['cookie', '1']] }
|
9
|
+
let(:grouped_nums) { [['', '2'], ['1', '1'], ['5', '1'], ['10', '1'], ['100', '1']] }
|
10
|
+
it "will group single values" do
|
11
|
+
processor(:group).given(*strings.sort).should emit_tsv(*grouped_strings)
|
12
|
+
end
|
13
|
+
it "can group from within a JSON hash" do
|
14
|
+
proc = processor(:group, by: 'data.n').given(*json_sorted_n).should emit_tsv(*grouped_nums)
|
15
|
+
end
|
16
|
+
it "can group from within a TSV row" do
|
17
|
+
proc = processor(:group, by: '3').given(*tsv_sorted).should emit_tsv(*grouped_nums)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wukong::Processor do
|
4
|
+
describe :moments do
|
5
|
+
include_context "reducers"
|
6
|
+
it_behaves_like 'a processor', :named => :moments
|
7
|
+
|
8
|
+
it "behaves like group when not called with any arguments" do
|
9
|
+
processor(:moments).given(*strings.sort).should emit(
|
10
|
+
{group: 'apple', count: 2, results: {}},
|
11
|
+
{group: 'banana', count: 1, results: {}},
|
12
|
+
{group: 'cookie', count: 1, results: {}}
|
13
|
+
)
|
14
|
+
end
|
15
|
+
|
16
|
+
it "calculates the moments of numeric fields" do
|
17
|
+
processor(:moments, group_by: 'outer', of: 'data.n').given(*json_sorted_outer).should emit(
|
18
|
+
{group: nil, count: 2, results: {"data.n" => {}}},
|
19
|
+
{group: 'apple', count: 2, results: {"data.n"=>{:count=>2, :mean=>3.0, :std_dev=>2.0}}},
|
20
|
+
{group: 'banana', count: 1, results: {"data.n"=>{:count=>1, :mean=>100.0, :std_dev=>0.0}}},
|
21
|
+
{group: 'cookie', count: 1, results: {"data.n"=>{:count=>1, :mean=>10.0, :std_dev=>0.0}}}
|
22
|
+
)
|
23
|
+
end
|
24
|
+
|
25
|
+
it "will leave off the standard deviation if desired" do
|
26
|
+
processor(:moments, group_by: 'outer', of: 'data.n', std_dev: false).given(*json_sorted_outer).should emit(
|
27
|
+
{group: nil, count: 2, results: {"data.n" => {}}},
|
28
|
+
{group: 'apple', count: 2, results: {"data.n"=>{:count=>2, :mean=>3.0 }}},
|
29
|
+
{group: 'banana', count: 1, results: {"data.n"=>{:count=>1, :mean=>100.0 }}},
|
30
|
+
{group: 'cookie', count: 1, results: {"data.n"=>{:count=>1, :mean=>10.0 }}}
|
31
|
+
)
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wukong::Processor do
|
4
|
+
describe :sort do
|
5
|
+
include_context "reducers"
|
6
|
+
it_behaves_like 'a processor', :named => :sort
|
7
|
+
it "will use ascending order by default" do
|
8
|
+
processor(:sort).given(*strings).should emit(*strings.sort)
|
9
|
+
end
|
10
|
+
it "can sort in reversed (descending) order" do
|
11
|
+
processor(:sort, reverse: true).given(*strings).should emit(*strings.sort.reverse)
|
12
|
+
end
|
13
|
+
it "will use lexical order by default" do
|
14
|
+
processor(:sort).given(*nums).should emit(*nums.sort)
|
15
|
+
end
|
16
|
+
it "can sort in numerical order" do
|
17
|
+
processor(:sort, numeric: true).given(*nums).should emit(*nums.map(&:to_i).sort.map(&:to_s))
|
18
|
+
end
|
19
|
+
it "can sort from within a JSON hash" do
|
20
|
+
proc = processor(:sort, numeric: true, on: 'data.n').given(*json).should emit(*json_sorted_n)
|
21
|
+
end
|
22
|
+
it "can sort from within a TSV row" do
|
23
|
+
proc = processor(:sort, numeric: true, on: '3').given(*tsv).should emit(*tsv_sorted)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Serializing" do
|
4
|
+
|
5
|
+
context :to_json do
|
6
|
+
|
7
|
+
let(:emittable) { {"hi" => "there"} }
|
8
|
+
let(:not_emittable) { {"n" => Float::INFINITY} }
|
9
|
+
|
10
|
+
it_behaves_like 'a processor', :named => :to_json
|
11
|
+
|
12
|
+
it "should handle valid records" do
|
13
|
+
processor.given(emittable).should emit_json(emittable)
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should skip bad records" do
|
17
|
+
processor.given(not_emittable).should emit(0).records
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
context :to_tsv do
|
23
|
+
let(:emittable) { ["foo", 2, :a] }
|
24
|
+
let(:not_emittable) { nil }
|
25
|
+
|
26
|
+
it_behaves_like 'a processor', :named => :to_tsv
|
27
|
+
|
28
|
+
it "should handle valid records" do
|
29
|
+
processor.given(emittable).should emit_tsv(emittable.map(&:to_s))
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should skip bad records" do
|
33
|
+
processor.given(not_emittable).should emit(0).records
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
describe "Deserializing" do
|
39
|
+
|
40
|
+
context :from_json do
|
41
|
+
let(:parseable) { '{"hi": "there"}' }
|
42
|
+
let(:not_parseable) { '{"832323:' }
|
43
|
+
|
44
|
+
it_behaves_like 'a processor', :named => :from_json
|
45
|
+
|
46
|
+
it "should handle valid records" do
|
47
|
+
processor.given(parseable).should emit({'hi' => 'there'})
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should skip bad records" do
|
51
|
+
processor.given(not_parseable).should emit(0).records
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
context :from_tsv do
|
56
|
+
|
57
|
+
let(:parseable) { "foo\t2\ta" }
|
58
|
+
let(:not_parseable) { nil }
|
59
|
+
|
60
|
+
it_behaves_like 'a processor', :named => :from_tsv
|
61
|
+
|
62
|
+
it "should handle valid records" do
|
63
|
+
processor.given(parseable).should emit(parseable.split("\t"))
|
64
|
+
end
|
65
|
+
|
66
|
+
it "should skip bad records" do
|
67
|
+
processor.given(not_parseable).should emit(0).records
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
describe "Pretty printing" do
|
73
|
+
|
74
|
+
context "JSON" do
|
75
|
+
let(:parseable) { '{"hi": "there"}' }
|
76
|
+
let(:not_parseable) { '{"832323:' }
|
77
|
+
|
78
|
+
it_behaves_like 'a processor', :named => :pretty
|
79
|
+
|
80
|
+
it "should prettify parseable records" do
|
81
|
+
processor(:pretty).given(parseable).should emit_json({'hi' => 'there'})
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should pass on non parseable records" do
|
85
|
+
processor(:pretty).given(not_parseable).should emit(not_parseable)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should pass on everything else" do
|
90
|
+
processor(:pretty).given('foobar').should emit('foobar')
|
91
|
+
end
|
92
|
+
end
|