wukong 3.0.0.pre → 3.0.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +46 -33
- data/.gitmodules +3 -0
- data/.rspec +1 -1
- data/.travis.yml +8 -1
- data/.yardopts +0 -13
- data/Guardfile +4 -6
- data/{LICENSE.textile → LICENSE.md} +43 -55
- data/README-old.md +422 -0
- data/README.md +279 -418
- data/Rakefile +21 -5
- data/TODO.md +6 -6
- data/bin/wu-clean-encoding +31 -0
- data/bin/wu-lign +2 -2
- data/bin/wu-local +69 -0
- data/bin/wu-server +70 -0
- data/examples/Gemfile +38 -0
- data/examples/README.md +9 -0
- data/examples/dataflow/apache_log_line.rb +64 -25
- data/examples/dataflow/fibonacci_series.rb +101 -0
- data/examples/dataflow/parse_apache_logs.rb +37 -7
- data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
- data/examples/dataflow/simple.rb +4 -4
- data/examples/geo.rb +4 -0
- data/examples/geo/geo_grids.numbers +0 -0
- data/examples/geo/geolocated.rb +331 -0
- data/examples/geo/quadtile.rb +69 -0
- data/examples/geo/spec/geolocated_spec.rb +247 -0
- data/examples/geo/tile_fetcher.rb +77 -0
- data/examples/graph/minimum_spanning_tree.rb +61 -61
- data/examples/jabberwocky.txt +36 -0
- data/examples/models/wikipedia.rb +20 -0
- data/examples/munging/Gemfile +8 -0
- data/examples/munging/airline_flights/airline.rb +57 -0
- data/examples/munging/airline_flights/airline_flights.rake +83 -0
- data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
- data/examples/munging/airline_flights/airport.rb +211 -0
- data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
- data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
- data/examples/munging/airline_flights/flight.rb +156 -0
- data/examples/munging/airline_flights/models.rb +4 -0
- data/examples/munging/airline_flights/parse.rb +26 -0
- data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
- data/examples/munging/airline_flights/route.rb +35 -0
- data/examples/munging/airline_flights/tasks.rake +83 -0
- data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
- data/examples/munging/airline_flights/topcities.rb +167 -0
- data/examples/munging/airports/40_wbans.txt +40 -0
- data/examples/munging/airports/filter_weather_reports.rb +37 -0
- data/examples/munging/airports/join.pig +31 -0
- data/examples/munging/airports/to_tsv.rb +33 -0
- data/examples/munging/airports/usa_wbans.pig +19 -0
- data/examples/munging/airports/usa_wbans.txt +2157 -0
- data/examples/munging/airports/wbans.pig +19 -0
- data/examples/munging/airports/wbans.txt +2310 -0
- data/examples/munging/geo/geo_json.rb +54 -0
- data/examples/munging/geo/geo_models.rb +69 -0
- data/examples/munging/geo/geonames_models.rb +78 -0
- data/examples/munging/geo/iso_codes.rb +172 -0
- data/examples/munging/geo/reconcile_countries.rb +124 -0
- data/examples/munging/geo/tasks.rake +71 -0
- data/examples/munging/rake_helper.rb +62 -0
- data/examples/munging/weather/.gitignore +1 -0
- data/examples/munging/weather/Gemfile +4 -0
- data/examples/munging/weather/Rakefile +28 -0
- data/examples/munging/weather/extract_ish.rb +13 -0
- data/examples/munging/weather/models/weather.rb +119 -0
- data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
- data/examples/munging/wikipedia/README.md +34 -0
- data/examples/munging/wikipedia/Rakefile +193 -0
- data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
- data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
- data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
- data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
- data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
- data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
- data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
- data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
- data/examples/munging/wikipedia/pig_style_guide.md +25 -0
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
- data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
- data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
- data/examples/munging/wikipedia/utils/namespaces.json +1 -0
- data/examples/rake_helper.rb +85 -0
- data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/server_logs/logline.rb +95 -0
- data/examples/server_logs/models.rb +66 -0
- data/examples/server_logs/page_counts.pig +48 -0
- data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
- data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
- data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
- data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
- data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
- data/examples/string_reverser.rb +26 -0
- data/examples/text/pig_latin.rb +2 -2
- data/examples/text/regional_flavor/README.md +14 -0
- data/examples/text/regional_flavor/article_wordbags.pig +39 -0
- data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
- data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
- data/examples/word_count/accumulator.rb +26 -0
- data/examples/word_count/tokenizer.rb +13 -0
- data/examples/word_count/word_count.rb +6 -0
- data/examples/workflow/cherry_pie.dot +97 -0
- data/examples/workflow/cherry_pie.png +0 -0
- data/examples/workflow/cherry_pie.rb +61 -26
- data/lib/hanuman.rb +34 -7
- data/lib/hanuman/graph.rb +55 -31
- data/lib/hanuman/graphvizzer.rb +199 -178
- data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
- data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
- data/lib/hanuman/link.rb +35 -0
- data/lib/hanuman/registry.rb +46 -0
- data/lib/hanuman/stage.rb +76 -32
- data/lib/wukong.rb +23 -24
- data/lib/wukong/boot.rb +87 -0
- data/lib/wukong/configuration.rb +8 -0
- data/lib/wukong/dataflow.rb +45 -78
- data/lib/wukong/driver.rb +99 -0
- data/lib/wukong/emitter.rb +22 -0
- data/lib/wukong/model/faker.rb +24 -24
- data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
- data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
- data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
- data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
- data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
- data/lib/wukong/processor.rb +60 -114
- data/lib/wukong/spec_helpers.rb +81 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
- data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
- data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
- data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
- data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
- data/lib/wukong/version.rb +2 -1
- data/lib/wukong/widget/filters.rb +311 -0
- data/lib/wukong/widget/processors.rb +156 -0
- data/lib/wukong/widget/reducers.rb +7 -0
- data/lib/wukong/widget/reducers/accumulator.rb +73 -0
- data/lib/wukong/widget/reducers/bin.rb +318 -0
- data/lib/wukong/widget/reducers/count.rb +61 -0
- data/lib/wukong/widget/reducers/group.rb +85 -0
- data/lib/wukong/widget/reducers/group_concat.rb +70 -0
- data/lib/wukong/widget/reducers/moments.rb +72 -0
- data/lib/wukong/widget/reducers/sort.rb +130 -0
- data/lib/wukong/widget/serializers.rb +287 -0
- data/lib/wukong/widget/sink.rb +10 -52
- data/lib/wukong/widget/source.rb +7 -113
- data/lib/wukong/widget/utils.rb +46 -0
- data/lib/wukong/widgets.rb +6 -0
- data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
- data/spec/examples/dataflow/parsing_spec.rb +12 -11
- data/spec/examples/dataflow/simple_spec.rb +32 -6
- data/spec/examples/dataflow/telegram_spec.rb +36 -36
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
- data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
- data/spec/examples/munging/airline_flights_spec.rb +202 -0
- data/spec/examples/text/pig_latin_spec.rb +13 -16
- data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
- data/spec/hanuman/graph_spec.rb +27 -2
- data/spec/hanuman/hanuman_spec.rb +10 -0
- data/spec/hanuman/registry_spec.rb +123 -0
- data/spec/hanuman/stage_spec.rb +61 -7
- data/spec/spec_helper.rb +29 -19
- data/spec/support/hanuman_test_helpers.rb +14 -12
- data/spec/support/shared_context_for_reducers.rb +37 -0
- data/spec/support/shared_examples_for_builders.rb +101 -0
- data/spec/support/shared_examples_for_shortcuts.rb +57 -0
- data/spec/support/wukong_test_helpers.rb +37 -11
- data/spec/wukong/dataflow_spec.rb +77 -55
- data/spec/wukong/local_runner_spec.rb +24 -24
- data/spec/wukong/model/faker_spec.rb +132 -131
- data/spec/wukong/runner_spec.rb +8 -8
- data/spec/wukong/widget/filters_spec.rb +61 -0
- data/spec/wukong/widget/processors_spec.rb +126 -0
- data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
- data/spec/wukong/widget/reducers/count_spec.rb +11 -0
- data/spec/wukong/widget/reducers/group_spec.rb +20 -0
- data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
- data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
- data/spec/wukong/widget/serializers_spec.rb +92 -0
- data/spec/wukong/widget/sink_spec.rb +15 -15
- data/spec/wukong/widget/source_spec.rb +65 -41
- data/spec/wukong/wukong_spec.rb +10 -0
- data/wukong.gemspec +17 -10
- metadata +359 -335
- data/.document +0 -5
- data/VERSION +0 -1
- data/bin/hdp-bin +0 -44
- data/bin/hdp-bzip +0 -23
- data/bin/hdp-cat +0 -3
- data/bin/hdp-catd +0 -3
- data/bin/hdp-cp +0 -3
- data/bin/hdp-du +0 -86
- data/bin/hdp-get +0 -3
- data/bin/hdp-kill +0 -3
- data/bin/hdp-kill-task +0 -3
- data/bin/hdp-ls +0 -11
- data/bin/hdp-mkdir +0 -2
- data/bin/hdp-mkdirp +0 -12
- data/bin/hdp-mv +0 -3
- data/bin/hdp-parts_to_keys.rb +0 -77
- data/bin/hdp-ps +0 -3
- data/bin/hdp-put +0 -3
- data/bin/hdp-rm +0 -32
- data/bin/hdp-sort +0 -40
- data/bin/hdp-stream +0 -40
- data/bin/hdp-stream-flat +0 -22
- data/bin/hdp-stream2 +0 -39
- data/bin/hdp-sync +0 -17
- data/bin/hdp-wc +0 -67
- data/bin/wu-flow +0 -10
- data/bin/wu-map +0 -17
- data/bin/wu-red +0 -17
- data/bin/wukong +0 -17
- data/data/CREDITS.md +0 -355
- data/data/graph/airfares.tsv +0 -2174
- data/data/text/gift_of_the_magi.txt +0 -225
- data/data/text/jabberwocky.txt +0 -36
- data/data/text/rectification_of_names.txt +0 -33
- data/data/twitter/a_atsigns_b.tsv +0 -64
- data/data/twitter/a_follows_b.tsv +0 -53
- data/data/twitter/tweet.tsv +0 -167
- data/data/twitter/twitter_user.tsv +0 -55
- data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
- data/docpages/INSTALL.textile +0 -92
- data/docpages/LICENSE.textile +0 -107
- data/docpages/README-elastic_map_reduce.textile +0 -377
- data/docpages/README-performance.textile +0 -90
- data/docpages/README-wulign.textile +0 -65
- data/docpages/UsingWukong-part1-get_ready.textile +0 -17
- data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
- data/docpages/UsingWukong-part3-parsing.textile +0 -138
- data/docpages/_config.yml +0 -39
- data/docpages/avro/avro_notes.textile +0 -56
- data/docpages/avro/performance.textile +0 -36
- data/docpages/avro/tethering.textile +0 -19
- data/docpages/bigdata-tips.textile +0 -143
- data/docpages/code/api_response_example.txt +0 -20
- data/docpages/code/parser_skeleton.rb +0 -38
- data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +0 -16
- data/docpages/hadoop-tips.textile +0 -83
- data/docpages/index.textile +0 -92
- data/docpages/intro.textile +0 -8
- data/docpages/moreinfo.textile +0 -174
- data/docpages/news.html +0 -24
- data/docpages/pig/PigLatinExpressionsList.txt +0 -122
- data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
- data/docpages/pig/commandline_params.txt +0 -26
- data/docpages/pig/cookbook.html +0 -481
- data/docpages/pig/images/hadoop-logo.jpg +0 -0
- data/docpages/pig/images/instruction_arrow.png +0 -0
- data/docpages/pig/images/pig-logo.gif +0 -0
- data/docpages/pig/piglatin_ref1.html +0 -1103
- data/docpages/pig/piglatin_ref2.html +0 -14340
- data/docpages/pig/setup.html +0 -505
- data/docpages/pig/skin/basic.css +0 -166
- data/docpages/pig/skin/breadcrumbs.js +0 -237
- data/docpages/pig/skin/fontsize.js +0 -166
- data/docpages/pig/skin/getBlank.js +0 -40
- data/docpages/pig/skin/getMenu.js +0 -45
- data/docpages/pig/skin/images/chapter.gif +0 -0
- data/docpages/pig/skin/images/chapter_open.gif +0 -0
- data/docpages/pig/skin/images/current.gif +0 -0
- data/docpages/pig/skin/images/external-link.gif +0 -0
- data/docpages/pig/skin/images/header_white_line.gif +0 -0
- data/docpages/pig/skin/images/page.gif +0 -0
- data/docpages/pig/skin/images/pdfdoc.gif +0 -0
- data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/print.css +0 -54
- data/docpages/pig/skin/profile.css +0 -181
- data/docpages/pig/skin/screen.css +0 -587
- data/docpages/pig/tutorial.html +0 -1059
- data/docpages/pig/udf.html +0 -1509
- data/docpages/tutorial.textile +0 -283
- data/docpages/usage.textile +0 -195
- data/docpages/wutils.textile +0 -263
- data/examples/dataflow/complex.rb +0 -11
- data/examples/dataflow/donuts.rb +0 -13
- data/examples/tiny_count/jabberwocky_output.tsv +0 -92
- data/examples/word_count.rb +0 -48
- data/examples/workflow/fiddle.rb +0 -24
- data/lib/away/escapement.rb +0 -129
- data/lib/away/exe.rb +0 -11
- data/lib/away/experimental.rb +0 -5
- data/lib/away/from_file.rb +0 -52
- data/lib/away/job.rb +0 -56
- data/lib/away/job/rake_compat.rb +0 -17
- data/lib/away/registry.rb +0 -79
- data/lib/away/runner.rb +0 -276
- data/lib/away/runner/execute.rb +0 -121
- data/lib/away/script.rb +0 -161
- data/lib/away/script/hadoop_command.rb +0 -240
- data/lib/away/source/file_list_source.rb +0 -15
- data/lib/away/source/looper.rb +0 -18
- data/lib/away/task.rb +0 -219
- data/lib/hanuman/action.rb +0 -21
- data/lib/hanuman/chain.rb +0 -4
- data/lib/hanuman/graphviz.rb +0 -74
- data/lib/hanuman/resource.rb +0 -6
- data/lib/hanuman/slot.rb +0 -87
- data/lib/hanuman/slottable.rb +0 -220
- data/lib/wukong/bad_record.rb +0 -15
- data/lib/wukong/event.rb +0 -44
- data/lib/wukong/local_runner.rb +0 -55
- data/lib/wukong/mapred.rb +0 -3
- data/lib/wukong/universe.rb +0 -48
- data/lib/wukong/widget/filter.rb +0 -81
- data/lib/wukong/widget/gibberish.rb +0 -123
- data/lib/wukong/widget/monitor.rb +0 -26
- data/lib/wukong/widget/reducer.rb +0 -66
- data/lib/wukong/widget/stringifier.rb +0 -50
- data/lib/wukong/workflow.rb +0 -22
- data/lib/wukong/workflow/command.rb +0 -42
- data/old/config/emr-example.yaml +0 -48
- data/old/examples/README.txt +0 -17
- data/old/examples/contrib/jeans/README.markdown +0 -165
- data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
- data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
- data/old/examples/contrib/jeans/data/sizes +0 -3
- data/old/examples/contrib/jeans/normalize.rb +0 -20
- data/old/examples/contrib/jeans/sizes.rb +0 -55
- data/old/examples/corpus/bnc_word_freq.rb +0 -44
- data/old/examples/corpus/bucket_counter.rb +0 -47
- data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
- data/old/examples/corpus/sentence_bigrams.rb +0 -53
- data/old/examples/corpus/sentence_coocurrence.rb +0 -66
- data/old/examples/corpus/stopwords.rb +0 -138
- data/old/examples/corpus/words_to_bigrams.rb +0 -53
- data/old/examples/emr/README.textile +0 -110
- data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
- data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
- data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
- data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
- data/old/examples/network_graph/adjacency_list.rb +0 -74
- data/old/examples/network_graph/breadth_first_search.rb +0 -72
- data/old/examples/network_graph/gen_2paths.rb +0 -68
- data/old/examples/network_graph/gen_multi_edge.rb +0 -112
- data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
- data/old/examples/pagerank/README.textile +0 -6
- data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
- data/old/examples/pagerank/pagerank.rb +0 -72
- data/old/examples/pagerank/pagerank_initialize.rb +0 -42
- data/old/examples/pagerank/run_pagerank.sh +0 -21
- data/old/examples/sample_records.rb +0 -33
- data/old/examples/server_logs/apache_log_parser.rb +0 -15
- data/old/examples/server_logs/nook.rb +0 -48
- data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
- data/old/examples/server_logs/user_agent.rb +0 -40
- data/old/examples/simple_word_count.rb +0 -82
- data/old/examples/size.rb +0 -61
- data/old/examples/stats/avg_value_frequency.rb +0 -86
- data/old/examples/stats/binning_percentile_estimator.rb +0 -140
- data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
- data/old/examples/stats/rank_and_bin.rb +0 -173
- data/old/examples/stupidly_simple_filter.rb +0 -40
- data/old/examples/word_count.rb +0 -75
- data/old/graph/graphviz_builder.rb +0 -580
- data/old/graph_easy/Attributes.pm +0 -4181
- data/old/graph_easy/Graphviz.pm +0 -2232
- data/old/wukong.rb +0 -18
- data/old/wukong/and_pig.rb +0 -38
- data/old/wukong/bad_record.rb +0 -18
- data/old/wukong/datatypes.rb +0 -24
- data/old/wukong/datatypes/enum.rb +0 -127
- data/old/wukong/datatypes/fake_types.rb +0 -17
- data/old/wukong/decorator.rb +0 -28
- data/old/wukong/encoding/asciize.rb +0 -108
- data/old/wukong/extensions.rb +0 -16
- data/old/wukong/extensions/array.rb +0 -18
- data/old/wukong/extensions/blank.rb +0 -93
- data/old/wukong/extensions/class.rb +0 -189
- data/old/wukong/extensions/date_time.rb +0 -53
- data/old/wukong/extensions/emittable.rb +0 -69
- data/old/wukong/extensions/enumerable.rb +0 -79
- data/old/wukong/extensions/hash.rb +0 -167
- data/old/wukong/extensions/hash_keys.rb +0 -16
- data/old/wukong/extensions/hash_like.rb +0 -150
- data/old/wukong/extensions/hashlike_class.rb +0 -47
- data/old/wukong/extensions/module.rb +0 -2
- data/old/wukong/extensions/pathname.rb +0 -27
- data/old/wukong/extensions/string.rb +0 -65
- data/old/wukong/extensions/struct.rb +0 -17
- data/old/wukong/extensions/symbol.rb +0 -11
- data/old/wukong/filename_pattern.rb +0 -74
- data/old/wukong/helper.rb +0 -7
- data/old/wukong/helper/stopwords.rb +0 -195
- data/old/wukong/helper/tokenize.rb +0 -35
- data/old/wukong/logger.rb +0 -38
- data/old/wukong/periodic_monitor.rb +0 -72
- data/old/wukong/schema.rb +0 -269
- data/old/wukong/script.rb +0 -286
- data/old/wukong/script/avro_command.rb +0 -5
- data/old/wukong/script/cassandra_loader_script.rb +0 -40
- data/old/wukong/script/emr_command.rb +0 -168
- data/old/wukong/script/hadoop_command.rb +0 -237
- data/old/wukong/script/local_command.rb +0 -41
- data/old/wukong/store.rb +0 -10
- data/old/wukong/store/base.rb +0 -27
- data/old/wukong/store/cassandra.rb +0 -10
- data/old/wukong/store/cassandra/streaming.rb +0 -75
- data/old/wukong/store/cassandra/struct_loader.rb +0 -21
- data/old/wukong/store/cassandra_model.rb +0 -91
- data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
- data/old/wukong/store/chunked_flat_file_store.rb +0 -48
- data/old/wukong/store/conditional_store.rb +0 -57
- data/old/wukong/store/factory.rb +0 -8
- data/old/wukong/store/flat_file_store.rb +0 -89
- data/old/wukong/store/key_store.rb +0 -51
- data/old/wukong/store/null_store.rb +0 -15
- data/old/wukong/store/read_thru_store.rb +0 -22
- data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
- data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
- data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
- data/old/wukong/streamer.rb +0 -30
- data/old/wukong/streamer/accumulating_reducer.rb +0 -83
- data/old/wukong/streamer/base.rb +0 -126
- data/old/wukong/streamer/counting_reducer.rb +0 -25
- data/old/wukong/streamer/filter.rb +0 -20
- data/old/wukong/streamer/instance_streamer.rb +0 -15
- data/old/wukong/streamer/json_streamer.rb +0 -21
- data/old/wukong/streamer/line_streamer.rb +0 -12
- data/old/wukong/streamer/list_reducer.rb +0 -31
- data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
- data/old/wukong/streamer/record_streamer.rb +0 -14
- data/old/wukong/streamer/reducer.rb +0 -11
- data/old/wukong/streamer/set_reducer.rb +0 -14
- data/old/wukong/streamer/struct_streamer.rb +0 -48
- data/old/wukong/streamer/summing_reducer.rb +0 -29
- data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
- data/old/wukong/typed_struct.rb +0 -12
- data/spec/away/encoding_spec.rb +0 -32
- data/spec/away/exe_spec.rb +0 -20
- data/spec/away/flow_spec.rb +0 -82
- data/spec/away/graph_spec.rb +0 -6
- data/spec/away/job_spec.rb +0 -15
- data/spec/away/rake_compat_spec.rb +0 -9
- data/spec/away/script_spec.rb +0 -81
- data/spec/hanuman/graphviz_spec.rb +0 -29
- data/spec/hanuman/slot_spec.rb +0 -2
- data/spec/support/examples_helper.rb +0 -10
- data/spec/support/streamer_test_helpers.rb +0 -6
- data/spec/support/wukong_widget_helpers.rb +0 -66
- data/spec/wukong/processor_spec.rb +0 -109
- data/spec/wukong/widget/filter_spec.rb +0 -99
- data/spec/wukong/widget/stringifier_spec.rb +0 -51
- data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'wukong'
|
2
|
+
require 'wukong/boot'
|
3
|
+
require_relative('spec_helpers/processor_helpers')
|
4
|
+
require_relative('spec_helpers/processor_methods')
|
5
|
+
require_relative('spec_helpers/spec_driver_matchers')
|
6
|
+
require_relative('spec_helpers/integration_driver')
|
7
|
+
require_relative('spec_helpers/integration_driver_matchers')
|
8
|
+
require_relative('spec_helpers/shared_examples')
|
9
|
+
|
10
|
+
module Wukong
|
11
|
+
|
12
|
+
# This module lets you write processor specs at a high level.
|
13
|
+
# Require it in your <tt>spec_helper.rb</tt> file:
|
14
|
+
#
|
15
|
+
# # in your spec/spec_helper.rb
|
16
|
+
# require 'wukong/spec_helpers'
|
17
|
+
# RSpec.configure do |config|
|
18
|
+
# include Wukong::SpecHelpers
|
19
|
+
# end
|
20
|
+
#
|
21
|
+
# Processors in a Wukong spec will have a collection of
|
22
|
+
# <tt>given_*</tt> methods you can use to (lazily) feed them records
|
23
|
+
# without having to build your own driver to run the
|
24
|
+
# processors.
|
25
|
+
#
|
26
|
+
# To each <tt>given_*</tt> method corresponds an <tt>emit_*</tt>
|
27
|
+
# matcher which will actually run the processor on the given
|
28
|
+
# inputs and compare against expected results. Here's an example,
|
29
|
+
# using a simple `tokenizer` processor.
|
30
|
+
#
|
31
|
+
# subject { processor(:tokenizer) }
|
32
|
+
#
|
33
|
+
# it "emits each word in a given string" do
|
34
|
+
# given("It was the best of times, it was the worst of times.").should emit(12).records
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# # Give similar input and check against explicit expected
|
38
|
+
# # output.
|
39
|
+
# it "should ignore punctuation and capitalization" do
|
40
|
+
# processor.given("You're crazy!").should emit("youre", "crazy")
|
41
|
+
# end
|
42
|
+
#
|
43
|
+
# # Pass the input but transform to JSON first (delimited and
|
44
|
+
# # as_tsv also work).
|
45
|
+
# it "should tokenize the 'text' attribute of a record if given JSON" do
|
46
|
+
# processor.given("text" => "Will be cast to JSON").as_json.should emit("will", "be", "cast", "to", "json")
|
47
|
+
# end
|
48
|
+
#
|
49
|
+
# # Initialize the :tokenizer processor with arguments to test
|
50
|
+
# # behavior under different conditions.
|
51
|
+
# it "should output a single record when asked for JSON output" do
|
52
|
+
# processor(:json => true).given("It was the best of times, it was the worst of times.").should emit(1).records
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# # Initialize processor with arguments and express that the
|
56
|
+
# # expected output will be in JSON though given as an object.
|
57
|
+
# it "should output all the tokens for its input record with its JSON output" do
|
58
|
+
# processor(:json => true).given("You're crazy!").should emit("tokens" => ["youre", "crazy"]).as_json
|
59
|
+
# end
|
60
|
+
#
|
61
|
+
# # Initialize processor with arguments, and both input and
|
62
|
+
# # output will be serialized/deserialized to/from JSON
|
63
|
+
# # automatically.
|
64
|
+
# it "can read and write pure JSON" do
|
65
|
+
# processor(:json => true).given("text" => "You're crazy!").as_json.should emit("tokens" => ["youre", "crazy"]).as_json
|
66
|
+
# end
|
67
|
+
#
|
68
|
+
# # Use a processor outside the scope of the top-level :describe
|
69
|
+
# # block.
|
70
|
+
# it "has a friend which does the same thing" do
|
71
|
+
# processor(:similar_tokenizer, :json => true).given("hi there").should emit(2).records
|
72
|
+
# end
|
73
|
+
module SpecHelpers
|
74
|
+
include ProcessorHelpers
|
75
|
+
include SpecMatchers
|
76
|
+
include IntegrationRunner
|
77
|
+
include IntegrationMatchers
|
78
|
+
end
|
79
|
+
|
80
|
+
Processor.class_eval { include SpecHelpers::ProcessorSpecMethods }
|
81
|
+
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'open3'
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
module SpecHelpers
|
5
|
+
|
6
|
+
# Provides a `command` method for writing integration tests for
|
7
|
+
# commands.
|
8
|
+
module IntegrationRunner

  # Build a driver for a command whose STDOUT, STDERR, and exit
  # code can then be checked.
  #
  # The `args` are flattened, stripped of `nil` entries, and
  # handed to an IntegrationDriver, which joins them into a
  # single command line.
  #
  # The returned driver is meant to be handed to the matchers
  # defined in IntegrationMatchers:
  #
  # @example Check output of 'ls' includes a string 'foo.txt'
  #   it "lists files" do
  #     command('ls').should have_stdout('foo.txt')
  #   end
  #
  # @example More complicated
  #   context "long format" do
  #     it "lists files with timestamps" do
  #       command('ls', '-l').should have_stdout('foo.txt', /\w+ \d+ \d+:\d+/)
  #     end
  #   end
  #
  # @overload command(*args, options={})
  #   If the last element of `args` is a Hash it is removed from
  #   the command line and used as the options.
  #
  #   The environment is assembled in three layers, each merged
  #   on top of the previous one: the Ruby process's own `ENV`,
  #   then the return value of `integration_env` (only when the
  #   calling context responds to it), then an explicit :env
  #   option.
  #
  #   The working directory is the :cwd option when given,
  #   otherwise the return value of `integration_cwd` (only when
  #   the calling context responds to it), otherwise
  #   <tt>Dir.pwd</tt>.
  #
  #   @param [Array<String>] args
  #   @param [Hash] options
  #   @option options [Hash] env the shell environment to spawn the command with
  #   @option options [String] cwd the directory to execute the command in
  # @return [IntegrationDriver]
  def command(*args)
    words   = args.flatten.compact
    options = words.last.is_a?(Hash) ? words.pop : {}

    environment = ENV.to_hash
    environment = environment.merge(integration_env) if respond_to?(:integration_env)
    environment = environment.merge(options[:env])   if options[:env]

    directory = options[:cwd]
    unless directory
      directory = respond_to?(:integration_cwd) ? integration_cwd : Dir.pwd
    end

    IntegrationDriver.new(words, :cwd => directory, :env => environment)
  end
end
|
63
|
+
|
64
|
+
# A driver for running commands in a subprocess.
|
65
|
+
class IntegrationDriver

  # Writer retained for backward compatibility.  The reader
  # (#cmd, below) always derives the command line from the
  # arguments given to the constructor.
  attr_writer :cmd

  # The directory in which to execute the command.
  attr_accessor :cwd

  # The ID of the spawned subprocess (while it was running).
  attr_accessor :pid

  # The STDOUT of the spawned process.
  attr_accessor :stdout

  # The STDERR of the spawned process.
  attr_accessor :stderr

  # The exit code of the spawned process.
  attr_accessor :exit_code

  # Initialize a new IntegrationDriver to run a given command.
  #
  # @param [Array] args the words of the command line
  # @param [Hash] options
  # @option options [Hash] env extra environment, merged on top of `ENV`
  # @option options [String] cwd the directory to execute the command in
  def initialize args, options
    @args   = args
    @env    = options[:env]
    @cwd    = options[:cwd]
    @inputs = []
    @ran    = false
  end

  # Run the command and capture its outputs and exit code.
  #
  # The command is spawned inside #cwd (when one was given) with
  # the environment returned by #env.  Any inputs queued with #on
  # are written to its STDIN, which is then closed.
  #
  # A driver only ever runs once; further calls are no-ops.
  #
  # @return [true, false] true if the command was run by this
  #   call, false if it had already been run
  def run!
    return false if ran?

    # Only pass :chdir when a directory was supplied -- a nil
    # value is rejected by Process.spawn.
    spawn_options = cwd ? { :chdir => cwd } : {}

    Open3.popen3(env, cmd, spawn_options) do |i, o, e, wait_thr|
      self.pid = wait_thr.pid

      # Drain both output pipes concurrently, starting before any
      # input is written.  Reading them one after the other (or
      # only after writing all the input) deadlocks as soon as the
      # child fills the buffer of a pipe nobody is reading yet.
      out_reader = Thread.new { o.read }
      err_reader = Thread.new { e.read }

      @inputs.each { |input| i.puts(input) }
      i.close

      self.stdout = out_reader.value
      self.stderr = err_reader.value

      # Process::Status#to_i is the raw, platform-dependent wait
      # status (e.g. 768 for `exit 3`); #exitstatus is the actual
      # exit code.  A process killed by a signal has no exit code,
      # so keep the raw (non-zero) status in that case.
      status         = wait_thr.value
      self.exit_code = status.exited? ? status.exitstatus : status.to_i
    end
    @ran = true
  end

  # The command line to execute: the non-nil arguments, converted
  # to strings and joined with single spaces.
  #
  # @return [String]
  def cmd
    @args.compact.map(&:to_s).join(' ')
  end

  # Queue `events` to be written (one `puts` each) to the STDIN of
  # the command when it is run.
  #
  # @return [IntegrationDriver] self, to allow chaining
  def on *events
    @inputs.concat(events)
    self
  end

  # The environment the command is spawned with: the current
  # `ENV` with the environment given at construction merged on
  # top.
  #
  # @return [Hash]
  def env
    ENV.to_hash.merge(@env || {})
  end

  # Whether the command has already been run.
  def ran?
    @ran
  end

  # A multi-line, human-readable description of the command, a
  # summary of its environment, and its working directory.
  #
  # @return [String]
  def cmd_summary
    [
     cmd,
     "with env #{env_summary}",
     "in dir #{cwd}"
    ].join("\n")
  end

  # The PATH and RUBYLIB entries of #env, inspected.
  #
  # @return [String]
  def env_summary
    { "PATH" => env["PATH"], "RUBYLIB" => env["RUBYLIB"] }.inspect
  end

end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
|
@@ -0,0 +1,219 @@
|
|
1
|
+
module Wukong
|
2
|
+
module SpecHelpers
|
3
|
+
|
4
|
+
# Provides matchers for STDOUT, STDERR, and exit code when writing
|
5
|
+
# integration tests for Wukong's command-line APIs.
|
6
|
+
module IntegrationMatchers

  # Build a matcher asserting that every `expectation` is found in
  # the command's STDOUT.  Order does not matter.  A String
  # expectation must be included in the output; a Regexp expectation
  # must match it.
  #
  # @param [Array<String,Regexp>] expectations
  # @return [StdoutMatcher]
  def have_stdout(*expectations)
    StdoutMatcher.new(*expectations)
  end

  # Build a matcher asserting that every `expectation` is found in
  # the command's STDERR.  Order does not matter.  A String
  # expectation must be included in the output; a Regexp expectation
  # must match it.
  #
  # @param [Array<String,Regexp>] expectations
  # @return [StderrMatcher]
  def have_stderr(*expectations)
    StderrMatcher.new(*expectations)
  end

  # Build a matcher asserting that the command exits with `code`.
  #
  # @param [Integer] code
  # @return [ExitCodeMatcher]
  def exit_with(code)
    ExitCodeMatcher.new(code)
  end

end
|
36
|
+
|
37
|
+
# Base class for matchers which run a command through a driver and
# compare its output against a list of expectations.  Subclasses
# choose which output to examine by defining `output`.
|
40
|
+
class IntegrationMatcher

  # The driver used to run the actual commands.
  attr_accessor :driver

  # An array of expectations about the output of the driver.
  attr_accessor :expectations

  # The first expectation which failed to match during the most
  # recent call to #matches?, or nil if they all matched.
  attr_accessor :failed_expectation

  # Create a matcher on the given expectations.  Each expectation
  # can be either a String or a Regexp.  Strings will be tested
  # for inclusion in the output, Regexps will be tested for a
  # match against the output.
  #
  # @param [Array<String,Regexp>] expectations
  def initialize(*expectations)
    self.expectations = expectations
  end

  # Return whether or not the given command's output matches
  # expectations.
  #
  # Runs the driver, then checks the expectations in order.  If an
  # expectation fails to match, the `failed_expectation` attribute
  # is set to it; otherwise the attribute is reset to nil so that a
  # failure from an earlier call does not linger.
  #
  # Subclasses must define `output`, returning the String to check.
  #
  # @param [IntegrationDriver] driver
  # @return [true, false]
  def matches?(driver)
    self.driver = driver
    driver.run!
    self.failed_expectation = nil
    expectations.each do |expectation|
      next if output.send(match_function(expectation), expectation)
      self.failed_expectation = expectation
      return false
    end
    true
  end

  # :nodoc:
  def failure_message
    "Ran\n\n #{formatted_command}\n\nand expected #{output_description}\n\n#{formatted_output}\n\nto #{match_type}\n\n #{failed_expectation}"
  end

  # :nodoc:
  #
  # A negated match only fails when *every* expectation was
  # satisfied, so there is no single failed expectation to report;
  # list all of them instead.
  def negative_failure_message
    "Expected #{output_description} of #{driver.cmd}\n\n#{output}\n\nto NOT include or match all of\n\n#{formatted_expectations}"
  end

  # :nodoc:
  #
  # RSpec 3 name for the negated failure message.  Dispatches
  # dynamically so that a subclass overriding
  # #negative_failure_message is honoured.
  def failure_message_when_negated
    negative_failure_message
  end

  # :nodoc:
  def formatted_output
    output.split("\n").map { |line| ' ' + line }.join("\n")
  end

  # :nodoc:
  def formatted_expectations
    expectations.map { |expectation| ' ' + expectation.inspect }.join("\n")
  end

  # :nodoc:
  def formatted_command
    "$ #{driver.cmd}"
  end

  # :nodoc:
  #
  # The String method used to test `expectation` against the output.
  def match_function(expectation)
    expectation.is_a?(Regexp) ? :match : :include?
  end

  # :nodoc:
  def match_type
    failed_expectation.is_a?(Regexp) ? 'match' : 'include'
  end

end
|
112
|
+
|
113
|
+
# A matcher for the STDOUT of a command.
|
114
|
+
class StdoutMatcher < IntegrationMatcher

  # The text the expectations are checked against: whatever the
  # command wrote to STDOUT.
  def output
    driver.stdout
  end

  # :nodoc:
  def output_description
    'STDOUT'
  end

  # :nodoc:
  def description
    'have the correct ' + output_description
  end

end
|
131
|
+
|
132
|
+
# A matcher for the STDERR of a command.
|
133
|
+
class StderrMatcher < IntegrationMatcher

  # The text the expectations are checked against: whatever the
  # command wrote to STDERR.
  def output
    driver.stderr
  end

  # :nodoc:
  def output_description
    'STDERR'
  end

  # :nodoc:
  def description
    'print an appropriate error message on ' + output_description
  end
end
|
149
|
+
|
150
|
+
# A matcher for the exit code of a command.
|
151
|
+
class ExitCodeMatcher < IntegrationMatcher

  # Initialize this matcher with the given `code`.
  #
  # If `code` is the symbol <tt>:non_zero</tt> then the
  # expectation will be any non-zero exit code.  Anything else is
  # converted with `to_i`.
  #
  # @param [Integer,Symbol] code
  def initialize(code)
    @expected_code = (code == :non_zero ? :non_zero : code.to_i)
  end

  # Return whether or not the given command's exit code matches
  # the expectation.
  #
  # The result is computed afresh on every call; no failure state is
  # kept on the matcher, so an instance which failed once can still
  # succeed against a later driver.
  #
  # @param [IntegrationDriver] driver
  # @return [true, false]
  def matches?(driver)
    self.driver = driver
    driver.run!
    actual = driver.exit_code
    if non_zero_exit_code?
      actual != 0
    else
      actual == expected_exit_code
    end
  end

  # :nodoc:
  def failure_message
    "Ran\n\n #{formatted_command}\n\nexpecting #{expected_exit_code_description} Got #{driver.exit_code} instead."
  end

  # :nodoc:
  def negative_failure_message
    "Ran\n\n #{formatted_command}\n\nNOT expecting #{expected_exit_code_description}."
  end

  # :nodoc:
  #
  # Was this matcher built with <tt>:non_zero</tt>?
  def non_zero_exit_code?
    @expected_code == :non_zero
  end

  # :nodoc:
  #
  # The numeric exit code expected.  Only meaningful when
  # #non_zero_exit_code? is false.
  def expected_exit_code
    (@expected_code || 0).to_i
  end

  # :nodoc:
  def expected_exit_code_description
    if non_zero_exit_code?
      "a non-zero exit code"
    else
      "an exit code of #{expected_exit_code}"
    end
  end

  # :nodoc:
  def description
    "exit with #{expected_exit_code_description}"
  end

end
|
218
|
+
end
|
219
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module Wukong
|
2
|
+
module SpecHelpers
|
3
|
+
module ProcessorHelpers

  # Creates a new processor in a variety of convenient ways.
  #
  # Most simply, called without args, will return a new instance of
  # the klass named in the containing `describe` or `context`:
  #
  #   context MyApp::Tokenizer do
  #     it "uses whitespace as the default separator between tokens" do
  #       processor.separator.should == /\s+/
  #     end
  #   end
  #
  # If your processor has been registered (you created it with the
  # <tt>Wukong.processor</tt> helper method or otherwise
  # registered it yourself) then you can use its name:
  #
  #   context :tokenizer do
  #     it "uses whitespace as the default separator between tokens" do
  #       processor.separator.should == /\s+/
  #     end
  #   end
  #
  # The `processor` method can also be used inside RSpec's
  # `subject` and `let` methods:
  #
  #   context "with no arguments" do
  #     subject { processor }
  #     it "uses whitespace as the default separator between tokens" do
  #       separator.should == /\s+/
  #     end
  #   end
  #
  # and you can easily pass arguments, just like you would on the
  # command line or in a dataflow definition:
  #
  #   context "with arguments" do
  #     subject { processor(separator: ' ') }
  #     it "uses whitespace as the default separator between tokens" do
  #       separator.should == ' '
  #     end
  #   end
  #
  # You can even name the processor directly if you want to:
  #
  #   context "tokenizers" do
  #     let(:default_tokenizer) { processor(:tokenizer) }
  #     let(:complex_tokenizer) { processor(:complex_tokenizer, stemming: true) }
  #     let(:french_tokenizer)  { processor(:complex_tokenizer, stemming: true) }
  #     ...
  #   end
  #
  # A block, if given, is evaluated in the context of the new
  # processor after it has been set up.
  def processor(*args, &block)
    # With no leading name/class, fall back to the description of
    # the enclosing example group.
    name_or_klass, options =
      if args.empty?
        [self.class.description, {}]
      elsif args.first.is_a?(Hash)
        [self.class.description, args.first]
      else
        [args[0], args[1] || {}]
      end
    create_processor(name_or_klass, options, &block)
  end
  alias_method :flow, :processor

  # Is the given `klass` a Wukong::Processor?
  #
  # Decided by building an instance with no arguments and checking
  # its type.
  #
  # @param [Class] klass
  # @return [true, false]
  def processor?(klass)
    klass.build.is_a?(Processor)
  end

  # :nodoc:
  #
  # Resolve `name_or_klass` (a Class, or a name looked up in
  # Wukong.registry), build it with freshly booted settings merged
  # with `options`, call `setup` on it, evaluate the optional block
  # against it and return it.
  def create_processor(name_or_klass, options = {}, &block)
    if name_or_klass.is_a?(Class)
      klass = name_or_klass
    else
      klass = Wukong.registry.retrieve(name_or_klass.to_s.to_sym)
      raise Error, "Could not find a Wukong::Processor class named '#{name_or_klass}'" if klass.nil?
    end
    raise Error, "#{klass} is not a subclass of Wukong::Processor" unless processor?(klass)

    settings = Configliere::Param.new
    Wukong.boot!(settings)

    # Named `instance` rather than `proc` so as not to shadow Kernel#proc.
    instance = klass.build(settings.merge(options || {}))
    instance.setup
    instance.instance_eval(&block) if block
    instance
  end
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|