ul-wukong 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +60 -0
- data/.gitmodules +6 -0
- data/.rspec +2 -0
- data/.travis.yml +19 -0
- data/.yardopts +6 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +17 -0
- data/Guardfile +12 -0
- data/LICENSE.md +95 -0
- data/NOTES-travis.md +31 -0
- data/README-old.md +422 -0
- data/README.md +1308 -0
- data/Rakefile +28 -0
- data/TODO.md +99 -0
- data/bin/cutc +30 -0
- data/bin/cuttab +5 -0
- data/bin/greptrue +6 -0
- data/bin/md5sort +20 -0
- data/bin/setcat +11 -0
- data/bin/tabchar +5 -0
- data/bin/uniq-ord +59 -0
- data/bin/uniqc +3 -0
- data/bin/wu +34 -0
- data/bin/wu-clean-encoding +31 -0
- data/bin/wu-date +13 -0
- data/bin/wu-datetime +13 -0
- data/bin/wu-hist +3 -0
- data/bin/wu-lign +186 -0
- data/bin/wu-local +4 -0
- data/bin/wu-plus +9 -0
- data/bin/wu-source +5 -0
- data/bin/wu-sum +31 -0
- data/diagrams/wu_local.dot +39 -0
- data/diagrams/wu_local.dot.png +0 -0
- data/examples/Gemfile +38 -0
- data/examples/README.md +9 -0
- data/examples/basic/string_reverser.rb +23 -0
- data/examples/basic/tiny_count.rb +8 -0
- data/examples/basic/word_count/accumulator.rb +26 -0
- data/examples/basic/word_count/tokenizer.rb +13 -0
- data/examples/basic/word_count/word_count.rb +6 -0
- data/examples/dataflow/scraper_macro_flow.rb +28 -0
- data/examples/deploy_pack/Gemfile +6 -0
- data/examples/deploy_pack/README.md +6 -0
- data/examples/deploy_pack/a/b/c/.gitkeep +0 -0
- data/examples/deploy_pack/app/processors/string_reverser.rb +5 -0
- data/examples/deploy_pack/config/environment.rb +1 -0
- data/examples/dsl/dataflow/fibonacci_series.rb +101 -0
- data/examples/dsl/dataflow/scraper_macro_flow.rb +28 -0
- data/examples/dsl/dataflow/simple.rb +12 -0
- data/examples/dsl/dataflow/telegram.rb +45 -0
- data/examples/dsl/workflow/cherry_pie.dot +97 -0
- data/examples/dsl/workflow/cherry_pie.md +104 -0
- data/examples/dsl/workflow/cherry_pie.png +0 -0
- data/examples/dsl/workflow/cherry_pie.rb +101 -0
- data/examples/empty/.gitkeep +0 -0
- data/examples/examples_helper.rb +9 -0
- data/examples/geo.rb +4 -0
- data/examples/geo/geo_grids.numbers +0 -0
- data/examples/geo/geolocated.rb +331 -0
- data/examples/geo/quadtile.rb +69 -0
- data/examples/geo/spec/geolocated_spec.rb +247 -0
- data/examples/geo/tile_fetcher.rb +77 -0
- data/examples/graph/implied_geolocation/README.md +63 -0
- data/examples/graph/minimum_spanning_tree/airfares_graphviz.rb +73 -0
- data/examples/improver/tweet_summary.rb +73 -0
- data/examples/loadable.rb +2 -0
- data/examples/munging/airline_flights/airline_flights.rake +83 -0
- data/examples/munging/airline_flights/airplane.rb +0 -0
- data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
- data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
- data/examples/munging/airline_flights/indexable.rb +75 -0
- data/examples/munging/airline_flights/indexable_spec.rb +90 -0
- data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
- data/examples/munging/airline_flights/tasks.rake +83 -0
- data/examples/munging/airline_flights/topcities.rb +167 -0
- data/examples/munging/geo/geo_json.rb +54 -0
- data/examples/munging/geo/geo_models.rb +69 -0
- data/examples/munging/geo/geonames_models.rb +107 -0
- data/examples/munging/geo/iso_codes.rb +172 -0
- data/examples/munging/geo/reconcile_countries.rb +124 -0
- data/examples/munging/geo/tasks.rake +71 -0
- data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
- data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
- data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
- data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
- data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
- data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
- data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +5 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
- data/examples/munging/wikipedia/dbpedia/extract_links-cruft.rb +66 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +260 -0
- data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
- data/examples/rake_helper.rb +97 -0
- data/examples/ruby_project/Gemfile +6 -0
- data/examples/ruby_project/README.md +6 -0
- data/examples/ruby_project/a/b/c/.gitkeep +0 -0
- data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/server_logs/logline.rb +95 -0
- data/examples/server_logs/models.rb +66 -0
- data/examples/server_logs/page_counts.pig +48 -0
- data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
- data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
- data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
- data/examples/server_logs/server_logs-03-breadcrumbs-full.rb +71 -0
- data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
- data/examples/serverlogs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/serverlogs/models/logline.rb +102 -0
- data/examples/serverlogs/parser/apache_parser_widget.rb +46 -0
- data/examples/serverlogs/visit_paths/common.rb +4 -0
- data/examples/serverlogs/visit_paths/page_counts.pig +48 -0
- data/examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb +11 -0
- data/examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb +31 -0
- data/examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb +12 -0
- data/examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb +67 -0
- data/examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb +38 -0
- data/examples/splitter.rb +94 -0
- data/examples/string_reverser.rb +7 -0
- data/examples/text/pig_latin/pig_latinizer.rb +35 -0
- data/examples/text/pig_latin/pig_latinizer_widget.rb +16 -0
- data/examples/text/regional_flavor/README.md +14 -0
- data/examples/text/regional_flavor/article_wordbags.pig +39 -0
- data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
- data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
- data/examples/twitter.rb +5 -0
- data/lib/hanuman.rb +36 -0
- data/lib/hanuman/graph.rb +97 -0
- data/lib/hanuman/graphvizzer.rb +206 -0
- data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
- data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
- data/lib/hanuman/link.rb +35 -0
- data/lib/hanuman/registry.rb +46 -0
- data/lib/hanuman/stage.rb +128 -0
- data/lib/hanuman/tree.rb +67 -0
- data/lib/wu/geo.rb +4 -0
- data/lib/wu/geo/geo_grids.numbers +0 -0
- data/lib/wu/geo/geolocated.rb +331 -0
- data/lib/wu/geo/quadtile.rb +69 -0
- data/lib/wu/graph/union_find.rb +62 -0
- data/lib/wu/model/reconcilable.rb +63 -0
- data/lib/wu/munging.rb +71 -0
- data/lib/wu/social/models/twitter.rb +31 -0
- data/lib/wu/wikipedia/models.rb +20 -0
- data/lib/wukong.rb +54 -0
- data/lib/wukong/dataflow.rb +43 -0
- data/lib/wukong/doc_helpers.rb +14 -0
- data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
- data/lib/wukong/doc_helpers/field_handler.rb +91 -0
- data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
- data/lib/wukong/driver.rb +214 -0
- data/lib/wukong/driver/event_machine_driver.rb +15 -0
- data/lib/wukong/driver/wiring.rb +68 -0
- data/lib/wukong/local.rb +42 -0
- data/lib/wukong/local/runner.rb +96 -0
- data/lib/wukong/local/stdio_driver.rb +104 -0
- data/lib/wukong/logger.rb +102 -0
- data/lib/wukong/model/faker.rb +136 -0
- data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
- data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
- data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
- data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
- data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
- data/lib/wukong/plugin.rb +48 -0
- data/lib/wukong/processor.rb +110 -0
- data/lib/wukong/rake_helper.rb +6 -0
- data/lib/wukong/runner.rb +169 -0
- data/lib/wukong/runner/boot_sequence.rb +123 -0
- data/lib/wukong/runner/code_loader.rb +52 -0
- data/lib/wukong/runner/command_runner.rb +44 -0
- data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
- data/lib/wukong/runner/help_message.rb +42 -0
- data/lib/wukong/source.rb +33 -0
- data/lib/wukong/source/source_driver.rb +74 -0
- data/lib/wukong/source/source_runner.rb +38 -0
- data/lib/wukong/spec_helpers.rb +74 -0
- data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
- data/lib/wukong/spec_helpers/integration_tests/integration_test_matchers.rb +207 -0
- data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +22 -0
- data/lib/wukong/spec_helpers/unit_tests.rb +135 -0
- data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +132 -0
- data/lib/wukong/spec_helpers/unit_tests/unit_test_matchers.rb +169 -0
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +60 -0
- data/lib/wukong/version.rb +3 -0
- data/lib/wukong/widget/echo.rb +55 -0
- data/lib/wukong/widget/extract.rb +122 -0
- data/lib/wukong/widget/filters.rb +452 -0
- data/lib/wukong/widget/logger.rb +56 -0
- data/lib/wukong/widget/operators.rb +82 -0
- data/lib/wukong/widget/reducers.rb +10 -0
- data/lib/wukong/widget/reducers/accumulator.rb +73 -0
- data/lib/wukong/widget/reducers/bin.rb +368 -0
- data/lib/wukong/widget/reducers/count.rb +73 -0
- data/lib/wukong/widget/reducers/group.rb +128 -0
- data/lib/wukong/widget/reducers/group_concat.rb +98 -0
- data/lib/wukong/widget/reducers/improver.rb +71 -0
- data/lib/wukong/widget/reducers/join_xml.rb +37 -0
- data/lib/wukong/widget/reducers/moments.rb +72 -0
- data/lib/wukong/widget/reducers/sort.rb +180 -0
- data/lib/wukong/widget/reducers/uniq.rb +91 -0
- data/lib/wukong/widget/serializers.rb +317 -0
- data/lib/wukong/widget/utils.rb +46 -0
- data/lib/wukong/widgets.rb +7 -0
- data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
- data/spec/examples/dataflow/parse_apache_logs_spec.rb +8 -0
- data/spec/examples/dataflow/parsing_spec.rb +14 -0
- data/spec/examples/dataflow/simple_spec.rb +34 -0
- data/spec/examples/dataflow/telegram_spec.rb +43 -0
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +34 -0
- data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
- data/spec/examples/munging/airline_flights_spec.rb +202 -0
- data/spec/examples/text/pig_latin_spec.rb +18 -0
- data/spec/examples/workflow/cherry_pie_spec.rb +36 -0
- data/spec/hanuman/graph_spec.rb +119 -0
- data/spec/hanuman/hanuman_spec.rb +10 -0
- data/spec/hanuman/registry_spec.rb +123 -0
- data/spec/hanuman/stage_spec.rb +81 -0
- data/spec/hanuman/tree_spec.rb +119 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +43 -0
- data/spec/support/example_test_helpers.rb +95 -0
- data/spec/support/hanuman_test_helpers.rb +92 -0
- data/spec/support/integration_helper.rb +38 -0
- data/spec/support/model_test_helpers.rb +115 -0
- data/spec/support/shared_context_for_graphs.rb +57 -0
- data/spec/support/shared_context_for_reducers.rb +37 -0
- data/spec/support/shared_examples_for_builders.rb +94 -0
- data/spec/support/shared_examples_for_shortcuts.rb +57 -0
- data/spec/wu/model/reconcilable_spec.rb +152 -0
- data/spec/wukong/dataflow_spec.rb +87 -0
- data/spec/wukong/driver_spec.rb +154 -0
- data/spec/wukong/local/runner_spec.rb +29 -0
- data/spec/wukong/local/stdio_driver_spec.rb +73 -0
- data/spec/wukong/local_spec.rb +6 -0
- data/spec/wukong/logger_spec.rb +49 -0
- data/spec/wukong/model/faker_spec.rb +132 -0
- data/spec/wukong/processor_spec.rb +21 -0
- data/spec/wukong/runner_spec.rb +132 -0
- data/spec/wukong/source_spec.rb +6 -0
- data/spec/wukong/widget/extract_spec.rb +101 -0
- data/spec/wukong/widget/filters_spec.rb +79 -0
- data/spec/wukong/widget/logger_spec.rb +23 -0
- data/spec/wukong/widget/operators_spec.rb +25 -0
- data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
- data/spec/wukong/widget/reducers/count_spec.rb +11 -0
- data/spec/wukong/widget/reducers/group_spec.rb +21 -0
- data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
- data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
- data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
- data/spec/wukong/widget/reducers/uniq_spec.rb +14 -0
- data/spec/wukong/widget/serializers_spec.rb +114 -0
- data/spec/wukong/widget/sink_spec.rb +19 -0
- data/spec/wukong/widget/source_spec.rb +65 -0
- data/spec/wukong/wu-local_spec.rb +109 -0
- data/spec/wukong/wu-source_spec.rb +32 -0
- data/spec/wukong/wu_spec.rb +14 -0
- data/spec/wukong/wukong_spec.rb +10 -0
- data/wukong.gemspec +35 -0
- metadata +465 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'open3'
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
module SpecHelpers
|
5
|
+
|
6
|
+
# A runner for running commands in a subprocess.
#
# Build one from the pieces of a command line, optionally chain #on,
# #in and #using to supply STDIN lines, a working directory and extra
# environment variables, then call #run! to execute the command and
# capture its STDOUT, STDERR and exit code.
class IntegrationTestRunner

  # Writer kept for interface compatibility.  The #cmd reader below
  # always derives the command line from the constructor arguments.
  attr_writer :cmd

  # The directory in which to execute the command.
  attr_accessor :cwd

  # The ID of the spawned subprocess (while it was running).
  attr_accessor :pid

  # The STDOUT of the spawned process.
  attr_accessor :stdout

  # The STDERR of the spawned process.
  attr_accessor :stderr

  # The exit code of the spawned process.
  attr_accessor :exit_code

  # Initialize a new IntegrationTestRunner to run a given command.
  #
  # @param [Array] args the pieces of the command line (nils are ignored)
  # @param [Hash] options
  # @option options [Hash] :env extra environment variables for the command
  # @option options [String] :cwd directory in which to run the command
  def initialize args, options
    @args   = args
    @env    = options[:env]
    @cwd    = options[:cwd]
    @inputs = []
    @ran    = false
  end

  # Run the command and capture its outputs and exit code.
  #
  # The command is only ever run once; later calls are no-ops.
  #
  # @return [true, false] true if the command was run by this call,
  #   false if it had already been run
  def run!
    return false if ran?
    if cwd
      # Dir.chdir is core Ruby, so no dependency on 'fileutils' being loaded.
      Dir.chdir(cwd) { spawn_and_capture }
    else
      spawn_and_capture
    end
    @ran = true
  end

  # The command line to run: every non-nil argument, stringified and
  # joined with single spaces.
  #
  # @return [String]
  def cmd
    @args.compact.map(&:to_s).join(' ')
  end

  # Queue up lines to be written to the command's STDIN.
  #
  # @param [Array] events
  # @return [IntegrationTestRunner] self, for chaining
  def on *events
    @inputs.concat(events)
    self
  end
  alias_method :<, :on

  # Set the directory in which the command will run.
  #
  # @param [String] dir
  # @return [IntegrationTestRunner] self, for chaining
  def in dir
    @cwd = dir
    self
  end

  # Set extra environment variables for the command.
  #
  # @param [Hash] env
  # @return [IntegrationTestRunner] self, for chaining
  def using env
    @env = env
    self
  end

  # The full environment for the command: this process' environment
  # overlaid with any extra variables that were supplied.
  #
  # @return [Hash]
  def env
    ENV.to_hash.merge(@env || {})
  end

  # Has the command already been run?
  #
  # @return [true, false]
  def ran?
    @ran
  end

  # A human-readable description of the command, its environment and
  # its working directory.
  #
  # @return [String]
  def cmd_summary
    [
      cmd,
      "with env #{env_summary}",
      "in dir #{cwd}"
    ].join("\n")
  end

  # The PATH and RUBYLIB the command runs with, as an inspected Hash.
  #
  # @return [String]
  def env_summary
    { "PATH" => env["PATH"], "RUBYLIB" => env["RUBYLIB"] }.inspect
  end

  private

  # Spawn the command in the current directory, feed it the queued
  # inputs and record its pid, STDOUT, STDERR and exit code.
  def spawn_and_capture
    Open3.popen3(env, cmd) do |child_in, child_out, child_err, wait_thr|
      self.pid = wait_thr.pid

      # Drain STDERR in the background: reading STDOUT to EOF first
      # would hang if the child blocked on a full STDERR pipe.
      err_reader = Thread.new { child_err.read }

      @inputs.each { |input| child_in.puts(input) }
      child_in.close

      self.stdout = child_out.read
      self.stderr = err_reader.value

      # Process::Status#to_i is the raw wait status (256 for `exit 1`),
      # not the exit code.  exitstatus is nil when the child was killed
      # by a signal, in which case fall back to the raw (non-zero) status.
      status = wait_thr.value
      self.exit_code = status.exitstatus || status.to_i
    end
  end

end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Shared examples for anything that should behave like a Wukong
# processor.  Pass the registered name of the processor as
# `options[:named]`.
shared_examples_for 'a processor' do |options = {}|
  it 'is registered' do
    Wukong.registry.retrieve(options[:named].to_sym).should_not be_nil
  end

  # Every processor is expected to respond to each of these methods.
  [:setup, :process, :finalize, :stop].each do |hook|
    it { processor(options[:named]).processor.should respond_to(hook) }
  end
end
|
10
|
+
|
11
|
+
# Shared examples for anything that should behave like a Wukong
# plugin.  The `subject` of the including group is the plugin.
shared_examples_for 'a plugin' do |options = {}|
  it "is registered as a Wukong plugin " do
    Wukong::PLUGINS.should include(subject)
  end

  # A plugin is expected to respond to both of these methods.
  [:configure, :boot].each do |hook|
    it { should respond_to(hook) }
  end
end
|
18
|
+
|
19
|
+
# Shared examples for anything that should behave like a model class.
# The class must respond to `receive` and its instances to `to_wire`.
shared_examples_for 'a model class' do |options = {}|
  it do
    should respond_to(:receive)
  end

  its(:new) do
    should respond_to(:to_wire)
  end
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require_relative('unit_tests/unit_test_driver')
|
2
|
+
require_relative('unit_tests/unit_test_runner')
|
3
|
+
require_relative('unit_tests/unit_test_matchers')
|
4
|
+
|
5
|
+
module Wukong
|
6
|
+
module SpecHelpers
|
7
|
+
|
8
|
+
# This module defines helpers that are useful when running unit
# tests for processors.
module UnitTests

  # Create and boot up a runner of the given `klass`.
  #
  # Options to the runner class are given in the `args` Array.
  # The last element of this Array can be a Hash of options to
  # directly pass to the runner (especially useful in unit tests).
  # The rest of the elements are strings that will be parsed as
  # though they were command-line arguments.
  #
  # Note that this replaces the contents of the global `ARGV` with
  # the given arguments.
  #
  # @example Create a runner that simulates `wu-local` with a set of arguments
  #
  #   runner Wukong::Local::LocalRunner, 'wu-local', '--foo=bar', '--baz=boof', wof: 'bing'
  #
  # A passed block will be eval'd in the context of the newly
  # created runner instance, before the runner is booted.  This can
  # be used to interact with the runner's insides.
  #
  # @example Create a custom runner and set a property on it
  #
  #   runner(CustomRunner, 'wu-custom', '--foo=bar') do
  #     # eval'd in scope of new runner instance
  #     do_some_special_thing!
  #   end
  #
  # @param [Class] klass
  # @param [String] program_name
  # @param [Array<String>, Hash] args
  # @return [Object] the booted instance of `klass`
  def runner klass, program_name, *args, &block
    settings = args.extract_options!

    ARGV.replace(args.map(&:to_s))

    klass.new.tap do |the_runner|
      the_runner.program_name = program_name
      the_runner.instance_eval(&block) if block_given?
      the_runner.boot!(settings)
    end
  end

  # Create a runner for unit tests in a variety of convenient
  # ways and return its driver.
  #
  # Most simply, called without args, will use the klass named in
  # the containing `describe` or `context`:
  #
  #   context MyApp::Tokenizer do
  #     it "uses whitespace as the default separator between tokens" do
  #       processor.separator.should == /\s+/
  #     end
  #   end
  #
  # if your processor has been registered (you created it with the
  # <tt>Wukong.processor</tt> helper method or otherwise
  # registered it yourself) then you can use its name:
  #
  #   context :tokenizer do
  #     it "uses whitespace as the default separator between tokens" do
  #       processor.separator.should == /\s+/
  #     end
  #   end
  #
  # The `processor` method can also be used inside RSpec's
  # `subject` and `let` methods:
  #
  #   context "with no arguments" do
  #     subject { processor }
  #     it "uses whitespace as the default separator between tokens" do
  #       separator.should == /\s+/
  #     end
  #   end
  #
  # and you can easily pass arguments, just like you would on the
  # command line or in a dataflow definition:
  #
  #   context "with arguments" do
  #     subject { processor(separator: ' ') }
  #     it "uses whitespace as the default separator between tokens" do
  #       separator.should == ' '
  #     end
  #   end
  #
  # You can even name the processor directly if you want to:
  #
  #   context "tokenizers" do
  #     let(:default_tokenizer) { processor(:tokenizer) }
  #     let(:complex_tokenizer) { processor(:complex_tokenizer, stemming: true) }
  #     let(:french_tokenizer)  { processor(:complex_tokenizer, stemming: true, language: 'fr') }
  #     ...
  #   end
  #
  # A passed block is yielded the driver's processor before the
  # runner is booted.
  def unit_test_runner *args, &block
    settings = args.extract_options!
    name     = (args.first || self.class.description)
    UnitTestRunner.new(name, settings).tap do |the_runner|
      the_runner.program_name = 'wu-local'
      yield the_runner.driver.processor if block_given?
      the_runner.boot!(settings)
    end.driver
  end
  alias_method :processor, :unit_test_runner

  # Matcher for the exact records emitted by a processor.
  #
  # @param [Array] expected
  # @return [UnitTestMatcher]
  def emit *expected
    UnitTestMatcher.new(*expected)
  end

  # Matcher for the records emitted by a processor, after parsing
  # each as JSON.
  #
  # @param [Array] expected
  # @return [JsonMatcher]
  def emit_json *expected
    JsonMatcher.new(*expected)
  end

  # Matcher for the records emitted by a processor, after splitting
  # each on the given `delimiter`.
  #
  # @param [String] delimiter
  # @param [Array] expected
  # @return [DelimitedMatcher]
  def emit_delimited delimiter, *expected
    DelimitedMatcher.new(delimiter, *expected)
  end

  # Matcher for the records emitted by a processor, after parsing
  # each as TSV.
  #
  # @param [Array] expected
  # @return [TsvMatcher]
  def emit_tsv *expected
    TsvMatcher.new(*expected)
  end

  # Matcher for the records emitted by a processor, after parsing
  # each as CSV.
  #
  # @param [Array] expected
  # @return [CsvMatcher]
  def emit_csv *expected
    CsvMatcher.new(*expected)
  end
end
|
133
|
+
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
module Wukong
|
2
|
+
module SpecHelpers
|
3
|
+
# A driver for unit tests.  It is an Array which collects every
# record handed to #process, so the output of the dataflow under
# test can be inspected with the usual Array methods and matchers.
class UnitTestDriver < Array

  include Wukong::DriverMethods

  # Build and set up the dataflow to test.
  #
  # @param [Object] label passed on to `construct_dataflow`
  # @param [Hash] settings passed on to `construct_dataflow`
  def initialize label, settings={}
    super()
    construct_dataflow(label, settings)
    setup_dataflow
  end

  # Collect a record of output.
  #
  # @param [Object] output
  def process output
    self << output
  end

  # Send every given record through the dataflow, then finalize
  # and stop it.
  #
  # @return [UnitTestDriver, false] self, or false if there is no dataflow
  def run
    return false unless dataflow
    given_records.each do |input|
      send_through_dataflow(input)
    end
    finalize_and_stop_dataflow
    self
  end

  # The processor under test: the root of the dataflow.
  def processor
    dataflow.root
  end

  # An array of accumulated records to process come match-time.
  #
  # Empty (rather than nil) when no records have been given yet, so
  # that a driver can be run without any input.
  #
  # @return [Array]
  def given_records
    @given_records ||= []
  end

  # Give a collection of records to the processor.
  #
  # @param [Array] records
  # @return [UnitTestDriver] self, for chaining
  def given *records
    given_records.concat(records)
    self # for chaining
  end

  # Give a collection of records to the processor but turn each
  # to JSON first.
  #
  # @param [Array] records
  # @return [UnitTestDriver] self, for chaining
  def given_json *records
    self.given(*records.map { |record| MultiJson.dump(record) })
  end

  # Give a collection of records to the processor but join each
  # in a delimited format first.
  #
  # NOTE(review): the delimited lines are joined with newlines and
  # given as ONE record, not one record per line -- confirm that
  # this is intended.
  #
  # @param [String] delimiter
  # @param [Array<Array>] records
  # @return [UnitTestDriver] self, for chaining
  def given_delimited delimiter, *records
    lines = records.map do |record|
      record.map(&:to_s).join(delimiter)
    end
    self.given(lines.join("\n"))
  end

  # Give a collection of records to the processor but join each
  # in TSV format first.
  #
  # @param [Array<Array>] records
  # @return [UnitTestDriver] self, for chaining
  def given_tsv *records
    self.given_delimited("\t", *records)
  end

  # Give a collection of records to the processor but join each
  # in CSV format first.
  #
  # @param [Array<Array>] records
  # @return [UnitTestDriver] self, for chaining
  def given_csv *records
    self.given_delimited(",", *records)
  end

  # Return the output of the processor on the given records.
  #
  # Calling this method, like passing the processor to an `emit`
  # matcher, will trigger processing of all the given records.
  #
  # Returns a UnitTestDriver, which is a subclass of array, so the
  # usual matchers like `include` and so on should work, as well
  # as explicitly indexing to introspect on particular records.
  #
  # @return [UnitTestDriver]
  def output
    run
    self
  end

  # Return the output of the processor on the given records,
  # splitting each record on the given `delimiter` first.
  #
  # @param [String] delimiter
  # @see #output
  # @return [Array<Array<String>>]
  def delimited_output(delimiter)
    output.map { |record| record.split(delimiter) }
  end

  # Return the output of the processor on the given records,
  # parsing as TSV first.
  #
  # @see #output
  # @see #delimited_output
  # @return [Array<Array<String>>]
  def tsv_output
    delimited_output("\t")
  end

  # Return the output of the processor on the given records,
  # parsing as CSV first.
  #
  # @see #output
  # @see #delimited_output
  # @return [Array<Array<String>>]
  def csv_output
    delimited_output(",")
  end

  # Return the output of the processor on the given records,
  # parsing each as JSON first.
  #
  # @see #output
  # @return [Array]
  def json_output
    output.map { |record| MultiJson.load(record) }
  end

end
|
131
|
+
end
|
132
|
+
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
module Wukong
|
2
|
+
module SpecHelpers
|
3
|
+
# An RSpec matcher which runs a unit test driver and compares the
# records it collected against a list of expected records, or (after
# chaining #records) against just an expected number of records.
class UnitTestMatcher

  attr_accessor :driver, :expected, :reason, :expected_record, :actual_record, :mismatched_index

  # @param [Array] expected the expected records, in order; or a
  #   single count when followed by #records
  def initialize *expected
    self.expected = expected
  end

  # Run the driver and compare its output with the expectation.
  #
  # On failure, `reason` is set to :size or :element and, for
  # :element, the mismatched records and index are recorded.
  #
  # @param [#run, #size, #map] driver
  # @return [true, false]
  def matches?(driver)
    self.driver = driver
    driver.run
    if actual_size != expected_size
      self.reason = :size
      return false
    end
    return true if just_count?
    # Fetch the output once: subclasses re-parse every record on
    # each call to #output.
    actual_records = output
    expected.each_with_index do |expectation, index|
      actual = actual_records[index]
      if actual != expectation
        self.reason           = :element
        self.expected_record  = expectation
        self.actual_record    = actual
        self.mismatched_index = index
        return false
      end
    end
    true
  end

  # Message shown when the output was expected to match but didn't.
  #
  # @return [String]
  def failure_message
    if reason == :size
      "Expected #{expected_size} records, got #{actual_size}:\n\n#{pretty_output}"
    else
      "Expected the #{ordinalize(mismatched_index)} record to be#{parse_modifier}\n\n#{expected_record}\n\nbut got\n\n#{pretty_output}"
    end
  end

  # Message shown when the output was expected to NOT match but did.
  #
  # After a successful match there is neither a `reason` nor a
  # mismatched index, so this falls back to a message describing the
  # whole output instead of trying to name a particular record.
  #
  # @return [String]
  def negative_failure_message
    if reason == :size || just_count?
      "Expected to NOT get #{expected_size} records:\n\n#{output}"
    elsif mismatched_index
      "Expected the #{ordinalize(mismatched_index)} record to NOT be#{parse_modifier}\n\n#{pretty_output}"
    else
      "Expected the output to NOT be#{parse_modifier}\n\n#{pretty_output}"
    end
  end
  # The name RSpec 3 uses for the negated failure message.
  alias_method :failure_message_when_negated, :negative_failure_message

  # Only compare the number of records: the first expected value is
  # then interpreted as the expected count.
  #
  # @return [UnitTestMatcher] self, for chaining
  def records
    @just_count = true
    self # chaining
  end
  alias_method :record, :records

  private

  # Was only a count of records asked for?
  def just_count?
    @just_count
  end

  # The number of records the driver collected.
  def actual_size
    driver.size
  end

  # The number of records expected.
  def expected_size
    just_count? ? expected.first.to_i : expected.size
  end

  # The records to compare against.  Subclasses override this to
  # parse each record first.
  def output
    driver
  end

  # Text inserted into failure messages to describe how the output
  # was parsed.  Nothing for this class; subclasses override it.
  def parse_modifier
  end

  # The output, one record per line.  When there are several
  # records they are indented, with the mismatched one marked.
  def pretty_output
    all_records = output
    several     = all_records.size > 1
    lines = all_records.each_with_index.map do |record, index|
      text = (record.is_a?(String) ? record : record.inspect)
      prefix =
        if several && index == mismatched_index
          " => "
        elsif several
          "    "
        else
          ''
        end
      [prefix, text].join('')
    end
    lines.join("\n")
  end

  # Turn a zero-based array index into a one-based English ordinal
  # ("1st", "2nd", "11th", ...).
  #
  # http://stackoverflow.com/questions/1081926/how-do-i-format-a-date-in-ruby-to-include-rd-as-in-3rd
  def ordinalize array_index
    n = array_index + 1
    if (11..13).include?(n % 100)
      "#{n}th"
    else
      case n % 10
      when 1; "#{n}st"
      when 2; "#{n}nd"
      when 3; "#{n}rd"
      else    "#{n}th"
      end
    end
  end
end
|
107
|
+
|
108
|
+
# A matcher which parses each record emitted by the driver as JSON
# before comparing it with the expected records.
class JsonMatcher < UnitTestMatcher
  # Describes the parsing step in failure messages.
  def parse_modifier
    ' (after parsing as JSON)'
  end

  # Every record collected by the driver, parsed as JSON.
  #
  # Raises an Error naming the offending record if one can't be parsed.
  def output
    driver.map { |record| load_json(record) }
  end

  private

  # Parse a single record as JSON, turning any failure into an Error.
  def load_json(record)
    MultiJson.load(record)
  rescue
    raise Error, "Could not parse output of dataflow as JSON: \n\n#{record}"
  end
end
|
122
|
+
|
123
|
+
# A matcher which splits each record emitted by the driver on a
# delimiter before comparing it with the expected records.
class DelimitedMatcher < UnitTestMatcher

  # The delimiter on which each record is split.
  attr_accessor :delimiter

  # @param [String] delimiter
  # @param [Array] expected the expected records
  def initialize delimiter, *expected
    self.delimiter = delimiter
    super(*expected)
  end

  # Every record collected by the driver, stringified and split on
  # the delimiter.
  #
  # Raises an Error naming the offending record if one can't be split.
  def output
    driver.map do |record|
      begin
        record.to_s.split(delimiter)
      rescue
        # No quote after the type: #delimited_type supplies its own
        # quoting where it wants any.
        raise Error, "Could not parse as #{delimited_type}: \n\n#{record}"
      end
    end
  end

  # A name for the format being parsed, used in messages.
  #
  # @return [String]
  def delimited_type
    "'#{delimiter}-delimited'"
  end

  # Describes the parsing step in failure messages.
  def parse_modifier
    " (after parsing as #{delimited_type})"
  end
end
|
150
|
+
|
151
|
+
# A DelimitedMatcher whose delimiter is always a tab.
class TsvMatcher < DelimitedMatcher
  # @param [Array] expected the expected records
  def initialize(*expected)
    tab = "\t"
    super(tab, *expected)
  end

  # The name of this format, used in messages.
  def delimited_type
    'TSV'
  end
end
|
159
|
+
|
160
|
+
# A DelimitedMatcher whose delimiter is always a comma.
class CsvMatcher < DelimitedMatcher
  # @param [Array] expected the expected records
  def initialize(*expected)
    comma = ","
    super(comma, *expected)
  end

  # The name of this format, used in messages.
  def delimited_type
    'CSV'
  end
end
|
168
|
+
end
|
169
|
+
end
|