wukong 3.0.0.pre3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -0,0 +1,97 @@
1
+ require 'open3'
2
+
3
+ module Wukong
4
+ module SpecHelpers
5
+
6
+ # A runner for running commands in a subprocess.
7
+ class IntegrationTestRunner
8
+
9
+ # The command to execute
10
+ attr_accessor :cmd
11
+
12
+ # The directory in which to execute the command.
13
+ attr_accessor :cwd
14
+
15
+ # The ID of the spawned subprocess (while it was running).
16
+ attr_accessor :pid
17
+
18
+ # The STDOUT of the spawned process.
19
+ attr_accessor :stdout
20
+
21
+ # The STDERR of the spawned process.
22
+ attr_accessor :stderr
23
+
24
+ # The exit code of the spawned process.
25
+ attr_accessor :exit_code
26
+
27
+ # Run the command and capture its outputs and exit code.
28
+ #
29
+ # @return [true, false]
30
+ def run!
31
+ return false if ran?
32
+ FileUtils.cd(cwd) do
33
+ Open3.popen3(env, cmd) do |i, o, e, wait_thr|
34
+ self.pid = wait_thr.pid
35
+
36
+ @inputs.each { |input| i.puts(input) }
37
+ i.close
38
+
39
+ self.stdout = o.read
40
+ self.stderr = e.read
41
+ self.exit_code = wait_thr.value.to_i
42
+ end
43
+ end
44
+ @ran = true
45
+ end
46
+
47
+ # Initialize a new IntegrationTestRunner to run a given command.
48
+ def initialize args, options
49
+ @args = args
50
+ @env = options[:env]
51
+ @cwd = options[:cwd]
52
+ @inputs = []
53
+ end
54
+
55
+ def cmd
56
+ @args.compact.map(&:to_s).join(' ')
57
+ end
58
+
59
+ def on *events
60
+ @inputs.concat(events)
61
+ self
62
+ end
63
+ alias_method :<, :on
64
+
65
+ def in dir
66
+ @cwd = dir
67
+ self
68
+ end
69
+
70
+ def using env
71
+ @env = env
72
+ self
73
+ end
74
+
75
+ def env
76
+ ENV.to_hash.merge(@env || {})
77
+ end
78
+
79
+ def ran?
80
+ @ran
81
+ end
82
+
83
+ def cmd_summary
84
+ [
85
+ cmd,
86
+ "with env #{env_summary}",
87
+ "in dir #{cwd}"
88
+ ].join("\n")
89
+ end
90
+
91
+ def env_summary
92
+ { "PATH" => env["PATH"], "RUBYLIB" => env["RUBYLIB"] }.inspect
93
+ end
94
+
95
+ end
96
+ end
97
+ end
@@ -1,14 +1,23 @@
1
1
  shared_examples_for 'a processor' do |options = {}|
2
- let(:processor_name){ options[:named] || self.class.top_level_description }
3
- subject { create_processor(processor_name, on_error: :skip) }
4
-
5
2
  it 'is registered' do
6
- Wukong.registry.retrieve(processor_name.to_sym).should_not be_nil
3
+ Wukong.registry.retrieve(options[:named].to_sym).should_not be_nil
4
+ end
5
+ it{ processor(options[:named]).processor.should respond_to(:setup) }
6
+ it{ processor(options[:named]).processor.should respond_to(:process) }
7
+ it{ processor(options[:named]).processor.should respond_to(:finalize) }
8
+ it{ processor(options[:named]).processor.should respond_to(:stop) }
9
+ it{ processor(options[:named]).processor.should respond_to(:notify) }
10
+ end
11
+
12
+ shared_examples_for 'a plugin' do |options = {}|
13
+ it "is registered as a Wukong plugin " do
14
+ Wukong::PLUGINS.should include(subject)
7
15
  end
8
-
9
- it{ should respond_to(:setup) }
10
- it{ should respond_to(:process) }
11
- it{ should respond_to(:finalize) }
12
- it{ should respond_to(:stop) }
13
- it{ should respond_to(:notify) }
16
+ it { should respond_to(:configure) }
17
+ it { should respond_to(:boot) }
18
+ end
19
+
20
+ shared_examples_for 'a model class' do |options = {}|
21
+ it { should respond_to(:receive) }
22
+ its(:new) { should respond_to(:to_wire) }
14
23
  end
@@ -0,0 +1,134 @@
1
+ require_relative('unit_tests/unit_test_driver')
2
+ require_relative('unit_tests/unit_test_runner')
3
+ require_relative('unit_tests/unit_test_matchers')
4
+
5
+ module Wukong
6
+ module SpecHelpers
7
+
8
+ # This module defines helpers that are useful when running unit
9
+ # tests for processors.
10
+ module UnitTests
11
+
12
+ # Create and boot up a runner of the given `klass`.
13
+ #
14
+ # Options to the runner class are given in the `args` Array.
15
+ # The last element of this Array can be a Hash of options to
16
+ # directly pass to the runner (especially useful in unit tests).
17
+ # The rest of the elements are strings that will be parsed as
18
+ # though they were command-line arguments.
19
+ #
20
+ # @example Create a runner that simulates `wu-local` with a set of arguments
21
+ #
22
+ # runner Wukong::Local::LocalRunner, 'wu-local', '--foo=bar', '--baz=boof', wof: 'bing'
23
+ #
24
+ # A passed block will be eval'd in the context of the newly
25
+ # created runner instance. This can be used to interact with
26
+ # the runner's insides after initialization.
27
+ #
28
+ # @example Create a custom runner and set a property on it
29
+ #
30
+ # runner(CustomRunner, 'wu-custom', '--foo=bar') do
31
+ # # eval'd in scope of new runner instance
32
+ # do_some_special_thing!
33
+ # end
34
+ #
35
+ # @param [Class] klass
36
+ # @param [String] program_name
37
+ # @param [Array<String>, Hash] args
38
+ def runner klass, program_name, *args, &block
39
+ settings = args.extract_options!
40
+
41
+ ARGV.replace(args.map(&:to_s))
42
+
43
+ klass.new.tap do |the_runner|
44
+ the_runner.program_name = program_name
45
+ the_runner.instance_eval(&block) if block_given?
46
+ the_runner.boot!(settings)
47
+ end
48
+ end
49
+
50
+ # Create a runner for unit tests in a variety of convenient
51
+ # ways.
52
+ #
53
+ # Most simply, called without args, will return a UnitTestRunner
54
+ # for the klass named in the containing `describe` or `context`:
55
+ #
56
+ # context MyApp::Tokenizer do
57
+ # it "uses whitespace as the default separator between tokens" do
58
+ # processor.separator.should == /\s+/
59
+ # end
60
+ # end
61
+ #
62
+ # If your processor has been registered (you created it with the
63
+ # <tt>Wukong.processor</tt> helper method or otherwise
64
+ # registered it yourself) then you can use its name:
65
+ #
66
+ # context :tokenizer do
67
+ # it "uses whitespace as the default separator between tokens" do
68
+ # processor.separator.should == /\s+/
69
+ # end
70
+ # end
71
+ #
72
+ # The `processor` method can also be used inside RSpec's
73
+ # `subject` and `let` methods:
74
+ #
75
+ # context "with no arguments" do
76
+ # subject { processor }
77
+ # it "uses whitespace as the default separator between tokens" do
78
+ # separator.should == /\s+/
79
+ # end
80
+ # end
81
+ # end
82
+ #
83
+ # and you can easily pass arguments, just like you would on the
84
+ # command line or in a dataflow definition:
85
+ #
86
+ # context "with arguments" do
87
+ # subject { processor(separator: ' ') }
88
+ # it "uses whitespace as the default separator between tokens" do
89
+ # separator.should == ' '
90
+ # end
91
+ # end
92
+ # end
93
+ #
94
+ # You can even name the processor directly if you want to:
95
+ #
96
+ # context "tokenizers" do
97
+ # let(:default_tokenizer) { processor(:tokenizer) }
98
+ # let(:complex_tokenizer) { processor(:complex_tokenizer, stemming: true) }
99
+ # let(:french_tokenizer) { processor(:complex_tokenizer, stemming: true, language: 'fr') }
100
+ # ...
101
+ # end
102
+ def unit_test_runner *args
103
+ settings = args.extract_options!
104
+ name = (args.first || self.class.description)
105
+ runner = UnitTestRunner.new(name, settings)
106
+ yield runner.driver.processor if block_given?
107
+ runner.boot!
108
+ runner.driver
109
+ end
110
+ alias_method :processor, :unit_test_runner
111
+
112
+ def emit *expected
113
+ UnitTestMatcher.new(*expected)
114
+ end
115
+
116
+ def emit_json *expected
117
+ JsonMatcher.new(*expected)
118
+ end
119
+
120
+ def emit_delimited delimiter, *expected
121
+ DelimiterMatcher.new(delimiter, *expected)
122
+ end
123
+
124
+ def emit_tsv *expected
125
+ TsvMatcher.new(*expected)
126
+ end
127
+
128
+ def emit_csv *expected
129
+ CsvMatcher.new(*expected)
130
+ end
131
+ end
132
+
133
+ end
134
+ end
@@ -1,9 +1,42 @@
1
1
  module Wukong
2
2
  module SpecHelpers
3
- # This module defines methods to be included into the
4
- # Wukong::Processor class.
5
- module ProcessorSpecMethods
6
-
3
+ class UnitTestDriver < Array
4
+
5
+ include Wukong::DriverMethods
6
+
7
+ def initialize label, settings
8
+ super()
9
+ @settings = settings
10
+ @dataflow = construct_dataflow(label, settings)
11
+ setup_dataflow
12
+ end
13
+
14
+ def setup
15
+ end
16
+
17
+ def finalize
18
+ end
19
+
20
+ def stop
21
+ end
22
+
23
+ def process output
24
+ self << output
25
+ end
26
+
27
+ def run
28
+ return false unless dataflow
29
+ given_records.each do |input|
30
+ driver.send_through_dataflow(input)
31
+ end
32
+ finalize_and_stop_dataflow
33
+ self
34
+ end
35
+
36
+ def processor
37
+ dataflow.first
38
+ end
39
+
7
40
  # An array of accumulated records to process come match-time.
8
41
  attr_reader :given_records
9
42
 
@@ -55,13 +88,14 @@ module Wukong
55
88
  # Calling this method, like passing the processor to an `emit`
56
89
  # matcher, will trigger processing of all the given records.
57
90
  #
58
- # Returns a SpecDriver, which is a subclass of array, so the
91
+ # Returns a UnitTestDriver, which is a subclass of array, so the
59
92
  # usual matchers like `include` and so on should work, as well
60
93
  # as explicitly indexing to introspect on particular records.
61
94
  #
62
- # @return [SpecDriver]
95
+ # @return [UnitTestDriver]
63
96
  def output
64
- SpecDriver.new(self).run
97
+ run
98
+ self
65
99
  end
66
100
 
67
101
  # Return the output of the processor on the given records,
@@ -102,7 +136,7 @@ module Wukong
102
136
  def json_output
103
137
  output.map { |record| MultiJson.load(record) }
104
138
  end
105
-
139
+
106
140
  end
107
141
  end
108
142
  end
@@ -1,37 +1,11 @@
1
- require_relative('spec_driver')
2
-
3
1
  module Wukong
4
2
  module SpecHelpers
5
-
6
- module SpecMatchers
7
-
8
- def emit *expected
9
- EmitMatcher.new(*expected)
10
- end
11
-
12
- def emit_json *expected
13
- JsonMatcher.new(*expected)
14
- end
15
-
16
- def emit_delimited delimiter, *expected
17
- DelimiterMatcher.new(delimiter, *expected)
18
- end
19
-
20
- def emit_tsv *expected
21
- TsvMatcher.new(*expected)
22
- end
23
-
24
- def emit_csv *expected
25
- CsvMatcher.new(*expected)
26
- end
27
- end
28
-
29
- class EmitMatcher
3
+ class UnitTestMatcher
30
4
 
31
5
  attr_accessor :driver, :expected, :reason, :expected_record, :actual_record, :mismatched_index
32
6
 
33
- def matches?(processor)
34
- self.driver = SpecDriver.new(processor)
7
+ def matches?(driver)
8
+ self.driver = driver
35
9
  driver.run
36
10
  if actual_size != expected_size
37
11
  self.reason = :size
@@ -131,13 +105,13 @@ module Wukong
131
105
  end
132
106
  end
133
107
 
134
- class JsonMatcher < EmitMatcher
108
+ class JsonMatcher < UnitTestMatcher
135
109
  def output
136
110
  driver.map do |record|
137
111
  begin
138
112
  MultiJson.load(record)
139
113
  rescue => e
140
- raise Error.new("Could not parse output of processor as JSON: \n\n#{record}")
114
+ raise Error.new("Could not parse output of dataflow as JSON: \n\n#{record}")
141
115
  end
142
116
  end
143
117
  end
@@ -146,7 +120,7 @@ module Wukong
146
120
  end
147
121
  end
148
122
 
149
- class DelimitedMatcher < EmitMatcher
123
+ class DelimitedMatcher < UnitTestMatcher
150
124
 
151
125
  attr_accessor :delimiter
152
126
 
@@ -0,0 +1,54 @@
1
+ module Wukong
2
+ module SpecHelpers
3
+
4
+ # A class for controlling the Wukong boot sequence from within
5
+ # unit tests.
6
+ #
7
+ # Subclasses the Wukong::Local::LocalRunner with which it shares
8
+ # most of its behavior:
9
+ #
10
+ # * Initialization is slightly different, to allow for each
11
+ # separate unit test in a suite to use a different
12
+ # Configliere::Param object for settings
13
+ #
14
+ # * The driver is the UnitTestDriver instead of the usual driver
15
+ # to allow for easily passing in records and getting them back
16
+ # out
17
+ #
18
+ # * The `run` method is a no-op so that control flow will exit out
19
+ # of the unit test back into the test suite
20
+ class UnitTestRunner < Wukong::Local::LocalRunner
21
+
22
+ # The processor this runner will create in the same way as
23
+ # `wu-local`.
24
+ attr_accessor :processor
25
+
26
+ # Initialize a new UnitTestRunner for the processor with the
27
+ # given `label` and `settings`.
28
+ #
29
+ # @param [Symbol] label
30
+ # @param [Hash] settings
31
+ def initialize label, settings
32
+ self.processor = label
33
+ params = Configliere::Param.new
34
+ params.use(:commandline)
35
+ params.merge!(settings)
36
+ super(params)
37
+ end
38
+
39
+ # Override the LocalDriver with the UnitTestDriver so we can
40
+ # more easily pass in and retrieve processed records.
41
+ #
42
+ # @return [UnitTestDriver]
43
+ def driver
44
+ @driver ||= UnitTestDriver.new(processor, settings)
45
+ end
46
+
47
+ # Do nothing. This prevents control flow within the Ruby
48
+ # interpreter from staying within this runner, as it would
49
+ # ordinarily do for `wu-local`.
50
+ def run
51
+ end
52
+ end
53
+ end
54
+ end