RubyGems - wukong - Versions diffs - 3.0.0.pre3 → 3.0.0 - Mend

wukong 3.0.0.pre3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

data/Gemfile +1 -0
data/README.md +689 -50
data/bin/wu-local +1 -74
data/diagrams/wu_local.dot +39 -0
data/diagrams/wu_local.dot.png +0 -0
data/examples/loadable.rb +2 -0
data/examples/string_reverser.rb +7 -0
data/lib/hanuman/stage.rb +2 -2
data/lib/wukong.rb +21 -10
data/lib/wukong/dataflow.rb +2 -5
data/lib/wukong/doc_helpers.rb +14 -0
data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
data/lib/wukong/doc_helpers/field_handler.rb +91 -0
data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
data/lib/wukong/driver.rb +11 -1
data/lib/wukong/local.rb +40 -0
data/lib/wukong/local/event_machine_driver.rb +27 -0
data/lib/wukong/local/runner.rb +98 -0
data/lib/wukong/local/stdio_driver.rb +44 -0
data/lib/wukong/local/tcp_driver.rb +47 -0
data/lib/wukong/logger.rb +16 -7
data/lib/wukong/plugin.rb +48 -0
data/lib/wukong/processor.rb +57 -15
data/lib/wukong/rake_helper.rb +6 -0
data/lib/wukong/runner.rb +151 -128
data/lib/wukong/runner/boot_sequence.rb +123 -0
data/lib/wukong/runner/code_loader.rb +52 -0
data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
data/lib/wukong/runner/help_message.rb +42 -0
data/lib/wukong/spec_helpers.rb +4 -12
data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
data/lib/wukong/version.rb +1 -1
data/lib/wukong/widget/filters.rb +134 -8
data/lib/wukong/widget/processors.rb +64 -5
data/lib/wukong/widget/reducers/bin.rb +68 -18
data/lib/wukong/widget/reducers/count.rb +12 -0
data/lib/wukong/widget/reducers/group.rb +48 -5
data/lib/wukong/widget/reducers/group_concat.rb +30 -2
data/lib/wukong/widget/reducers/moments.rb +4 -4
data/lib/wukong/widget/reducers/sort.rb +53 -3
data/lib/wukong/widget/serializers.rb +37 -12
data/lib/wukong/widget/utils.rb +1 -1
data/spec/spec_helper.rb +20 -2
data/spec/wukong/driver_spec.rb +2 -0
data/spec/wukong/local/runner_spec.rb +40 -0
data/spec/wukong/local_spec.rb +6 -0
data/spec/wukong/logger_spec.rb +49 -0
data/spec/wukong/processor_spec.rb +22 -0
data/spec/wukong/runner_spec.rb +128 -8
data/spec/wukong/widget/filters_spec.rb +28 -10
data/spec/wukong/widget/processors_spec.rb +5 -5
data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
data/spec/wukong/widget/reducers/count_spec.rb +1 -1
data/spec/wukong/widget/reducers/group_spec.rb +7 -6
data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
data/spec/wukong/widget/serializers_spec.rb +84 -88
data/spec/wukong/wu-local_spec.rb +109 -0
metadata +43 -20
data/bin/wu-server +0 -70
data/lib/wukong/boot.rb +0 -96
data/lib/wukong/configuration.rb +0 -8
data/lib/wukong/emitter.rb +0 -22
data/lib/wukong/server.rb +0 -119
data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
data/spec/wukong/local_runner_spec.rb +0 -31
data/spec/wukong/wu_local_spec.rb +0 -125

data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb ADDED

@@ -0,0 +1,97 @@
+require 'open3'
+module Wukong
+  module SpecHelpers
+    # A runner for running commands in a subprocess.
+    class IntegrationTestRunner
+      # The command to execute
+      attr_accessor :cmd
+      # The directory in which to execute the command.
+      attr_accessor :cwd
+      # The ID of the spawned subprocess (while it was running).
+      attr_accessor :pid
+      # The STDOUT of the spawned process.
+      attr_accessor :stdout
+      # The STDERR of the spawned process.
+      attr_accessor :stderr
+      # The exit code of the spawned process.
+      attr_accessor :exit_code
+      # Run the command and capture its outputs and exit code.
+      #
+      # @return [true, false]
+      def run!
+        return false if ran?
+        FileUtils.cd(cwd) do
+          Open3.popen3(env, cmd) do |i, o, e, wait_thr|
+            self.pid = wait_thr.pid
+            @inputs.each { |input| i.puts(input) }
+            i.close
+            self.stdout    = o.read
+            self.stderr    = e.read
+            self.exit_code = wait_thr.value.to_i
+          end
+        end
+        @ran = true
+      end
+      # Initialize a new IntegrationTestRunner to run a given command.
+      def initialize args, options
+        @args   = args
+        @env    = options[:env]
+        @cwd    = options[:cwd]
+        @inputs = []
+      end
+      def cmd
+        @args.compact.map(&:to_s).join(' ')
+      end
+      def on *events
+        @inputs.concat(events)
+        self
+      end
+      alias_method :<, :on
+      def in dir
+        @cwd = dir
+        self
+      end
+      def using env
+        @env = env
+        self
+      end
+      def env
+        ENV.to_hash.merge(@env || {})
+      end
+      def ran?
+        @ran
+      end
+      def cmd_summary
+        [
+         cmd,
+         "with env #{env_summary}",
+         "in dir #{cwd}"
+        ].join("\n")
+      end
+      def env_summary
+        { "PATH" => env["PATH"], "RUBYLIB" => env["RUBYLIB"] }.inspect
+      end
+    end
+  end
+end

data/lib/wukong/spec_helpers/shared_examples.rb CHANGED

@@ -1,14 +1,23 @@
 shared_examples_for 'a processor' do |options = {}|
-  let(:processor_name){ options[:named] || self.class.top_level_description }
-  subject             { create_processor(processor_name, on_error: :skip)   }
   it 'is registered' do
-    Wukong.registry.retrieve(processor_name.to_sym).should_not be_nil
+    Wukong.registry.retrieve(options[:named].to_sym).should_not be_nil
+  end
+  it{ processor(options[:named]).processor.should respond_to(:setup)    }
+  it{ processor(options[:named]).processor.should respond_to(:process)  }
+  it{ processor(options[:named]).processor.should respond_to(:finalize) }
+  it{ processor(options[:named]).processor.should respond_to(:stop)     }
+  it{ processor(options[:named]).processor.should respond_to(:notify)   }
+end
+shared_examples_for 'a plugin' do |options = {}|
+  it "is registered as a Wukong plugin " do
+    Wukong::PLUGINS.should include(subject)
   end
-  it{ should respond_to(:setup)    }
-  it{ should respond_to(:process)  }
-  it{ should respond_to(:finalize) }
-  it{ should respond_to(:stop)     }
-  it{ should respond_to(:notify)   }
+  it { should respond_to(:configure) }
+  it { should respond_to(:boot)      }
+end
+shared_examples_for 'a model class' do |options = {}|
+  it        { should respond_to(:receive) }
+  its(:new) { should respond_to(:to_wire) }
 end

data/lib/wukong/spec_helpers/unit_tests.rb ADDED

@@ -0,0 +1,134 @@
+require_relative('unit_tests/unit_test_driver')
+require_relative('unit_tests/unit_test_runner')
+require_relative('unit_tests/unit_test_matchers')
+module Wukong
+  module SpecHelpers
+    # This module defines helpers that are useful when running unit
+    # tests for processors.
+    module UnitTests
+      # Create and boot up a runner of the given `klass`.
+      #
+      # Options to the runner class are given in the `args` Array.
+      # The last element of this Array can be a Hash of options to
+      # directly pass to the runner (especially useful in unit tests).
+      # The rest of the elements are strings that will be parsed as
+      # though they were command-line arguments.
+      #
+      # @example Create a runner that simulates `wu-local` with a set of arguments
+      #
+      #   runner Wukong::Local::LocalRunner, 'wu-local', '--foo=bar', '--baz=boof', wof: 'bing'
+      #
+      # A passed block will be eval'd in the context of the newlyl
+      # created runner instance.  This can be used to interact with
+      # the runner's insides after initialization.
+      #
+      # @example Create a custom runner and set a property on it
+      #
+      #   runner(CustomRunner, 'wu-custom', '--foo=bar') do
+      #     # eval'd in scope of new runner instance
+      #     do_some_special_thing!
+      #   end
+      #
+      # @param [Class] klass
+      # @param [String] program_name
+      # @param [Array<String>, Hash] args
+      def runner klass, program_name, *args, &block
+        settings = args.extract_options!
+        ARGV.replace(args.map(&:to_s))
+        klass.new.tap do |the_runner|
+          the_runner.program_name = program_name
+          the_runner.instance_eval(&block) if block_given?
+          the_runner.boot!(settings)
+        end
+      end
+      # Create a runner for unit tests in a variety of convenient
+      # ways.
+      #
+      # Most simply, called without args, will return a UnitTestRunner
+      # a the klass named in the containing `describe` or `context`:
+      #
+      #   context MyApp::Tokenizer do
+      #     it "uses whitespace as the default separator between tokens" do
+      #       processor.separator.should == /\s+/
+      #     end
+      #   end
+      #
+      # if your processor has been registered (you created it with the
+      # <tt>Wukong.processor</tt> helper method or otherwise
+      # registered it yourself) then you can use its name:
+      #
+      #   context :tokenizer do
+      #     it "uses whitespace as the default separator between tokens" do
+      #       processor.separator.should == /\s+/
+      #     end
+      #   end
+      #
+      # The `processor` method can also be used inside RSpec's
+      # `subject` and `let` methods:
+      #
+      #   context "with no arguments" do
+      #     subject { processor }
+      #       it "uses whitespace as the default separator between tokens" do
+      #         separator.should == /\s+/
+      #       end
+      #     end
+      #   end
+      #
+      # and you can easily pass arguments, just like you would on the
+      # command line or in a dataflow definition:
+      #
+      #   context "with arguments" do
+      #     subject { processor(separator: ' ') }
+      #       it "uses whitespace as the default separator between tokens" do
+      #         separator.should == ' '
+      #       end
+      #     end
+      #   end
+      #
+      # You can even name the processor directly if you want to:
+      #
+      #   context "tokenizers" do
+      #     let(:default_tokenizer) { processor(:tokenizer)                                          }
+      #     let(:complex_tokenizer) { processor(:complex_tokenizer, stemming: true)                  }
+      #     let(:french_tokenizer)  { processor(:complex_tokenizer, stemming: true, language: 'fr')  }
+      #     ...
+      #   end
+      def unit_test_runner *args
+        settings = args.extract_options!
+        name     = (args.first || self.class.description)
+        runner   = UnitTestRunner.new(name, settings)
+        yield runner.driver.processor if block_given?
+        runner.boot!
+        runner.driver
+      end
+      alias_method :processor, :unit_test_runner
+      def emit *expected
+        UnitTestMatcher.new(*expected)
+      end
+      def emit_json *expected
+        JsonMatcher.new(*expected)
+      end
+      def emit_delimited delimiter, *expected
+        DelimiterMatcher.new(delimiter, *expected)
+      end
+      def emit_tsv *expected
+        TsvMatcher.new(*expected)
+      end
+      def emit_csv *expected
+        CsvMatcher.new(*expected)
+      end
+    end
+  end
+end

data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} RENAMED

@@ -1,9 +1,42 @@
 module Wukong
   module SpecHelpers
-    # This module defines methods to be included into the
-    # Wukong::Processor class.
-    module ProcessorSpecMethods
+    class UnitTestDriver < Array
+      include Wukong::DriverMethods
+      def initialize label, settings
+        super()
+        @settings = settings
+        @dataflow = construct_dataflow(label, settings)
+        setup_dataflow
+      end
+      def setup
+      end
+      def finalize
+      end
+      def stop
+      end
+      def process output
+        self << output
+      end
+      def run
+        return false unless dataflow
+        given_records.each do |input|
+          driver.send_through_dataflow(input)
+        end
+        finalize_and_stop_dataflow
+        self
+      end
+      def processor
+        dataflow.first
+      end
       # An array of accumulated records to process come match-time.
       attr_reader :given_records
@@ -55,13 +88,14 @@ module Wukong
       # Calling this method, like passing the processor to an `emit`
       # matcher, will trigger processing of all the given records.
       #
-      # Returns a SpecDriver, which is a subclass of array, so the
+      # Returns a UnitTestDriver, which is a subclass of array, so the
       # usual matchers like `include` and so on should work, as well
       # as explicitly indexing to introspect on particular records.
       #
-      # @return [SpecDriver]
+      # @return [UnitTestDriver]
       def output
-        SpecDriver.new(self).run
+        run
+        self
       end
       # Return the output of the processor on the given records,
@@ -102,7 +136,7 @@ module Wukong
       def json_output
         output.map { |record| MultiJson.load(record) }
       end
     end
   end
 end

data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} RENAMED

@@ -1,37 +1,11 @@
-require_relative('spec_driver')
 module Wukong
   module SpecHelpers
-    module SpecMatchers
-      def emit *expected
-        EmitMatcher.new(*expected)
-      end
-      def emit_json *expected
-        JsonMatcher.new(*expected)
-      end
-      def emit_delimited delimiter, *expected
-        DelimiterMatcher.new(delimiter, *expected)
-      end
-      def emit_tsv *expected
-        TsvMatcher.new(*expected)
-      end
-      def emit_csv *expected
-        CsvMatcher.new(*expected)
-      end
-    end
-    class EmitMatcher
+    class UnitTestMatcher
       attr_accessor :driver, :expected, :reason, :expected_record, :actual_record, :mismatched_index
-      def matches?(processor)
-        self.driver = SpecDriver.new(processor)
+      def matches?(driver)
+        self.driver = driver
         driver.run
         if actual_size != expected_size
           self.reason = :size
@@ -131,13 +105,13 @@ module Wukong
       end
     end
-    class JsonMatcher < EmitMatcher
+    class JsonMatcher < UnitTestMatcher
       def output
         driver.map do |record|
           begin
             MultiJson.load(record)
           rescue => e
-            raise Error.new("Could not parse output of processor as JSON: \n\n#{record}")
+            raise Error.new("Could not parse output of dataflow as JSON: \n\n#{record}")
           end
         end
       end
@@ -146,7 +120,7 @@ module Wukong
       end
     end
-    class DelimitedMatcher < EmitMatcher
+    class DelimitedMatcher < UnitTestMatcher
       attr_accessor :delimiter

data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb ADDED

@@ -0,0 +1,54 @@
+module Wukong
+  module SpecHelpers
+    # A class for controlling the Wukong boot sequence from within
+    # unit tests.
+    #
+    # Subclasses the Wukong::Local::LocalRunner with which it shares
+    # most of its behavior:
+    #
+    # * Initialization is slightly different, to allow for each
+    #   separate unit test in a suite to use a different
+    #   Configliere::Param object for settings
+    #
+    # * The driver is the UnitTestDriver instead of the usual driver
+    #   to allow for easily passing in records and getting them back
+    #   out
+    #
+    # * The `run` method is a no-op so that control flow will exit out
+    #   of the unit test back into the test suite
+    class UnitTestRunner < Wukong::Local::LocalRunner
+      # The processor this runner will create in the same way as
+      # `wu-local`.
+      attr_accessor :processor
+      # Initialize a new UnitTestRunner for the processor with the
+      # given `label` and `settings`.
+      #
+      # @param [Symbol] label
+      # @param [Hash] settings
+      def initialize label, settings
+        self.processor = label
+        params = Configliere::Param.new
+        params.use(:commandline)
+        params.merge!(settings)
+        super(params)
+      end
+      # Override the LocalDriver with the UnitTestDriver so we can
+      # more easily pass in and retrieve processed records.
+      #
+      # @return [UnitTestDriver]
+      def driver
+        @driver ||= UnitTestDriver.new(processor, settings)
+      end
+      # Do nothing.  This prevents control flow within the Ruby
+      # interpreter from staying within this runner, as it would
+      # ordinarly do for `wu-local`.
+      def run
+      end
+    end
+  end
+end