wukong 3.0.0.pre3 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/README.md +689 -50
- data/bin/wu-local +1 -74
- data/diagrams/wu_local.dot +39 -0
- data/diagrams/wu_local.dot.png +0 -0
- data/examples/loadable.rb +2 -0
- data/examples/string_reverser.rb +7 -0
- data/lib/hanuman/stage.rb +2 -2
- data/lib/wukong.rb +21 -10
- data/lib/wukong/dataflow.rb +2 -5
- data/lib/wukong/doc_helpers.rb +14 -0
- data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
- data/lib/wukong/doc_helpers/field_handler.rb +91 -0
- data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
- data/lib/wukong/driver.rb +11 -1
- data/lib/wukong/local.rb +40 -0
- data/lib/wukong/local/event_machine_driver.rb +27 -0
- data/lib/wukong/local/runner.rb +98 -0
- data/lib/wukong/local/stdio_driver.rb +44 -0
- data/lib/wukong/local/tcp_driver.rb +47 -0
- data/lib/wukong/logger.rb +16 -7
- data/lib/wukong/plugin.rb +48 -0
- data/lib/wukong/processor.rb +57 -15
- data/lib/wukong/rake_helper.rb +6 -0
- data/lib/wukong/runner.rb +151 -128
- data/lib/wukong/runner/boot_sequence.rb +123 -0
- data/lib/wukong/runner/code_loader.rb +52 -0
- data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
- data/lib/wukong/runner/help_message.rb +42 -0
- data/lib/wukong/spec_helpers.rb +4 -12
- data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
- data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
- data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
- data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
- data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
- data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
- data/lib/wukong/version.rb +1 -1
- data/lib/wukong/widget/filters.rb +134 -8
- data/lib/wukong/widget/processors.rb +64 -5
- data/lib/wukong/widget/reducers/bin.rb +68 -18
- data/lib/wukong/widget/reducers/count.rb +12 -0
- data/lib/wukong/widget/reducers/group.rb +48 -5
- data/lib/wukong/widget/reducers/group_concat.rb +30 -2
- data/lib/wukong/widget/reducers/moments.rb +4 -4
- data/lib/wukong/widget/reducers/sort.rb +53 -3
- data/lib/wukong/widget/serializers.rb +37 -12
- data/lib/wukong/widget/utils.rb +1 -1
- data/spec/spec_helper.rb +20 -2
- data/spec/wukong/driver_spec.rb +2 -0
- data/spec/wukong/local/runner_spec.rb +40 -0
- data/spec/wukong/local_spec.rb +6 -0
- data/spec/wukong/logger_spec.rb +49 -0
- data/spec/wukong/processor_spec.rb +22 -0
- data/spec/wukong/runner_spec.rb +128 -8
- data/spec/wukong/widget/filters_spec.rb +28 -10
- data/spec/wukong/widget/processors_spec.rb +5 -5
- data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
- data/spec/wukong/widget/reducers/count_spec.rb +1 -1
- data/spec/wukong/widget/reducers/group_spec.rb +7 -6
- data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
- data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
- data/spec/wukong/widget/serializers_spec.rb +84 -88
- data/spec/wukong/wu-local_spec.rb +109 -0
- metadata +43 -20
- data/bin/wu-server +0 -70
- data/lib/wukong/boot.rb +0 -96
- data/lib/wukong/configuration.rb +0 -8
- data/lib/wukong/emitter.rb +0 -22
- data/lib/wukong/server.rb +0 -119
- data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
- data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
- data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
- data/spec/wukong/local_runner_spec.rb +0 -31
- data/spec/wukong/wu_local_spec.rb +0 -125
@@ -0,0 +1,27 @@
|
|
1
|
+
module Wukong

  # Mixin for drivers that run on top of EventMachine.
  #
  # Including it mixes in DriverMethods, adds a class-level
  # `add_signal_traps` method to the including class and supplies an
  # initializer that stores the settings and builds the dataflow.
  module EventMachineDriver

    include DriverMethods

    # :nodoc:
    #
    # Hook run when this module is included: defines
    # `add_signal_traps` as a class method on the including class.
    def self.included(klass)
      klass.singleton_class.class_eval do
        # Traps SIGINT and SIGTERM: each logs a message through the
        # class-level `log` and then stops the EventMachine reactor.
        def add_signal_traps
          Signal.trap('INT') do
            log.info 'Received SIGINT. Stopping.'
            EM.stop
          end
          Signal.trap('TERM') do
            log.info 'Received SIGTERM. Stopping.'
            EM.stop
          end
        end
      end
    end

    # :nodoc:
    #
    # Forwards both arguments to the superclass initializer, then
    # remembers the settings and constructs the dataflow for +label+.
    # `construct_dataflow` is not defined in this module; presumably
    # it comes from DriverMethods.
    def initialize(label, settings)
      super(label, settings)
      @settings = settings
      @dataflow = construct_dataflow(label, settings)
    end

  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require_relative 'stdio_driver'
|
2
|
+
require_relative 'tcp_driver'
|
3
|
+
|
4
|
+
module Wukong
|
5
|
+
module Local
|
6
|
+
|
7
|
+
# Implements the Runner for wu-local.
|
8
|
+
class LocalRunner < Wukong::Runner
|
9
|
+
|
10
|
+
include Wukong::Logging
|
11
|
+
|
12
|
+
usage "PROCESSOR|FLOW"
|
13
|
+
|
14
|
+
description <<-EOF.gsub(/^ {8}/, '')
|
15
|
+
wu-local is a tool for running Wukong processors and flows locally on
|
16
|
+
the command-line. Use wu-local by passing it a processor and feeding
|
17
|
+
in some data:
|
18
|
+
|
19
|
+
$ echo 'UNIX is Clever and Fun...' | wu-local tokenizer.rb
|
20
|
+
UNIX
|
21
|
+
is
|
22
|
+
Clever
|
23
|
+
and
|
24
|
+
Fun
|
25
|
+
|
26
|
+
If your processors have named fields you can pass them in as
|
27
|
+
arguments:
|
28
|
+
|
29
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4
|
30
|
+
UNIX
|
31
|
+
Clever
|
32
|
+
|
33
|
+
You can chain processors and calls to wu-local together:
|
34
|
+
|
35
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4 | wu-local downcaser.rb
|
36
|
+
unix
|
37
|
+
clever
|
38
|
+
|
39
|
+
Which is a good way to develop a combined data flow which you can
|
40
|
+
again test locally:
|
41
|
+
|
42
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenize_and_downcase_big_words.rb
|
43
|
+
unix
|
44
|
+
clever
|
45
|
+
EOF
|
46
|
+
|
47
|
+
# Returns the name of the processor we're going to run.
|
48
|
+
#
|
49
|
+
# @return [String]
|
50
|
+
# Works out which processor or dataflow should be run.
#
# The --run setting wins if present.  Otherwise the first argument is
# used: when it names an existing file its basename (without `.rb`)
# is returned, else the argument is returned unchanged (possibly nil).
def processor
  candidate = args.first
  explicit  = settings[:run]
  return explicit if explicit

  # Not a path on disk: treat the argument as a registered name.
  return candidate unless candidate && File.exist?(candidate)

  File.basename(candidate.to_s, '.rb')
end
|
60
|
+
|
61
|
+
# Validates the chosen processor.
|
62
|
+
#
|
63
|
+
# @raise [Wukong::Error] if it finds a problem
|
64
|
+
# @return [true]
|
65
|
+
# Checks that a processor or dataflow was named and is registered.
#
# Raises Error when nothing was named or when the name is not
# registered; returns true otherwise.
def validate
  if processor.nil? || processor.empty?
    raise Error.new("Must provide a processor or dataflow to run, via either the --run option or as the first argument")
  end

  unless registered?(processor)
    raise Error.new("No such processor or dataflow <#{processor}>")
  end

  true
end
|
70
|
+
|
71
|
+
# Adds a customized help message built from the Processor
|
72
|
+
# itself.
|
73
|
+
# Runs the superclass setup (with no arguments) and then, when the
# chosen name refers to a processor, lets that processor's class
# configure the settings.
def setup
  super()
  if processor?(processor)
    dataflow_class_for(processor).configure(settings)
  end
end
|
77
|
+
|
78
|
+
# Runs either the StdioDriver or the TCPDriver, depending on
|
79
|
+
# what settings were passed.
|
80
|
+
# Starts the EventMachine reactor and, inside it, starts the selected
# driver class with the processor name and the settings.
def run
  EM.run { driver.start(processor, settings) }
end
|
85
|
+
|
86
|
+
# The driver this Runner will use.
|
87
|
+
#
|
88
|
+
# Defaults to the Wukong::Local::StdioDriver, but will use the
|
89
|
+
# TCPDriver if it has a :tcp_port setting defined.
|
90
|
+
#
|
91
|
+
# @return [Wukong::Local::TCPDriver, Wukong::Local::StdioDriver]
|
92
|
+
# Picks the driver class: TCPDriver when the :tcp_port setting is
# truthy, StdioDriver otherwise.
def driver
  return TCPDriver if settings[:tcp_port]

  StdioDriver
end
|
95
|
+
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require_relative('event_machine_driver')
|
2
|
+
module Wukong
  module Local

    # Drives a processor or dataflow from lines read on STDIN.
    #
    # `start` attaches $stdin to EventMachine with this class as the
    # handler; each received line is sent through the dataflow.
    class StdioDriver < EM::P::LineAndTextProtocol
      include EventMachineDriver
      include Processor::StdoutProcessor
      include Logging

      class << self
        # Attaches $stdin to EventMachine, passing +label+ and
        # +settings+ on to the handler's initializer.
        def start(label, settings = {})
          EM.attach($stdin, self, label, settings)
        end
      end

      # Installs the signal traps and sets up the dataflow.
      def post_init
        self.class.add_signal_traps
        setup_dataflow
      end

      # Sends one line of input through the dataflow.  Any
      # StandardError stops the reactor and is logged.
      def receive_line(line)
        begin
          driver.send_through_dataflow(line)
        rescue StandardError => e
          failure = Wukong::Error.new(e)
          EM.stop

          # We'd like to *raise* `failure` here and have it be handled
          # by Wukong::Runner.run but we are fighting with
          # EventMachine.  It seems no matter what we do, EventMachine
          # will swallow any Exception raised here (including
          # SystemExit) and exit the Ruby process with a return code
          # of 0.
          #
          # Instead we just log the message that *would* have gotten
          # logged by Wukong::Runner.run and leave it to EventMachine
          # to exit very unnaturally.
          log.error(failure.message)
        end
      end

      # Finalizes and stops the dataflow, then stops the reactor.
      def unbind
        finalize_and_stop_dataflow
        EM.stop
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require_relative('event_machine_driver')
|
2
|
+
module Wukong
  module Local

    # A class for driving processors over a TCP protocol.
    #
    # `start` opens an EventMachine server with this class as the
    # connection handler; each received line is sent through the
    # dataflow and the buffered output is written back to the socket.
    class TCPDriver < EM::P::LineAndTextProtocol
      include EventMachineDriver
      include Processor::BufferedProcessor
      include Logging

      # Starts the TCP server, logs where it is listening and installs
      # the signal traps.
      #
      # The host comes from settings[:host], falling back to the
      # machine's hostname and finally to 'localhost'.  The port comes
      # from settings[:tcp_port] (the setting LocalRunner#driver checks
      # when it selects this driver), then settings[:port], then 9000.
      #
      # @param [String] label name of the processor or dataflow to run
      # @param [Hash] settings
      def self.start(label, settings = {})
        host = begin
                 settings[:host] || Socket.gethostname
               rescue StandardError
                 'localhost'
               end
        port = begin
                 (settings[:tcp_port] || settings[:port] || 9000).to_i
               rescue StandardError
                 # e.g. a value that does not respond to #to_i
                 9000
               end
        EM.start_server(host, port, self, label, settings)
        log.info "Server started on #{host} on port #{port}"
        add_signal_traps
      end

      # Logs the peer's address and sets up the dataflow.
      def post_init
        port, ip = Socket.unpack_sockaddr_in(get_peername)
        log.info "Connected to #{ip} on #{port}"
        setup_dataflow
      end

      # Resets the output buffer, then defers sending the line through
      # the dataflow; once that completes the buffer is flushed back
      # to the client.
      #
      # NOTE(review): the work is handed to EM.defer, so the rescue
      # below can only see errors raised while scheduling it -- confirm
      # whether errors raised inside the dataflow need handling too.
      def receive_line line
        @buffer   = []
        operation = proc { driver.send_through_dataflow(line) }
        callback  = proc { flush_buffer @buffer }
        EM.defer(operation, callback)
      rescue => e
        EM.stop
        raise Wukong::Error.new(e)
      end

      # Sends the records, newline-separated and newline-terminated,
      # to the client and then empties the array in place.
      def flush_buffer records
        send_data(records.join("\n") + "\n")
        records.clear
      end

      # Finalizes and stops the dataflow, then stops the reactor.
      def unbind
        finalize_and_stop_dataflow
        EM.stop
      end

    end
  end
end
|
data/lib/wukong/logger.rb
CHANGED
@@ -1,10 +1,19 @@
|
|
1
|
-
module Wukong
|
1
|
+
module Wukong
|
2
|
+
|
2
3
|
class LogFactory
|
3
4
|
|
4
5
|
attr_reader :created_log
|
5
6
|
|
6
|
-
def self.
|
7
|
-
Log4r::StderrOutputter.new('console', formatter:
|
7
|
+
# Builds the default outputter: a Log4r::StderrOutputter named
# 'console' whose formatter is the default formatter for +klass+.
def self.default_outputter(klass)
  formatter = default_formatter(klass)
  Log4r::StderrOutputter.new('console', formatter: formatter)
end
|
10
|
+
|
11
|
+
# Builds the default formatter: a Log4r::PatternFormatter using the
# default pattern for +klass+.
def self.default_formatter(klass)
  pattern = default_pattern(klass)
  Log4r::PatternFormatter.new(pattern: pattern)
end
|
14
|
+
|
15
|
+
# The default log pattern string.  +klass+ is accepted but not used.
def self.default_pattern(klass)
  "%l %d [%-20c] -- %m"
end
|
9
18
|
|
10
19
|
def self.configure(klass, options = {})
|
@@ -14,7 +23,7 @@ module Wukong
|
|
14
23
|
|
15
24
|
def initialize(logger, config)
|
16
25
|
@created_log = logger.is_a?(Log4r::Logger) ? logger : Log4r::Logger.new(logger.to_s)
|
17
|
-
outputter(LogFactory.
|
26
|
+
outputter(LogFactory.default_outputter(logger)) unless ancestry_has_outputter?(@created_log)
|
18
27
|
apply_options(config)
|
19
28
|
end
|
20
29
|
|
@@ -33,7 +42,7 @@ module Wukong
|
|
33
42
|
begin
|
34
43
|
send(option, value)
|
35
44
|
rescue
|
36
|
-
raise "
|
45
|
+
raise Error.new("Error setting option <#{option}> to value <#{value}>")
|
37
46
|
end
|
38
47
|
end
|
39
48
|
end
|
@@ -51,7 +60,7 @@ module Wukong
|
|
51
60
|
debug: Log4r::DEBUG,
|
52
61
|
info: Log4r::INFO,
|
53
62
|
warn: Log4r::WARN
|
54
|
-
}.fetch(lvl){ raise "
|
63
|
+
}.fetch(lvl){ raise Error.new("Invalid log level: <#{lvl}>") }
|
55
64
|
end
|
56
65
|
|
57
66
|
def pattern ptrn
|
@@ -75,7 +84,7 @@ module Wukong
|
|
75
84
|
def self.included klass
|
76
85
|
if klass.ancestors.include?(Gorillib::Model)
|
77
86
|
klass.class_eval do
|
78
|
-
field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) })
|
87
|
+
field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) }, :doc => "Shared Wukong logger")
|
79
88
|
|
80
89
|
def receive_log params
|
81
90
|
@log = LogFactory.configure(self.class, params)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Wukong

  # Every class or module that has included Wukong::Plugin, in the
  # order in which they were included.
  PLUGINS = []

  class << self

    # Calls `configure(settings, program_name)` on each registered
    # plugin that responds to `configure`.
    #
    # @param [Configliere::Param] settings the settings to be configured by each plugin
    # @param [String] program_name the name of the currently executing program
    def configure_plugins(settings, program_name)
      PLUGINS.each do |plugin|
        next unless plugin.respond_to?(:configure)
        plugin.configure(settings, program_name)
      end
    end

    # Calls `boot(settings, root)` on each registered plugin that
    # responds to `boot`.
    #
    # @param [Configliere::Param] settings the settings for each plugin to boot from
    # @param [String] root the root directory the plugins are booting in
    def boot_plugins(settings, root)
      PLUGINS.each do |plugin|
        next unless plugin.respond_to?(:boot)
        plugin.boot(settings, root)
      end
    end

  end

  # Include this module in your own class or module to register it as
  # a Wukong plugin.
  #
  # A plugin may define either of these methods on itself; each one
  # is only called when the plugin responds to it:
  #
  # * `configure` called with a (pre-resolved) Configliere::Param argument and the basename of the running program
  # * `boot` called with a (resolved) Configliere::Param argument and the current working directory of the running program, reacts to any settings as necessary
  #
  # Subclasses of Wukong::Runner will automatically load and boot each
  # plugin.
  module Plugin
    # :nodoc:
    #
    # Adds the including class or module to PLUGINS, once.
    def self.included(mod)
      return if PLUGINS.include?(mod)

      PLUGINS.push(mod)
    end
  end

end
|
46
|
+
|
47
|
+
|
48
|
+
|
data/lib/wukong/processor.rb
CHANGED
@@ -15,15 +15,12 @@ module Wukong
|
|
15
15
|
include Logging
|
16
16
|
include Vayacondios::Notifications
|
17
17
|
|
18
|
-
field :action,
|
18
|
+
field :action, Whatever, :doc => false
|
19
19
|
|
20
20
|
class << self
|
21
21
|
|
22
|
-
def
|
23
|
-
@description = desc
|
24
|
-
end
|
25
|
-
|
26
|
-
def description
|
22
|
+
# Combined reader/writer for the description: with a truthy argument
# it stores and returns it, without one it returns the stored value.
def description(desc = nil)
  return @description unless desc

  @description = desc
end
|
29
26
|
|
@@ -47,6 +44,29 @@ module Wukong
|
|
47
44
|
instance_variable_set("@serialization_#{direction}", label) if %w[ tsv json xml ].include?(label.to_s)
|
48
45
|
end
|
49
46
|
|
47
|
+
# Defines a setting on +settings+ for each documented field, and
# copies the description across when there is one.
#
# Fields whose doc is false (or the string 'false') and the :log and
# :notifier fields are skipped.  For every other field the setting is
# defined with:
#
# * :description -- the field's doc, unless it is just "<name> field"
# * :type        -- omitted for String, :boolean for TrueClass or
#                   FalseClass, otherwise the field's type
# * :default     -- the field's default, when truthy
#
# A value already present in +settings+ under the field's name is
# re-assigned after the definition so that defining does not lose it.
#
# @param [Configliere::Param] settings
def configure(settings)
  settings.description = description if description
  fields.each_pair do |name, field|
    next if field.doc == false || field.doc.to_s == 'false'
    next if [:log, :notifier].include?(name)

    field_type = field.type.respond_to?(:product) ? field.type.product : field.type

    # The type is normally a class, so it has to be compared by
    # equality: `case field_type when String` tests
    # `String === field_type`, which is false for the class String
    # itself.  Instances are still matched as before.
    configliere_type =
      if field_type == String || field_type.is_a?(String)
        nil
      elsif [TrueClass, FalseClass, true, false].include?(field_type)
        :boolean
      else
        field_type
      end

    field_props = {}
    field_props[:description] = field.doc unless field.doc == "#{name} field"
    field_props[:type]        = configliere_type if configliere_type
    field_props[:default]     = field.default if field.default

    existing_value = settings[name]
    settings.define(name, field_props)
    settings[name] = existing_value unless existing_value.nil?
  end
end
|
69
|
+
|
50
70
|
end
|
51
71
|
|
52
72
|
def expected_record_type(type)
|
@@ -57,21 +77,38 @@ module Wukong
|
|
57
77
|
self.class.instance_variable_get("@serialization_#{direction.to_s}")
|
58
78
|
end
|
59
79
|
|
60
|
-
#
|
61
|
-
|
80
|
+
# When instantiated with a block, the block will replace this
|
81
|
+
# method.
|
82
|
+
#
|
83
|
+
# @param [Array<Object>] args
|
84
|
+
# @yield record a record that might be yielded by the block
|
85
|
+
# @yieldparam [Object] record the yielded record
|
86
|
+
def perform_action(*args)
  # Deliberately empty: receive_action replaces this method on the
  # instance when an action block is supplied.
end
|
62
88
|
|
63
|
-
#
|
89
|
+
# :nodoc:
|
90
|
+
#
|
91
|
+
# The action attribute is turned into the perform action method.
|
92
|
+
#
|
93
|
+
# @param [Proc] action
|
64
94
|
def receive_action(action)
  # Install the given proc as this object's own perform_action.
  define_singleton_method(:perform_action, &action)
end
|
67
97
|
|
68
|
-
# This method is called after the processor class has been
|
69
|
-
# but before any records are given to it to process
|
98
|
+
# This method is called after the processor class has been
|
99
|
+
# instantiated but before any records are given to it to process.
|
100
|
+
#
|
101
|
+
# Override this method in your subclass.
|
70
102
|
def setup
  # Deliberately empty hook: subclasses override it.
end
|
72
104
|
|
73
|
-
# This method is called once per record
|
74
|
-
#
|
105
|
+
# This method is called once per record.
|
106
|
+
#
|
107
|
+
# Override this method in your subclass.
|
108
|
+
#
|
109
|
+
# @param [Object] record
|
110
|
+
# @yield record the record you want to yield
|
111
|
+
# @yieldparam [Object] record the yielded record
|
75
112
|
def process(record, &emit)
  # Default behavior: hand the record to the block unchanged.
  yield(record)
end
|
@@ -83,13 +120,18 @@ module Wukong
|
|
83
120
|
# This can be used within an aggregating processor (like a reducer
|
84
121
|
# in a map/reduce job) to start processing the final aggregate of
|
85
122
|
# records since the "last record" has already been received.
|
123
|
+
#
|
124
|
+
# Override this method in your subclass.
|
125
|
+
#
|
126
|
+
# @yield record the record you want to yield
|
127
|
+
# @yieldparam [Object] record the yielded record
|
86
128
|
def finalize
  # Deliberately empty hook: subclasses override it.
end
|
88
130
|
|
89
131
|
# This method is called after all records have been passed. It
|
90
132
|
# signals that processing should stop.
|
91
|
-
|
92
|
-
#
|
133
|
+
#
|
134
|
+
# Override this method in your subclass.
|
93
135
|
def stop
  # Deliberately empty hook: subclasses override it.
end
|
95
137
|
|