wukong 3.0.0.pre3 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/README.md +689 -50
- data/bin/wu-local +1 -74
- data/diagrams/wu_local.dot +39 -0
- data/diagrams/wu_local.dot.png +0 -0
- data/examples/loadable.rb +2 -0
- data/examples/string_reverser.rb +7 -0
- data/lib/hanuman/stage.rb +2 -2
- data/lib/wukong.rb +21 -10
- data/lib/wukong/dataflow.rb +2 -5
- data/lib/wukong/doc_helpers.rb +14 -0
- data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
- data/lib/wukong/doc_helpers/field_handler.rb +91 -0
- data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
- data/lib/wukong/driver.rb +11 -1
- data/lib/wukong/local.rb +40 -0
- data/lib/wukong/local/event_machine_driver.rb +27 -0
- data/lib/wukong/local/runner.rb +98 -0
- data/lib/wukong/local/stdio_driver.rb +44 -0
- data/lib/wukong/local/tcp_driver.rb +47 -0
- data/lib/wukong/logger.rb +16 -7
- data/lib/wukong/plugin.rb +48 -0
- data/lib/wukong/processor.rb +57 -15
- data/lib/wukong/rake_helper.rb +6 -0
- data/lib/wukong/runner.rb +151 -128
- data/lib/wukong/runner/boot_sequence.rb +123 -0
- data/lib/wukong/runner/code_loader.rb +52 -0
- data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
- data/lib/wukong/runner/help_message.rb +42 -0
- data/lib/wukong/spec_helpers.rb +4 -12
- data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
- data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
- data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
- data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
- data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
- data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
- data/lib/wukong/version.rb +1 -1
- data/lib/wukong/widget/filters.rb +134 -8
- data/lib/wukong/widget/processors.rb +64 -5
- data/lib/wukong/widget/reducers/bin.rb +68 -18
- data/lib/wukong/widget/reducers/count.rb +12 -0
- data/lib/wukong/widget/reducers/group.rb +48 -5
- data/lib/wukong/widget/reducers/group_concat.rb +30 -2
- data/lib/wukong/widget/reducers/moments.rb +4 -4
- data/lib/wukong/widget/reducers/sort.rb +53 -3
- data/lib/wukong/widget/serializers.rb +37 -12
- data/lib/wukong/widget/utils.rb +1 -1
- data/spec/spec_helper.rb +20 -2
- data/spec/wukong/driver_spec.rb +2 -0
- data/spec/wukong/local/runner_spec.rb +40 -0
- data/spec/wukong/local_spec.rb +6 -0
- data/spec/wukong/logger_spec.rb +49 -0
- data/spec/wukong/processor_spec.rb +22 -0
- data/spec/wukong/runner_spec.rb +128 -8
- data/spec/wukong/widget/filters_spec.rb +28 -10
- data/spec/wukong/widget/processors_spec.rb +5 -5
- data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
- data/spec/wukong/widget/reducers/count_spec.rb +1 -1
- data/spec/wukong/widget/reducers/group_spec.rb +7 -6
- data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
- data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
- data/spec/wukong/widget/serializers_spec.rb +84 -88
- data/spec/wukong/wu-local_spec.rb +109 -0
- metadata +43 -20
- data/bin/wu-server +0 -70
- data/lib/wukong/boot.rb +0 -96
- data/lib/wukong/configuration.rb +0 -8
- data/lib/wukong/emitter.rb +0 -22
- data/lib/wukong/server.rb +0 -119
- data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
- data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
- data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
- data/spec/wukong/local_runner_spec.rb +0 -31
- data/spec/wukong/wu_local_spec.rb +0 -125
@@ -0,0 +1,27 @@
|
|
1
|
+
module Wukong

  # Mixin for drivers that want to run on top of EventMachine.
  #
  # Including it gives the host class a class-level `add_signal_traps`
  # method and an initializer that stores the settings and builds the
  # dataflow for the given label.
  module EventMachineDriver

    include DriverMethods

    # :nodoc:
    #
    # Hook run when the module is included: defines `add_signal_traps`
    # as a singleton method on the including class.
    def self.included(klass)
      klass.define_singleton_method(:add_signal_traps) do
        # Both traps log a message and then stop the EventMachine reactor.
        Signal.trap('INT')  { log.info 'Received SIGINT. Stopping.' ; EM.stop }
        Signal.trap('TERM') { log.info 'Received SIGTERM. Stopping.' ; EM.stop }
      end
    end

    # :nodoc:
    #
    # Forwards both arguments to the superclass initializer, then keeps
    # the settings and constructs the dataflow named by +label+.
    def initialize(label, settings)
      super(label, settings)
      @settings = settings
      @dataflow = construct_dataflow(label, settings)
    end

  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require_relative 'stdio_driver'
|
2
|
+
require_relative 'tcp_driver'
|
3
|
+
|
4
|
+
module Wukong
|
5
|
+
module Local
|
6
|
+
|
7
|
+
# Implements the Runner for wu-local.
|
8
|
+
class LocalRunner < Wukong::Runner
|
9
|
+
|
10
|
+
include Wukong::Logging
|
11
|
+
|
12
|
+
usage "PROCESSOR|FLOW"
|
13
|
+
|
14
|
+
description <<-EOF.gsub(/^ {8}/, '')
|
15
|
+
wu-local is a tool for running Wukong processors and flows locally on
|
16
|
+
the command-line. Use wu-local by passing it a processor and feeding
|
17
|
+
in some data:
|
18
|
+
|
19
|
+
$ echo 'UNIX is Clever and Fun...' | wu-local tokenizer.rb
|
20
|
+
UNIX
|
21
|
+
is
|
22
|
+
Clever
|
23
|
+
and
|
24
|
+
Fun
|
25
|
+
|
26
|
+
If your processors have named fields you can pass them in as
|
27
|
+
arguments:
|
28
|
+
|
29
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4
|
30
|
+
UNIX
|
31
|
+
Clever
|
32
|
+
|
33
|
+
You can chain processors and calls to wu-local together:
|
34
|
+
|
35
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4 | wu-local downcaser.rb
|
36
|
+
unix
|
37
|
+
clever
|
38
|
+
|
39
|
+
Which is a good way to develop a combined data flow which you can
|
40
|
+
again test locally:
|
41
|
+
|
42
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenize_and_downcase_big_words.rb
|
43
|
+
unix
|
44
|
+
clever
|
45
|
+
EOF
|
46
|
+
|
47
|
+
# Returns the name of the processor we're going to run.
|
48
|
+
#
|
49
|
+
# @return [String]
|
50
|
+
def processor
  # An explicit --run setting always wins.
  explicit = settings[:run]
  return explicit if explicit

  # Otherwise fall back to the first argument; when it names an
  # existing file, use the file's basename without the `.rb` suffix.
  candidate = args.first
  return candidate unless candidate && File.exist?(candidate)

  File.basename(candidate.to_s, '.rb')
end
|
60
|
+
|
61
|
+
# Validates the chosen processor.
|
62
|
+
#
|
63
|
+
# @raise [Wukong::Error] if it finds a problem
|
64
|
+
# @return [true]
|
65
|
+
def validate
  name = processor

  # A processor or dataflow name is mandatory.
  if name.nil? || name.empty?
    raise Error.new("Must provide a processor or dataflow to run, via either the --run option or as the first argument")
  end

  # ...and it must be one that has been registered.
  raise Error.new("No such processor or dataflow <#{name}>") unless registered?(name)

  true
end
|
70
|
+
|
71
|
+
# Adds a customized help message built from the Processor
|
72
|
+
# itself.
|
73
|
+
def setup
  super()

  # Only processors (as opposed to dataflows) get to add their own
  # settings and help text.
  name = processor
  return unless processor?(name)

  dataflow_class_for(name).configure(settings)
end
|
77
|
+
|
78
|
+
# Runs either the StdioDriver or the TCPDriver, depending on
|
79
|
+
# what settings were passed.
|
80
|
+
def run
  # Start the chosen driver inside the EventMachine reactor.
  EM.run { driver.start(processor, settings) }
end
|
85
|
+
|
86
|
+
# The driver this Runner will use.
|
87
|
+
#
|
88
|
+
# Defaults to the Wukong::Local::StdioDriver, but will use the
|
89
|
+
# TCPDriver if it has a :tcp_port setting defined.
|
90
|
+
#
|
91
|
+
# @return [Wukong::Local::TCPDriver, Wukong::Local::StdioDriver]
|
92
|
+
def driver
  # A truthy :tcp_port setting selects the TCP driver.
  return TCPDriver if settings[:tcp_port]

  StdioDriver
end
|
95
|
+
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require_relative('event_machine_driver')
|
2
|
+
module Wukong
  module Local

    # Drives a processor or dataflow with lines read from STDIN,
    # using EventMachine's line protocol.
    class StdioDriver < EM::P::LineAndTextProtocol
      include EventMachineDriver
      include Processor::StdoutProcessor
      include Logging

      class << self
        # Attaches a new driver instance to $stdin within EventMachine.
        #
        # @param [String] label passed through to the driver's initializer
        # @param [Hash] settings passed through to the driver's initializer
        def start(label, settings = {})
          EM.attach($stdin, self, label, settings)
        end
      end

      # Installs the signal traps and sets up the dataflow.
      def post_init
        self.class.add_signal_traps
        setup_dataflow
      end

      # Sends one line of input through the dataflow.
      #
      # Any StandardError is wrapped in a Wukong::Error, the reactor is
      # stopped, and the error message is logged.
      def receive_line(line)
        begin
          driver.send_through_dataflow(line)
        rescue StandardError => e
          wrapped = Wukong::Error.new(e)
          EM.stop

          # We'd like to *raise* the wrapped error here and let
          # Wukong::Runner.run handle it, but per the original authors
          # EventMachine appears to swallow any exception raised here
          # (including SystemExit) and exit the Ruby process with a
          # return code of 0.
          #
          # So instead we log the message that Wukong::Runner.run
          # *would* have logged and leave the exit to EventMachine.
          log.error(wrapped.message)
        end
      end

      # Finalizes and stops the dataflow, then stops the reactor.
      def unbind
        finalize_and_stop_dataflow
        EM.stop
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require_relative('event_machine_driver')
|
2
|
+
module Wukong
  module Local

    # Drives a processor or dataflow over TCP: an EventMachine server
    # that receives lines from each connection, sends them through the
    # dataflow and writes the buffered output back to the connection.
    class TCPDriver < EM::P::LineAndTextProtocol
      include EventMachineDriver
      include Processor::BufferedProcessor
      include Logging

      # Starts the TCP server and installs the signal traps.
      #
      # The port is taken from +settings[:port]+, then from
      # +settings[:tcp_port]+, and finally defaults to 9000.
      # LocalRunner#driver selects this driver based on +:tcp_port+, so
      # that setting has to be honored here; previously only +:port+ was
      # read and the requested port was silently replaced by 9000.
      #
      # The host is +settings[:host]+ or this machine's hostname.
      #
      # @param [String] label passed through to each connection's initializer
      # @param [Hash] settings passed through to each connection's initializer
      def self.start(label, settings = {})
        # The rescue modifiers are deliberate best-effort fallbacks kept
        # from the original implementation.
        host = (settings[:host] || Socket.gethostname) rescue 'localhost'
        port = (settings[:port] || settings[:tcp_port] || 9000).to_i rescue 9000
        EM.start_server(host, port, self, label, settings)
        log.info "Server started on #{host} on port #{port}"
        add_signal_traps
      end

      # Logs the peer's address and sets up the dataflow for this
      # connection.
      def post_init
        port, ip = Socket.unpack_sockaddr_in(get_peername)
        log.info "Connected to #{ip} on #{port}"
        setup_dataflow
      end

      # Resets the output buffer, hands the line to the dataflow via
      # EM.defer and flushes the buffer in the callback.
      #
      # NOTE(review): @buffer is replaced on every line, while the
      # callback reads @buffer when it runs; if lines arrive faster than
      # deferred operations complete, output may be attributed to the
      # wrong buffer -- confirm against BufferedProcessor.
      #
      # NOTE(review): the rescue below only sees errors raised while
      # scheduling the deferred operation, presumably not errors raised
      # inside the operation itself -- confirm.
      #
      # @param [String] line
      # @raise [Wukong::Error] wrapping any StandardError raised here
      def receive_line line
        @buffer   = []
        operation = proc { driver.send_through_dataflow(line) }
        callback  = proc { flush_buffer @buffer }
        EM.defer(operation, callback)
      rescue => e
        EM.stop
        raise Wukong::Error.new(e)
      end

      # Sends the records back to the client, one per line, followed by
      # a trailing newline, then empties the given array in place.
      #
      # @param [Array] records
      def flush_buffer records
        send_data(records.join("\n") + "\n")
        records.clear
      end

      # Finalizes and stops the dataflow, then stops the reactor.
      #
      # NOTE(review): this stops EventMachine when a connection is
      # unbound -- confirm that ending the server on the first
      # disconnect is intended.
      def unbind
        finalize_and_stop_dataflow
        EM.stop
      end

    end
  end
end
|
data/lib/wukong/logger.rb
CHANGED
@@ -1,10 +1,19 @@
|
|
1
|
-
module Wukong
|
1
|
+
module Wukong
|
2
|
+
|
2
3
|
class LogFactory
|
3
4
|
|
4
5
|
attr_reader :created_log
|
5
6
|
|
6
|
-
def self.
|
7
|
-
Log4r::StderrOutputter.new('console', formatter:
|
7
|
+
# Builds the default outputter: a Log4r STDERR outputter named
# 'console' using the default formatter for +klass+.
def self.default_outputter(klass)
  formatter = default_formatter(klass)
  Log4r::StderrOutputter.new('console', formatter: formatter)
end
|
10
|
+
|
11
|
+
# Builds the default formatter: a Log4r pattern formatter using the
# default pattern for +klass+.
def self.default_formatter(klass)
  pattern = default_pattern(klass)
  Log4r::PatternFormatter.new(pattern: pattern)
end
|
14
|
+
|
15
|
+
# The default log line pattern. The +klass+ argument is accepted but
# does not influence the result.
def self.default_pattern(klass)
  '%l %d [%-20c] -- %m'
end
|
9
18
|
|
10
19
|
def self.configure(klass, options = {})
|
@@ -14,7 +23,7 @@ module Wukong
|
|
14
23
|
|
15
24
|
def initialize(logger, config)
|
16
25
|
@created_log = logger.is_a?(Log4r::Logger) ? logger : Log4r::Logger.new(logger.to_s)
|
17
|
-
outputter(LogFactory.
|
26
|
+
outputter(LogFactory.default_outputter(logger)) unless ancestry_has_outputter?(@created_log)
|
18
27
|
apply_options(config)
|
19
28
|
end
|
20
29
|
|
@@ -33,7 +42,7 @@ module Wukong
|
|
33
42
|
begin
|
34
43
|
send(option, value)
|
35
44
|
rescue
|
36
|
-
raise "
|
45
|
+
raise Error.new("Error setting option <#{option}> to value <#{value}>")
|
37
46
|
end
|
38
47
|
end
|
39
48
|
end
|
@@ -51,7 +60,7 @@ module Wukong
|
|
51
60
|
debug: Log4r::DEBUG,
|
52
61
|
info: Log4r::INFO,
|
53
62
|
warn: Log4r::WARN
|
54
|
-
}.fetch(lvl){ raise "
|
63
|
+
}.fetch(lvl){ raise Error.new("Invalid log level: <#{lvl}>") }
|
55
64
|
end
|
56
65
|
|
57
66
|
def pattern ptrn
|
@@ -75,7 +84,7 @@ module Wukong
|
|
75
84
|
def self.included klass
|
76
85
|
if klass.ancestors.include?(Gorillib::Model)
|
77
86
|
klass.class_eval do
|
78
|
-
field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) })
|
87
|
+
field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) }, :doc => "Shared Wukong logger")
|
79
88
|
|
80
89
|
def receive_log params
|
81
90
|
@log = LogFactory.configure(self.class, params)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Wukong

  # Registry of every class or module that has included Wukong::Plugin.
  PLUGINS = []

  class << self

    # Lets each registered plugin that responds to `configure` adjust
    # the given +settings+ for the given +program_name+.
    #
    # @param [Configliere::Param] settings the settings to be configured by each plugin
    # @param [String] program_name the name of the currently executing program
    def configure_plugins(settings, program_name)
      PLUGINS.each do |plugin|
        next unless plugin.respond_to?(:configure)
        plugin.configure(settings, program_name)
      end
    end

    # Lets each registered plugin that responds to `boot` boot itself
    # from the given +settings+ in the given +root+ directory.
    #
    # @param [Configliere::Param] settings the settings for each plugin to boot from
    # @param [String] root the root directory the plugins are booting in
    def boot_plugins(settings, root)
      PLUGINS.each do |plugin|
        next unless plugin.respond_to?(:boot)
        plugin.boot(settings, root)
      end
    end

  end

  # Include this module in a class or module to register it as a
  # Wukong plugin.
  #
  # A plugin is expected to define:
  #
  # * `configure`, called with a (pre-resolved) Configliere::Param and the basename of the running program
  # * `boot`, called with a (resolved) Configliere::Param and the current working directory of the running program, to react to any settings as necessary
  #
  # Subclasses of Wukong::Runner will automatically load and boot each
  # plugin.
  module Plugin
    # :nodoc:
    #
    # Registers the including class or module once.
    def self.included(mod)
      return if PLUGINS.include?(mod)
      PLUGINS << mod
    end
  end

end
|
46
|
+
|
47
|
+
|
48
|
+
|
data/lib/wukong/processor.rb
CHANGED
@@ -15,15 +15,12 @@ module Wukong
|
|
15
15
|
include Logging
|
16
16
|
include Vayacondios::Notifications
|
17
17
|
|
18
|
-
field :action,
|
18
|
+
field :action, Whatever, :doc => false
|
19
19
|
|
20
20
|
class << self
|
21
21
|
|
22
|
-
def
|
23
|
-
@description = desc
|
24
|
-
end
|
25
|
-
|
26
|
-
def description
|
22
|
+
def description desc=nil
|
23
|
+
@description = desc if desc
|
27
24
|
@description
|
28
25
|
end
|
29
26
|
|
@@ -47,6 +44,29 @@ module Wukong
|
|
47
44
|
instance_variable_set("@serialization_#{direction}", label) if %w[ tsv json xml ].include?(label.to_s)
|
48
45
|
end
|
49
46
|
|
47
|
+
# Defines one setting on +settings+ for each documented field of
# this processor, and copies the processor's description onto
# +settings+ when there is one.
#
# Fields whose doc is false (or the string 'false'), and the :log
# and :notifier fields, are skipped. A value already present in
# +settings+ for a field is restored after the field is defined.
#
# @param [Configliere::Param] settings the settings to add definitions to
def configure(settings)
  settings.description = description if description
  fields.each_pair do |name, field|
    next if field.doc == false || field.doc.to_s == 'false'
    next if [:log, :notifier].include?(name)

    field_type = (field.type.respond_to?(:product) ? field.type.product : field.type)

    # `case field_type when String` tests `String === field_type`,
    # which is false for the class String itself, so the String and
    # boolean branches never matched a class. Compare the class by
    # equality, and keep matching instances as before.
    configliere_type =
      if field_type == String || field_type.is_a?(String)
        nil
      elsif [TrueClass, FalseClass, true, false].include?(field_type)
        :boolean
      else
        field_type
      end

    field_props = {}
    # The doc "<name> field" is treated as a placeholder and not shown.
    field_props[:description] = field.doc unless field.doc == "#{name} field"
    field_props[:type]        = configliere_type if configliere_type
    # NOTE(review): a default of `false` is skipped by this truthiness
    # check -- confirm whether that is intended.
    field_props[:default]     = field.default if field.default

    existing_value = settings[name]
    settings.define(name, field_props)
    settings[name] = existing_value unless existing_value.nil?
  end
end
|
69
|
+
|
50
70
|
end
|
51
71
|
|
52
72
|
def expected_record_type(type)
|
@@ -57,21 +77,38 @@ module Wukong
|
|
57
77
|
self.class.instance_variable_get("@serialization_#{direction.to_s}")
|
58
78
|
end
|
59
79
|
|
60
|
-
#
|
61
|
-
|
80
|
+
# When instantiated with a block, the block will replace this
|
81
|
+
# method.
|
82
|
+
#
|
83
|
+
# @param [Array<Object>] args
|
84
|
+
# @yield record a record that might be yielded by the block
|
85
|
+
# @yieldparam [Object] record the yielded record
|
86
|
+
def perform_action(*args)
|
87
|
+
end
|
62
88
|
|
63
|
-
#
|
89
|
+
# :nodoc:
|
90
|
+
#
|
91
|
+
# The action attribute is turned into the perform action method.
|
92
|
+
#
|
93
|
+
# @param [Proc] action
|
64
94
|
def receive_action(action)
|
65
95
|
self.define_singleton_method(:perform_action, &action)
|
66
96
|
end
|
67
97
|
|
68
|
-
# This method is called after the processor class has been
|
69
|
-
# but before any records are given to it to process
|
98
|
+
# This method is called after the processor class has been
|
99
|
+
# instantiated but before any records are given to it to process.
|
100
|
+
#
|
101
|
+
# Override this method in your subclass.
|
70
102
|
def setup
|
71
103
|
end
|
72
104
|
|
73
|
-
# This method is called once per record
|
74
|
-
#
|
105
|
+
# This method is called once per record.
|
106
|
+
#
|
107
|
+
# Override this method in your subclass.
|
108
|
+
#
|
109
|
+
# @param [Object] record
|
110
|
+
# @yield record the record you want to yield
|
111
|
+
# @yieldparam [Object] record the yielded record
|
75
112
|
def process(record, &emit)
|
76
113
|
yield record
|
77
114
|
end
|
@@ -83,13 +120,18 @@ module Wukong
|
|
83
120
|
# This can be used within an aggregating processor (like a reducer
|
84
121
|
# in a map/reduce job) to start processing the final aggregate of
|
85
122
|
# records since the "last record" has already been received.
|
123
|
+
#
|
124
|
+
# Override this method in your subclass
|
125
|
+
#
|
126
|
+
# @yield record the record you want to yield
|
127
|
+
# @yieldparam [Object] record the yielded record
|
86
128
|
def finalize
|
87
129
|
end
|
88
130
|
|
89
131
|
# This method is called after all records have been passed. It
|
90
132
|
# signals that processing should stop.
|
91
|
-
|
92
|
-
#
|
133
|
+
#
|
134
|
+
# Override this method in your subclass.
|
93
135
|
def stop
|
94
136
|
end
|
95
137
|
|