wukong 3.0.0.pre3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -0,0 +1,27 @@
1
+ module Wukong
2
+
3
+ # A module which can be included by other drivers which lets them
4
+ # use EventMachine under the hood.
5
+ module EventMachineDriver
6
+
7
+ include DriverMethods
8
+
9
+ # :nodoc:
10
+ def self.included klass
11
+ klass.class_eval do
12
+ def self.add_signal_traps
13
+ Signal.trap('INT') { log.info 'Received SIGINT. Stopping.' ; EM.stop }
14
+ Signal.trap('TERM') { log.info 'Received SIGTERM. Stopping.' ; EM.stop }
15
+ end
16
+ end
17
+ end
18
+
19
+ # :nodoc:
20
+ def initialize(label, settings)
21
+ super
22
+ @settings = settings
23
+ @dataflow = construct_dataflow(label, settings)
24
+ end
25
+
26
+ end
27
+ end
@@ -0,0 +1,98 @@
1
+ require_relative 'stdio_driver'
2
+ require_relative 'tcp_driver'
3
+
4
+ module Wukong
5
+ module Local
6
+
7
+ # Implements the Runner for wu-local.
8
+ class LocalRunner < Wukong::Runner
9
+
10
+ include Wukong::Logging
11
+
12
+ usage "PROCESSOR|FLOW"
13
+
14
+ description <<-EOF.gsub(/^ {8}/, '')
15
+ wu-local is a tool for running Wukong processors and flows locally on
16
+ the command-line. Use wu-local by passing it a processor and feeding
17
+ in some data:
18
+
19
+ $ echo 'UNIX is Clever and Fun...' | wu-local tokenizer.rb
20
+ UNIX
21
+ is
22
+ Clever
23
+ and
24
+ Fun
25
+
26
+ If your processors have named fields you can pass them in as
27
+ arguments:
28
+
29
+ $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4
30
+ UNIX
31
+ Clever
32
+
33
+ You can chain processors and calls to wu-local together:
34
+
35
+ $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4 | wu-local downcaser.rb
36
+ unix
37
+ clever
38
+
39
+ Which is a good way to develop a combined data flow which you can
40
+ again test locally:
41
+
42
+ $ echo 'UNIX is clever and fun...' | wu-local tokenize_and_downcase_big_words.rb
43
+ unix
44
+ clever
45
+ EOF
46
+
47
+ # Returns the name of the processor we're going to run.
48
+ #
49
+ # @return [String]
50
+ def processor
51
+ arg = args.first
52
+ basename = File.basename(arg.to_s, '.rb')
53
+
54
+ case
55
+ when settings[:run] then settings[:run]
56
+ when arg && File.exist?(arg) then basename
57
+ else arg
58
+ end
59
+ end
60
+
61
+ # Validates the chosen processor.
62
+ #
63
+ # @raise [Wukong::Error] if it finds a problem
64
+ # @return [true]
65
+ def validate
66
+ raise Error.new("Must provide a processor or dataflow to run, via either the --run option or as the first argument") if processor.nil? || processor.empty?
67
+ raise Error.new("No such processor or dataflow <#{processor}>") unless registered?(processor)
68
+ true
69
+ end
70
+
71
+ # Adds a customized help message built from the Processor
72
+ # itself.
73
+ def setup
74
+ super()
75
+ dataflow_class_for(processor).configure(settings) if processor?(processor)
76
+ end
77
+
78
+ # Runs either the StdioDriver or the TCPDriver, depending on
79
+ # what settings were passed.
80
+ def run
81
+ EM.run do
82
+ driver.start(processor, settings)
83
+ end
84
+ end
85
+
86
+ # The driver this Runner will use.
87
+ #
88
+ # Defaults to the Wukong::Local::StdioDriver, but will use the
89
+ # TCPDriver if it has a :tcp_port setting defined.
90
+ #
91
+ # @return [Wukong::Local::TCPDriver, Wukong::Local::StdioDriver]
92
+ def driver
93
+ (settings[:tcp_port] ? TCPDriver : StdioDriver)
94
+ end
95
+
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,44 @@
1
+ require_relative('event_machine_driver')
2
+ module Wukong
3
+ module Local
4
+
5
+ # A class for driving processors over the STDIN/STDOUT protocol.
6
+ class StdioDriver < EM::P::LineAndTextProtocol
7
+ include EventMachineDriver
8
+ include Processor::StdoutProcessor
9
+ include Logging
10
+
11
+ def self.start(label, settings = {})
12
+ EM.attach($stdin, self, label, settings)
13
+ end
14
+
15
+ def post_init
16
+ self.class.add_signal_traps
17
+ setup_dataflow
18
+ end
19
+
20
+ def receive_line line
21
+ driver.send_through_dataflow(line)
22
+ rescue => e
23
+ error = Wukong::Error.new(e)
24
+ EM.stop
25
+
26
+ # We'd like to *raise* `error` here and have it be handled by
27
+ # Wukong::Runner.run but we are fighting with EventMachine.
28
+ # It seems no matter what we do, EventMachine will swallow any
29
+ # Exception raised here (including SystemExit) and exit the
30
+ # Ruby process with a return code of 0.
31
+ #
32
+ # Instead we just log the message that *would* have gotten
33
+ # logged by Wukong::Runner.run and leave it to EventMachine to
34
+ # exit very unnaturally.
35
+ log.error(error.message)
36
+ end
37
+
38
+ def unbind
39
+ finalize_and_stop_dataflow
40
+ EM.stop
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,47 @@
1
+ require_relative('event_machine_driver')
2
+ module Wukong
3
+ module Local
4
+
5
+ # A class for driving processors over a TCP protocol.
6
+ class TCPDriver < EM::P::LineAndTextProtocol
7
+ include EventMachineDriver
8
+ include Processor::BufferedProcessor
9
+ include Logging
10
+
11
+ def self.start(label, settings = {})
12
+ host = (settings[:host] || Socket.gethostname) rescue 'localhost'
13
+ port = (settings[:port] || 9000).to_i rescue 9000
14
+ EM.start_server(host, port, self, label, settings)
15
+ log.info "Server started on #{host} on port #{port}"
16
+ add_signal_traps
17
+ end
18
+
19
+ def post_init
20
+ port, ip = Socket.unpack_sockaddr_in(get_peername)
21
+ log.info "Connected to #{ip} on #{port}"
22
+ setup_dataflow
23
+ end
24
+
25
+ def receive_line line
26
+ @buffer = []
27
+ operation = proc { driver.send_through_dataflow(line) }
28
+ callback = proc { flush_buffer @buffer }
29
+ EM.defer(operation, callback)
30
+ rescue => e
31
+ EM.stop
32
+ raise Wukong::Error.new(e)
33
+ end
34
+
35
+ def flush_buffer records
36
+ send_data(records.join("\n") + "\n")
37
+ records.clear
38
+ end
39
+
40
+ def unbind
41
+ finalize_and_stop_dataflow
42
+ EM.stop
43
+ end
44
+
45
+ end
46
+ end
47
+ end
@@ -1,10 +1,19 @@
1
- module Wukong
1
+ module Wukong
2
+
2
3
  class LogFactory
3
4
 
4
5
  attr_reader :created_log
5
6
 
6
- def self.defaults
7
- Log4r::StderrOutputter.new('console', formatter: Log4r::PatternFormatter.new(pattern: "%l %d [%-20c] -- %m"))
7
+ def self.default_outputter klass
8
+ Log4r::StderrOutputter.new('console', formatter: default_formatter(klass))
9
+ end
10
+
11
+ def self.default_formatter klass
12
+ Log4r::PatternFormatter.new(pattern: default_pattern(klass))
13
+ end
14
+
15
+ def self.default_pattern klass
16
+ "%l %d [%-20c] -- %m"
8
17
  end
9
18
 
10
19
  def self.configure(klass, options = {})
@@ -14,7 +23,7 @@ module Wukong
14
23
 
15
24
  def initialize(logger, config)
16
25
  @created_log = logger.is_a?(Log4r::Logger) ? logger : Log4r::Logger.new(logger.to_s)
17
- outputter(LogFactory.defaults) unless ancestry_has_outputter?(@created_log)
26
+ outputter(LogFactory.default_outputter(logger)) unless ancestry_has_outputter?(@created_log)
18
27
  apply_options(config)
19
28
  end
20
29
 
@@ -33,7 +42,7 @@ module Wukong
33
42
  begin
34
43
  send(option, value)
35
44
  rescue
36
- raise "invalid log option"
45
+ raise Error.new("Error setting option <#{option}> to value <#{value}>")
37
46
  end
38
47
  end
39
48
  end
@@ -51,7 +60,7 @@ module Wukong
51
60
  debug: Log4r::DEBUG,
52
61
  info: Log4r::INFO,
53
62
  warn: Log4r::WARN
54
- }.fetch(lvl){ raise "invalid log level" }
63
+ }.fetch(lvl){ raise Error.new("Invalid log level: <#{lvl}>") }
55
64
  end
56
65
 
57
66
  def pattern ptrn
@@ -75,7 +84,7 @@ module Wukong
75
84
  def self.included klass
76
85
  if klass.ancestors.include?(Gorillib::Model)
77
86
  klass.class_eval do
78
- field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) })
87
+ field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) }, :doc => "Shared Wukong logger")
79
88
 
80
89
  def receive_log params
81
90
  @log = LogFactory.configure(self.class, params)
@@ -0,0 +1,48 @@
1
+ module Wukong
2
+
3
+ # An array of known plugins.
4
+ PLUGINS = []
5
+
6
+ # Asks each loaded plugin to configure the given +settings+ for the
7
+ # given +program_name+.
8
+ #
9
+ # @param [Configliere::Param] settings the settings to be configured by each plugin
10
+ # @param [String] program_name the name of the currently executing program
11
+ def self.configure_plugins(settings, program_name)
12
+ PLUGINS.each do |plugin|
13
+ plugin.configure(settings, program_name) if plugin.respond_to?(:configure)
14
+ end
15
+ end
16
+
17
+ # Asks each loaded plugin to boot itself from the given +settings+
18
+ # in the given +root+ directory.
19
+ #
20
+ # @param [Configliere::Param] settings the settings for each plugin to boot from
21
+ # @param [String] root the root directory the plugins are booting in
22
+ def self.boot_plugins(settings, root)
23
+ PLUGINS.each do |plugin|
24
+ plugin.boot(settings, root) if plugin.respond_to?(:boot)
25
+ end
26
+ end
27
+
28
+ # Include this module in your own class or module to have it
29
+ # register itself as a Wukong plugin.
30
+ #
31
+ # Your class or module must define the following methods:
32
+ #
33
+ # * `configure` called with a (pre-resolved) Configliere::Param argument and the basename of the running program
34
+ # * `boot` called with a (resolved) Configliere::Param argument and the current working directory of the running program, reacts to any settings as necessary
35
+ #
36
+ # Subclasses of Wukong::Runner will automatically load and boot each
37
+ # plugin.
38
+ module Plugin
39
+ # :nodoc:
40
+ def self.included mod
41
+ PLUGINS << mod unless PLUGINS.include?(mod)
42
+ end
43
+ end
44
+
45
+ end
46
+
47
+
48
+
@@ -15,15 +15,12 @@ module Wukong
15
15
  include Logging
16
16
  include Vayacondios::Notifications
17
17
 
18
- field :action, Whatever
18
+ field :action, Whatever, :doc => false
19
19
 
20
20
  class << self
21
21
 
22
- def describe desc
23
- @description = desc
24
- end
25
-
26
- def description
22
+ def description desc=nil
23
+ @description = desc if desc
27
24
  @description
28
25
  end
29
26
 
@@ -47,6 +44,29 @@ module Wukong
47
44
  instance_variable_set("@serialization_#{direction}", label) if %w[ tsv json xml ].include?(label.to_s)
48
45
  end
49
46
 
47
+ def configure(settings)
48
+ settings.description = description if description
49
+ fields.each_pair do |name, field|
50
+ next if field.doc == false || field.doc.to_s == 'false'
51
+ next if [:log, :notifier].include?(name)
52
+ field_props = {}.tap do |props|
53
+ props[:description] = field.doc unless field.doc == "#{name} field"
54
+ field_type = (field.type.respond_to?(:product) ? field.type.product : field.type)
55
+ configliere_type = case field_type
56
+ when String then nil
57
+ when TrueClass, FalseClass then :boolean
58
+ else field_type
59
+ end
60
+
61
+ props[:type] = configliere_type if configliere_type
62
+ props[:default] = field.default if field.default
63
+ end
64
+ existing_value = settings[name]
65
+ settings.define(name, field_props)
66
+ settings[name] = existing_value unless existing_value.nil?
67
+ end
68
+ end
69
+
50
70
  end
51
71
 
52
72
  def expected_record_type(type)
@@ -57,21 +77,38 @@ module Wukong
57
77
  self.class.instance_variable_get("@serialization_#{direction.to_s}")
58
78
  end
59
79
 
60
- # This is a placeholder method intended to be overridden
61
- def perform_action(*args) ; end
80
+ # When instantiated with a block, the block will replace this
81
+ # method.
82
+ #
83
+ # @param [Array<Object>] args
84
+ # @yield record a record that might be yielded by the block
85
+ # @yieldparam [Object] record the yielded record
86
+ def perform_action(*args)
87
+ end
62
88
 
63
- # The action attribute is turned into the perform action method
89
+ # :nodoc:
90
+ #
91
+ # The action attribute is turned into the perform action method.
92
+ #
93
+ # @param [Proc] action
64
94
  def receive_action(action)
65
95
  self.define_singleton_method(:perform_action, &action)
66
96
  end
67
97
 
68
- # This method is called after the processor class has been instantiated
69
- # but before any records are given to it to process
98
+ # This method is called after the processor class has been
99
+ # instantiated but before any records are given to it to process.
100
+ #
101
+ # Override this method in your subclass.
70
102
  def setup
71
103
  end
72
104
 
73
- # This method is called once per record
74
- # Override this in your subclass
105
+ # This method is called once per record.
106
+ #
107
+ # Override this method in your subclass.
108
+ #
109
+ # @param [Object] record
110
+ # @yield record the record you want to yield
111
+ # @yieldparam [Object] record the yielded record
75
112
  def process(record, &emit)
76
113
  yield record
77
114
  end
@@ -83,13 +120,18 @@ module Wukong
83
120
  # This can be used within an aggregating processor (like a reducer
84
121
  # in a map/reduce job) to start processing the final aggregate of
85
122
  # records since the "last record" has already been received.
123
+ #
124
+ # Override this method in your subclass.
125
+ #
126
+ # @yield record the record you want to yield
127
+ # @yieldparam [Object] record the yielded record
86
128
  def finalize
87
129
  end
88
130
 
89
131
  # This method is called after all records have been passed. It
90
132
  # signals that processing should stop.
91
-
92
- # This method is called after all records have been processed
133
+ #
134
+ # Override this method in your subclass.
93
135
  def stop
94
136
  end
95
137
 
@@ -0,0 +1,6 @@
1
+ require 'rake'
2
+ require 'wukong'
3
+
4
+ task :environment => [] do
5
+ Wukong::Runner.run
6
+ end