wukong 3.0.0.pre3 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -0,0 +1,27 @@
1
+ module Wukong
2
+
3
+ # A module which can be included by other drivers which lets them
4
+ # use EventMachine under the hood.
5
+ module EventMachineDriver
6
+
7
+ include DriverMethods
8
+
9
+ # :nodoc:
10
+ def self.included klass
11
+ klass.class_eval do
12
+ def self.add_signal_traps
13
+ Signal.trap('INT') { log.info 'Received SIGINT. Stopping.' ; EM.stop }
14
+ Signal.trap('TERM') { log.info 'Received SIGTERM. Stopping.' ; EM.stop }
15
+ end
16
+ end
17
+ end
18
+
19
+ # :nodoc:
20
+ def initialize(label, settings)
21
+ super
22
+ @settings = settings
23
+ @dataflow = construct_dataflow(label, settings)
24
+ end
25
+
26
+ end
27
+ end
@@ -0,0 +1,98 @@
1
+ require_relative 'stdio_driver'
2
+ require_relative 'tcp_driver'
3
+
4
+ module Wukong
5
+ module Local
6
+
7
+ # Implements the Runner for wu-local.
8
+ class LocalRunner < Wukong::Runner
9
+
10
+ include Wukong::Logging
11
+
12
+ usage "PROCESSOR|FLOW"
13
+
14
+ description <<-EOF.gsub(/^ {8}/, '')
15
+ wu-local is a tool for running Wukong processors and flows locally on
16
+ the command-line. Use wu-local by passing it a processor and feeding
17
+ in some data:
18
+
19
+ $ echo 'UNIX is Clever and Fun...' | wu-local tokenizer.rb
20
+ UNIX
21
+ is
22
+ Clever
23
+ and
24
+ Fun
25
+
26
+ If your processors have named fields you can pass them in as
27
+ arguments:
28
+
29
+ $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4
30
+ UNIX
31
+ Clever
32
+
33
+ You can chain processors and calls to wu-local together:
34
+
35
+ $ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4 | wu-local downcaser.rb
36
+ unix
37
+ clever
38
+
39
+ Which is a good way to develop a combined data flow which you can
40
+ again test locally:
41
+
42
+ $ echo 'UNIX is clever and fun...' | wu-local tokenize_and_downcase_big_words.rb
43
+ unix
44
+ clever
45
+ EOF
46
+
47
+ # Returns the name of the processor we're going to run.
48
+ #
49
+ # @return [String]
50
+ def processor
51
+ arg = args.first
52
+ basename = File.basename(arg.to_s, '.rb')
53
+
54
+ case
55
+ when settings[:run] then settings[:run]
56
+ when arg && File.exist?(arg) then basename
57
+ else arg
58
+ end
59
+ end
60
+
61
+ # Validates the chosen processor.
62
+ #
63
+ # @raise [Wukong::Error] if it finds a problem
64
+ # @return [true]
65
+ def validate
66
+ raise Error.new("Must provide a processor or dataflow to run, via either the --run option or as the first argument") if processor.nil? || processor.empty?
67
+ raise Error.new("No such processor or dataflow <#{processor}>") unless registered?(processor)
68
+ true
69
+ end
70
+
71
+ # Adds a customized help message built from the Processor
72
+ # itself.
73
+ def setup
74
+ super()
75
+ dataflow_class_for(processor).configure(settings) if processor?(processor)
76
+ end
77
+
78
+ # Runs either the StdioDriver or the TCPDriver, depending on
79
+ # what settings were passed.
80
+ def run
81
+ EM.run do
82
+ driver.start(processor, settings)
83
+ end
84
+ end
85
+
86
+ # The driver this Runner will use.
87
+ #
88
+ # Defaults to the Wukong::Local::StdioDriver, but will use the
89
+ # TCPDriver if it has a :tcp_port setting defined.
90
+ #
91
+ # @return [Wukong::Local::TCPDriver, Wukong::Local::StdioDriver]
92
+ def driver
93
+ (settings[:tcp_port] ? TCPDriver : StdioDriver)
94
+ end
95
+
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,44 @@
1
+ require_relative('event_machine_driver')
2
+ module Wukong
3
+ module Local
4
+
5
+ # A class for driving processors over the STDIN/STDOUT protocol.
6
+ class StdioDriver < EM::P::LineAndTextProtocol
7
+ include EventMachineDriver
8
+ include Processor::StdoutProcessor
9
+ include Logging
10
+
11
+ def self.start(label, settings = {})
12
+ EM.attach($stdin, self, label, settings)
13
+ end
14
+
15
+ def post_init
16
+ self.class.add_signal_traps
17
+ setup_dataflow
18
+ end
19
+
20
+ def receive_line line
21
+ driver.send_through_dataflow(line)
22
+ rescue => e
23
+ error = Wukong::Error.new(e)
24
+ EM.stop
25
+
26
+ # We'd like to *raise* `error` here and have it be handled by
27
+ # Wukong::Runner.run but we are fighting with EventMachine.
28
+ # It seems no matter what we do, EventMachine will swallow any
29
+ # Exception raised here (including SystemExit) and exit the
30
+ # Ruby process with a return code of 0.
31
+ #
32
+ # Instead we just log the message that *would* have gotten
33
+ # logged by Wukong::Runner.run and leave it to EventMachine to
34
+ # exit very unnaturally.
35
+ log.error(error.message)
36
+ end
37
+
38
+ def unbind
39
+ finalize_and_stop_dataflow
40
+ EM.stop
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,47 @@
1
+ require_relative('event_machine_driver')
2
+ module Wukong
3
+ module Local
4
+
5
+ # A class for driving processors over a TCP protocol.
6
+ class TCPDriver < EM::P::LineAndTextProtocol
7
+ include EventMachineDriver
8
+ include Processor::BufferedProcessor
9
+ include Logging
10
+
11
+ def self.start(label, settings = {})
12
+ host = (settings[:host] || Socket.gethostname) rescue 'localhost'
13
+ port = (settings[:port] || 9000).to_i rescue 9000
14
+ EM.start_server(host, port, self, label, settings)
15
+ log.info "Server started on #{host} on port #{port}"
16
+ add_signal_traps
17
+ end
18
+
19
+ def post_init
20
+ port, ip = Socket.unpack_sockaddr_in(get_peername)
21
+ log.info "Connected to #{ip} on #{port}"
22
+ setup_dataflow
23
+ end
24
+
25
+ def receive_line line
26
+ @buffer = []
27
+ operation = proc { driver.send_through_dataflow(line) }
28
+ callback = proc { flush_buffer @buffer }
29
+ EM.defer(operation, callback)
30
+ rescue => e
31
+ EM.stop
32
+ raise Wukong::Error.new(e)
33
+ end
34
+
35
+ def flush_buffer records
36
+ send_data(records.join("\n") + "\n")
37
+ records.clear
38
+ end
39
+
40
+ def unbind
41
+ finalize_and_stop_dataflow
42
+ EM.stop
43
+ end
44
+
45
+ end
46
+ end
47
+ end
@@ -1,10 +1,19 @@
1
- module Wukong
1
+ module Wukong
2
+
2
3
  class LogFactory
3
4
 
4
5
  attr_reader :created_log
5
6
 
6
- def self.defaults
7
- Log4r::StderrOutputter.new('console', formatter: Log4r::PatternFormatter.new(pattern: "%l %d [%-20c] -- %m"))
7
+ def self.default_outputter klass
8
+ Log4r::StderrOutputter.new('console', formatter: default_formatter(klass))
9
+ end
10
+
11
+ def self.default_formatter klass
12
+ Log4r::PatternFormatter.new(pattern: default_pattern(klass))
13
+ end
14
+
15
+ def self.default_pattern klass
16
+ "%l %d [%-20c] -- %m"
8
17
  end
9
18
 
10
19
  def self.configure(klass, options = {})
@@ -14,7 +23,7 @@ module Wukong
14
23
 
15
24
  def initialize(logger, config)
16
25
  @created_log = logger.is_a?(Log4r::Logger) ? logger : Log4r::Logger.new(logger.to_s)
17
- outputter(LogFactory.defaults) unless ancestry_has_outputter?(@created_log)
26
+ outputter(LogFactory.default_outputter(logger)) unless ancestry_has_outputter?(@created_log)
18
27
  apply_options(config)
19
28
  end
20
29
 
@@ -33,7 +42,7 @@ module Wukong
33
42
  begin
34
43
  send(option, value)
35
44
  rescue
36
- raise "invalid log option"
45
+ raise Error.new("Error setting option <#{option}> to value <#{value}>")
37
46
  end
38
47
  end
39
48
  end
@@ -51,7 +60,7 @@ module Wukong
51
60
  debug: Log4r::DEBUG,
52
61
  info: Log4r::INFO,
53
62
  warn: Log4r::WARN
54
- }.fetch(lvl){ raise "invalid log level" }
63
+ }.fetch(lvl){ raise Error.new("Invalid log level: <#{lvl}>") }
55
64
  end
56
65
 
57
66
  def pattern ptrn
@@ -75,7 +84,7 @@ module Wukong
75
84
  def self.included klass
76
85
  if klass.ancestors.include?(Gorillib::Model)
77
86
  klass.class_eval do
78
- field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) })
87
+ field(:log, Whatever, :default => ->{ Wukong::LogFactory.configure(self.class) }, :doc => "Shared Wukong logger")
79
88
 
80
89
  def receive_log params
81
90
  @log = LogFactory.configure(self.class, params)
@@ -0,0 +1,48 @@
1
+ module Wukong
2
+
3
+ # An array of known plugins.
4
+ PLUGINS = []
5
+
6
+ # Asks each loaded plugin to configure the given +settings+ for the
7
+ # given +program_name+.
8
+ #
9
+ # @param [Configliere::Param] settings the settings to be configured by each plugin
10
+ # @param [String] program_name the name of the currently executing program
11
+ def self.configure_plugins(settings, program_name)
12
+ PLUGINS.each do |plugin|
13
+ plugin.configure(settings, program_name) if plugin.respond_to?(:configure)
14
+ end
15
+ end
16
+
17
+ # Asks each loaded plugin to boot itself from the given +settings+
18
+ # in the given +root+ directory.
19
+ #
20
+ # @param [Configliere::Param] settings the settings for each plugin to boot from
21
+ # @param [String] root the root directory the plugins are booting in
22
+ def self.boot_plugins(settings, root)
23
+ PLUGINS.each do |plugin|
24
+ plugin.boot(settings, root) if plugin.respond_to?(:boot)
25
+ end
26
+ end
27
+
28
+ # Include this module in your own class or module to have it
29
+ # register itself as a Wukong plugin.
30
+ #
31
+ # Your class or module may define the following methods (each is only called if the plugin responds to it):
32
+ #
33
+ # * `configure` called with a (pre-resolved) Configliere::Param argument and the basename of the running program
34
+ # * `boot` called with a (resolved) Configliere::Param argument and the current working directory of the running program, reacts to any settings as necessary
35
+ #
36
+ # Subclasses of Wukong::Runner will automatically load and boot each
37
+ # plugin.
38
+ module Plugin
39
+ # :nodoc:
40
+ def self.included mod
41
+ PLUGINS << mod unless PLUGINS.include?(mod)
42
+ end
43
+ end
44
+
45
+ end
46
+
47
+
48
+
@@ -15,15 +15,12 @@ module Wukong
15
15
  include Logging
16
16
  include Vayacondios::Notifications
17
17
 
18
- field :action, Whatever
18
+ field :action, Whatever, :doc => false
19
19
 
20
20
  class << self
21
21
 
22
- def describe desc
23
- @description = desc
24
- end
25
-
26
- def description
22
+ def description desc=nil
23
+ @description = desc if desc
27
24
  @description
28
25
  end
29
26
 
@@ -47,6 +44,29 @@ module Wukong
47
44
  instance_variable_set("@serialization_#{direction}", label) if %w[ tsv json xml ].include?(label.to_s)
48
45
  end
49
46
 
47
+ def configure(settings)
48
+ settings.description = description if description
49
+ fields.each_pair do |name, field|
50
+ next if field.doc == false || field.doc.to_s == 'false'
51
+ next if [:log, :notifier].include?(name)
52
+ field_props = {}.tap do |props|
53
+ props[:description] = field.doc unless field.doc == "#{name} field"
54
+ field_type = (field.type.respond_to?(:product) ? field.type.product : field.type)
55
+ configliere_type = case field_type
56
+ when String then nil
57
+ when TrueClass, FalseClass then :boolean
58
+ else field_type
59
+ end
60
+
61
+ props[:type] = configliere_type if configliere_type
62
+ props[:default] = field.default if field.default
63
+ end
64
+ existing_value = settings[name]
65
+ settings.define(name, field_props)
66
+ settings[name] = existing_value unless existing_value.nil?
67
+ end
68
+ end
69
+
50
70
  end
51
71
 
52
72
  def expected_record_type(type)
@@ -57,21 +77,38 @@ module Wukong
57
77
  self.class.instance_variable_get("@serialization_#{direction.to_s}")
58
78
  end
59
79
 
60
- # This is a placeholder method intended to be overridden
61
- def perform_action(*args) ; end
80
+ # When instantiated with a block, the block will replace this
81
+ # method.
82
+ #
83
+ # @param [Array<Object>] args
84
+ # @yield record a record that might be yielded by the block
85
+ # @yieldparam [Object] record the yielded record
86
+ def perform_action(*args)
87
+ end
62
88
 
63
- # The action attribute is turned into the perform action method
89
+ # :nodoc:
90
+ #
91
+ # The action attribute is turned into the perform action method.
92
+ #
93
+ # @param [Proc] action
64
94
  def receive_action(action)
65
95
  self.define_singleton_method(:perform_action, &action)
66
96
  end
67
97
 
68
- # This method is called after the processor class has been instantiated
69
- # but before any records are given to it to process
98
+ # This method is called after the processor class has been
99
+ # instantiated but before any records are given to it to process.
100
+ #
101
+ # Override this method in your subclass.
70
102
  def setup
71
103
  end
72
104
 
73
- # This method is called once per record
74
- # Override this in your subclass
105
+ # This method is called once per record.
106
+ #
107
+ # Override this method in your subclass.
108
+ #
109
+ # @param [Object] record
110
+ # @yield record the record you want to yield
111
+ # @yieldparam [Object] record the yielded record
75
112
  def process(record, &emit)
76
113
  yield record
77
114
  end
@@ -83,13 +120,18 @@ module Wukong
83
120
  # This can be used within an aggregating processor (like a reducer
84
121
  # in a map/reduce job) to start processing the final aggregate of
85
122
  # records since the "last record" has already been received.
123
+ #
124
+ # Override this method in your subclass.
125
+ #
126
+ # @yield record the record you want to yield
127
+ # @yieldparam [Object] record the yielded record
86
128
  def finalize
87
129
  end
88
130
 
89
131
  # This method is called after all records have been passed. It
90
132
  # signals that processing should stop.
91
-
92
- # This method is called after all records have been processed
133
+ #
134
+ # Override this method in your subclass.
93
135
  def stop
94
136
  end
95
137
 
@@ -0,0 +1,6 @@
1
+ require 'rake'
2
+ require 'wukong'
3
+
4
+ task :environment => [] do
5
+ Wukong::Runner.run
6
+ end